From 091806edd458486af13ad83c9802f5b8b54d6d19 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 29 Apr 2008 12:35:48 -0500 Subject: [GFS2] filesystem consistency error from do_strip This patch fixes a GFS2 filesystem consistency error reported from function do_strip. The problem was caused by a timing window that allowed two vfs inodes to be created in memory that point to the same file. The problem is fixed by making the vfs's iget_test, iget_set mechanism check and set a new bit in the in-core gfs2_inode structure while the vfs inode spin_lock is held. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glops.c | 2 +- fs/gfs2/incore.h | 1 + fs/gfs2/inode.c | 10 +++++----- fs/gfs2/meta_io.c | 6 ++++-- fs/gfs2/ops_super.c | 16 +++++++++------- 5 files changed, 20 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index d31badadef8f..07d84d16cda4 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -249,7 +249,7 @@ static int inode_go_lock(struct gfs2_holder *gh) struct gfs2_inode *ip = gl->gl_object; int error = 0; - if (!ip) + if (!ip || (gh->gh_flags & GL_SKIP)) return 0; if (test_bit(GIF_INVALID, &ip->i_flags)) { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 9c2c0b90b22a..eabe5eac41da 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -236,6 +236,7 @@ enum { GIF_INVALID = 0, GIF_QD_LOCKED = 1, GIF_SW_PAGED = 3, + GIF_USER = 4, /* user inode, not metadata addr space */ }; struct gfs2_dinode_host { diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 3a9ef526c308..09453d057e41 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -47,8 +47,7 @@ static int iget_test(struct inode *inode, void *opaque) struct gfs2_inode *ip = GFS2_I(inode); u64 *no_addr = opaque; - if (ip->i_no_addr == *no_addr && - inode->i_private != NULL) + if (ip->i_no_addr == *no_addr && test_bit(GIF_USER, &ip->i_flags)) return 1; return 0; @@ -61,6 +60,7 @@ static int iget_set(struct inode *inode, void *opaque) inode->i_ino = (unsigned long)*no_addr; ip->i_no_addr = *no_addr; + set_bit(GIF_USER, &ip->i_flags); return 0; } @@ -86,7 +86,7 @@ static int iget_skip_test(struct inode *inode, void *opaque) struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_skip_data *data = opaque; - if (ip->i_no_addr == data->no_addr && inode->i_private != NULL){ + if (ip->i_no_addr == data->no_addr && test_bit(GIF_USER, &ip->i_flags)){ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ data->skipped = 1; return 0; @@ -105,6 +105,7 @@ static int iget_skip_set(struct inode *inode, void *opaque) return 1; inode->i_ino = (unsigned long)(data->no_addr); ip->i_no_addr = data->no_addr; + set_bit(GIF_USER, &ip->i_flags); return 0; } @@ -166,7 +167,7 @@ void gfs2_set_iop(struct inode *inode) * Returns: A VFS inode, or an error */ -struct inode *gfs2_inode_lookup(struct super_block *sb, +struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, u64 no_addr, u64 no_formal_ino, int skip_freeing) @@ -187,7 +188,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, if (inode->i_state & I_NEW) { struct gfs2_sbd *sdp = GFS2_SB(inode); - inode->i_private = ip; ip->i_no_formal_ino = no_formal_ino; error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 85aea27b4a86..78d75f892f82 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -69,13 +69,15 @@ static const struct address_space_operations aspace_aops = { struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp) { struct inode *aspace; + struct gfs2_inode *ip; aspace = new_inode(sdp->sd_vfs); if (aspace) { mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS); aspace->i_mapping->a_ops = &aspace_aops; aspace->i_size = ~0ULL; - aspace->i_private = NULL; + ip = GFS2_I(aspace); + clear_bit(GIF_USER, &ip->i_flags); insert_inode_hash(aspace); } return aspace; diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 2278c68b7e35..0b7cc920eb89 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -52,7 +52,7 @@ static int gfs2_write_inode(struct inode *inode, int sync) struct gfs2_inode *ip = GFS2_I(inode); /* Check this is a "normal" inode */ - if (inode->i_private) { + if (test_bit(GIF_USER, &ip->i_flags)) { if (current->flags & PF_MEMALLOC) return 0; if (sync) @@ -297,8 +297,9 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) */ static void gfs2_drop_inode(struct inode *inode) { - if (inode->i_private && inode->i_nlink) { - struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_inode *ip = GFS2_I(inode); + + if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) clear_nlink(inode); @@ -314,12 +315,13 @@ static void gfs2_drop_inode(struct inode *inode) static void gfs2_clear_inode(struct inode *inode) { + struct gfs2_inode *ip = GFS2_I(inode); + /* This tells us its a "real" inode and not one which only * serves to contain an address space (see rgrp.c, meta_io.c) * which therefore doesn't have its own glocks. */ - if (inode->i_private) { - struct gfs2_inode *ip = GFS2_I(inode); + if (test_bit(GIF_USER, &ip->i_flags)) { ip->i_gl->gl_object = NULL; gfs2_glock_schedule_for_reclaim(ip->i_gl); gfs2_glock_put(ip->i_gl); @@ -419,7 +421,7 @@ static void gfs2_delete_inode(struct inode *inode) struct gfs2_holder gh; int error; - if (!inode->i_private) + if (!test_bit(GIF_USER, &ip->i_flags)) goto out; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); -- cgit v1.2.3-70-g09d2 From ad99f77778e83358c371dab7a50bde69270ed6b8 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Thu, 1 May 2008 11:55:38 +0100 Subject: [GFS2] Fix cast from unsigned int to s64 This fixes bz 444829 where allocating a new block caused gfs2 file systems to report 0 bytes used in df. It was caused by a broken cast from an unsigned int in gfs2_block_alloc() to a negative s64 in gfs2_statfs_change(). This patch casts the unsigned int to an s64 before the unary minus is applied. Signed-off-by: Andrew Price Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 7e8f0b1d6c6e..6387523a3153 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1495,7 +1495,7 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) al->al_alloced += *n; - gfs2_statfs_change(sdp, 0, -*n, 0); + gfs2_statfs_change(sdp, 0, -(s64)*n, 0); gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid); spin_lock(&sdp->sd_rindex_spin); -- cgit v1.2.3-70-g09d2 From 00377d8e3842776d1da633ad9c79a16ecb548b92 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 9 May 2008 17:59:51 +0200 Subject: [GFS2] Prefer strlcpy() over snprintf() strlcpy is faster than snprintf when you don't use the returned value. Signed-off-by: Jean Delvare Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_fstype.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index ef9c6c4f80f6..b2028c82e8d1 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -142,8 +142,8 @@ static int init_names(struct gfs2_sbd *sdp, int silent) if (!table[0]) table = sdp->sd_vfs->s_id; - snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto); - snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table); + strlcpy(sdp->sd_proto_name, proto, GFS2_FSNAME_LEN); + strlcpy(sdp->sd_table_name, table, GFS2_FSNAME_LEN); table = sdp->sd_table_name; while ((table = strchr(table, '/'))) -- cgit v1.2.3-70-g09d2 From d0a9c078db4769f7305ff9774558776d12bfb25b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 12 May 2008 22:23:49 +0000 Subject: [CIFS] CIFS currently allows for permissions to be changed on files, even when unix extensions and cifsacl support are disabled. These permissions changes are "ephemeral" however. They are lost whenever a share is mounted and unmounted, or when memory pressure forces the inode out of the cache. Because of this, we'd like to introduce a behavior change to make CIFS behave more like local DOS/Windows filesystems. When unix extensions and cifsacl support aren't enabled, then don't silently ignore changes to permission bits that can't be reflected on the server. Still, there may be people relying on the current behavior for certain applications. This patch adds a new "dynperm" (and a corresponding "nodynperm") mount option that will be intended to make the client fall back to legacy behavior when setting these modes. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifs_fs_sb.h | 1 + fs/cifs/connect.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 8ad2330ba061..877c85409f1f 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -30,6 +30,7 @@ #define CIFS_MOUNT_CIFS_ACL 0x200 /* send ACL requests to non-POSIX srv */ #define CIFS_MOUNT_OVERR_UID 0x400 /* override uid returned from server */ #define CIFS_MOUNT_OVERR_GID 0x800 /* override gid returned from server */ +#define CIFS_MOUNT_DYNPERM 0x1000 /* allow in-memory only mode setting */ struct cifs_sb_info { struct cifsTconInfo *tcon; /* primary mount */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f428bf3bf1a9..8e2fa6d46c72 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -75,6 +75,7 @@ struct smb_vol { bool setuids:1; bool override_uid:1; bool override_gid:1; + bool dynperm:1; bool noperm:1; bool no_psx_acl:1; /* set if posix acl support should be disabled */ bool cifs_acl:1; @@ -1246,6 +1247,10 @@ cifs_parse_mount_options(char *options, const char *devname, vol->setuids = 1; } else if (strnicmp(data, "nosetuids", 9) == 0) { vol->setuids = 0; + } else if (strnicmp(data, "dynperm", 7) == 0) { + vol->dynperm = true; + } else if (strnicmp(data, "nodynperm", 9) == 0) { + vol->dynperm = false; } else if (strnicmp(data, "nohard", 6) == 0) { vol->retry = 0; } else if (strnicmp(data, "nosoft", 6) == 0) { @@ -2125,6 +2130,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID; if (volume_info.override_gid) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID; + if (volume_info.dynperm) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM; if (volume_info.direct_io) { cFYI(1, ("mounting share using direct i/o")); cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; -- cgit v1.2.3-70-g09d2 From 6353450a2deefaa79cdb4fd2b72830c7db610256 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 12 May 2008 19:56:05 -0700 Subject: fix memory leak in CIFSFindNext When CIFSFindNext gets back an -EBADF from a call, it sets the return code of the function to 0 and eventually exits. Doing this makes the cleanup at the end of the function skip freeing the SMB buffer, so we need to make sure we free the buffer explicitly when doing this. If we don't you end up with errors like this when unplugging the cifs kernel module: slab error in kmem_cache_destroy(): cache `cifs_request': Can't free all objects [] kmem_cache_destroy+0x61/0xf3 [] cifs_destroy_request_bufs+0x14/0x28 [cifs] [] exit_cifs+0x1e/0x80 [cifs] [] sys_delete_module+0x192/0x1b8 [] audit_syscall_entry+0x14b/0x17d [] syscall_call+0x7/0xb ======================= Signed-off-by: Jeff Layton --- fs/cifs/CHANGES | 3 ++- fs/cifs/cifssmb.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 8355e918fddf..502a4c2b8414 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -12,7 +12,8 @@ Add ability to modify cifs acls for handling chmod (when mounted with cifsacl flag). Fix prefixpath path separator so we can handle mounts with prefixpaths longer than one directory (one path component) when mounted to Windows servers. Fix slow file open when cifsacl -enabled. +enabled. Fix memory leak in FindNext when the SMB call returns -EBADF. + Version 1.51 ------------ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 95fbba4ea7d4..641cc8ffc51b 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3679,6 +3679,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, if (rc) { if (rc == -EBADF) { psrch_inf->endOfSearch = true; + cifs_buf_release(pSMB); rc = 0; /* search probably was closed at end of search*/ } else cFYI(1, ("FindNext returned = %d", rc)); -- cgit v1.2.3-70-g09d2 From ed5f037005d728de19a0f63678ac35b42064966d Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Tue, 13 May 2008 04:01:01 +0000 Subject: [CIFS] CIFSSMBPosixLock should return -EINVAL on error all other codepaths in this function return negative values on errors Signed-off-by: Marcin Slusarz Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 641cc8ffc51b..1cbe61524efc 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1767,7 +1767,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, cFYI(1, ("Posix Lock")); if (pLockData == NULL) - return EINVAL; + return -EINVAL; rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); -- cgit v1.2.3-70-g09d2 From 582d21e5e319d38c0485d8b9e92f6f2341f7c79b Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 13 May 2008 04:54:12 +0000 Subject: [CIFS] cleanup old checkpatch warnings Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 6 +++--- fs/cifs/cifssmb.c | 51 +++++++++++++++++++++++++++++++++------------------ fs/cifs/connect.c | 7 ++++--- fs/cifs/netmisc.c | 6 +++--- fs/cifs/ntlmssp.h | 4 ++-- 5 files changed, 45 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index d481f6c5a2be..08248e85b788 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -93,7 +93,7 @@ extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time); extern int cifs_get_inode_info(struct inode **pinode, const unsigned char *search_path, - FILE_ALL_INFO * pfile_info, + FILE_ALL_INFO *pfile_info, struct super_block *sb, int xid, const __u16 *pfid); extern int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *search_path, @@ -130,7 +130,7 @@ extern int CIFSFindClose(const int, struct cifsTconInfo *tcon, extern int CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon, const unsigned char *searchName, - FILE_ALL_INFO * findData, + FILE_ALL_INFO *findData, int legacy /* whether to use old info level */, const struct nls_table *nls_codepage, int remap); extern int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon, @@ -141,7 +141,7 @@ extern int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon, extern int CIFSSMBUnixQPathInfo(const int xid, struct cifsTconInfo *tcon, const unsigned char *searchName, - FILE_UNIX_BASIC_INFO * pFindData, + FILE_UNIX_BASIC_INFO *pFindData, const struct nls_table *nls_codepage, int remap); extern int CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 1cbe61524efc..3c05c2de50e1 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1247,7 +1247,7 @@ OldOpenRetry: } else { /* BB verify if wct == 15 */ -/* *pOplock = pSMBr->OplockLevel; */ /* BB take from action field BB */ +/* *pOplock = pSMBr->OplockLevel; */ /* BB take from action field*/ *netfid = pSMBr->Fid; /* cifs fid stays in le */ /* Let caller know file was created so we can set the mode. */ @@ -1944,7 +1944,7 @@ renameRetry: /* protocol requires ASCII signature byte on Unicode string */ pSMB->OldFileName[name_len + 1] = 0x00; name_len2 = - cifsConvertToUCS((__le16 *) &pSMB->OldFileName[name_len + 2], + cifsConvertToUCS((__le16 *)&pSMB->OldFileName[name_len + 2], toName, PATH_MAX, nls_codepage, remap); name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; name_len2 *= 2; /* convert to bytes */ @@ -2925,7 +2925,8 @@ setAclRetry: } params = 6 + name_len; pSMB->MaxParameterCount = cpu_to_le16(2); - pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB size from sess */ + /* BB find max SMB size from sess */ + pSMB->MaxDataCount = cpu_to_le16(1000); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; @@ -3322,7 +3323,8 @@ QPathInfoRetry: params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */; pSMB->TotalDataCount = 0; pSMB->MaxParameterCount = cpu_to_le16(2); - pSMB->MaxDataCount = cpu_to_le16(4000); /* BB find exact max SMB PDU from sess structure BB */ + /* BB find exact max SMB PDU from sess structure BB */ + pSMB->MaxDataCount = cpu_to_le16(4000); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; @@ -3388,7 +3390,7 @@ QPathInfoRetry: int CIFSSMBUnixQPathInfo(const int xid, struct cifsTconInfo *tcon, const unsigned char *searchName, - FILE_UNIX_BASIC_INFO * pFindData, + FILE_UNIX_BASIC_INFO *pFindData, const struct nls_table *nls_codepage, int remap) { /* SMB_QUERY_FILE_UNIX_BASIC */ @@ -3922,7 +3924,8 @@ getDFSRetry: pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->MaxParameterCount = 0; - pSMB->MaxDataCount = cpu_to_le16(4000); /* BB find exact max SMB PDU from sess structure BB */ + /* BB find exact max SMB PDU from sess structure BB */ + pSMB->MaxDataCount = cpu_to_le16(4000); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; @@ -4230,7 +4233,8 @@ QFSAttributeRetry: params = 2; /* level */ pSMB->TotalDataCount = 0; pSMB->MaxParameterCount = cpu_to_le16(2); - pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find exact max SMB PDU from sess structure BB */ + /* BB find exact max SMB PDU from sess structure BB */ + pSMB->MaxDataCount = cpu_to_le16(1000); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; @@ -4299,7 +4303,8 @@ QFSDeviceRetry: params = 2; /* level */ pSMB->TotalDataCount = 0; pSMB->MaxParameterCount = cpu_to_le16(2); - pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find exact max SMB PDU from sess structure BB */ + /* BB find exact max SMB PDU from sess structure BB */ + pSMB->MaxDataCount = cpu_to_le16(1000); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; @@ -4370,7 +4375,8 @@ QFSUnixRetry: pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->MaxParameterCount = cpu_to_le16(2); - pSMB->MaxDataCount = cpu_to_le16(100); /* BB find exact max SMB PDU from sess structure BB */ + /* BB find exact max SMB PDU from sess structure BB */ + pSMB->MaxDataCount = cpu_to_le16(100); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; @@ -4445,7 +4451,8 @@ SETFSUnixRetry: offset = param_offset + params; pSMB->MaxParameterCount = cpu_to_le16(4); - pSMB->MaxDataCount = cpu_to_le16(100); /* BB find exact max SMB PDU from sess structure BB */ + /* BB find exact max SMB PDU from sess structure BB */ + pSMB->MaxDataCount = cpu_to_le16(100); pSMB->SetupCount = 1; pSMB->Reserved3 = 0; pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FS_INFORMATION); @@ -4513,7 +4520,8 @@ QFSPosixRetry: pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->MaxParameterCount = cpu_to_le16(2); - pSMB->MaxDataCount = cpu_to_le16(100); /* BB find exact max SMB PDU from sess structure BB */ + /* BB find exact max SMB PDU from sess structure BB */ + pSMB->MaxDataCount = cpu_to_le16(100); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; @@ -4703,7 +4711,8 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, count = sizeof(struct file_end_of_file_info); pSMB->MaxParameterCount = cpu_to_le16(2); - pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB PDU from sess */ + /* BB find exact max SMB PDU from sess structure BB */ + pSMB->MaxDataCount = cpu_to_le16(1000); pSMB->SetupCount = 1; pSMB->Reserved3 = 0; pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION); @@ -4790,7 +4799,8 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, count = sizeof(FILE_BASIC_INFO); pSMB->MaxParameterCount = cpu_to_le16(2); - pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB PDU from sess */ + /* BB find max SMB PDU from sess */ + pSMB->MaxDataCount = cpu_to_le16(1000); pSMB->SetupCount = 1; pSMB->Reserved3 = 0; pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION); @@ -4857,7 +4867,8 @@ SetTimesRetry: params = 6 + name_len; count = sizeof(FILE_BASIC_INFO); pSMB->MaxParameterCount = cpu_to_le16(2); - pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find exact max SMB PDU from sess structure BB */ + /* BB find max SMB PDU from sess structure BB */ + pSMB->MaxDataCount = cpu_to_le16(1000); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; @@ -4987,7 +4998,8 @@ setPermsRetry: params = 6 + name_len; count = sizeof(FILE_UNIX_BASIC_INFO); pSMB->MaxParameterCount = cpu_to_le16(2); - pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find exact max SMB PDU from sess structure BB */ + /* BB find max SMB PDU from sess structure BB */ + pSMB->MaxDataCount = cpu_to_le16(1000); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; @@ -5170,7 +5182,8 @@ QAllEAsRetry: params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */; pSMB->TotalDataCount = 0; pSMB->MaxParameterCount = cpu_to_le16(2); - pSMB->MaxDataCount = cpu_to_le16(4000); /* BB find exact max SMB PDU from sess structure BB */ + /* BB find exact max SMB PDU from sess structure BB */ + pSMB->MaxDataCount = cpu_to_le16(4000); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; @@ -5318,7 +5331,8 @@ QEARetry: params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */; pSMB->TotalDataCount = 0; pSMB->MaxParameterCount = cpu_to_le16(2); - pSMB->MaxDataCount = cpu_to_le16(4000); /* BB find exact max SMB PDU from sess structure BB */ + /* BB find exact max SMB PDU from sess structure BB */ + pSMB->MaxDataCount = cpu_to_le16(4000); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; @@ -5476,7 +5490,8 @@ SetEARetry: count = sizeof(*parm_data) + ea_value_len + name_len; pSMB->MaxParameterCount = cpu_to_le16(2); - pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB size from sess */ + /* BB find max SMB PDU from sess */ + pSMB->MaxDataCount = cpu_to_le16(1000); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8e2fa6d46c72..7c2e5ea03305 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1820,7 +1820,7 @@ convert_delimiter(char *path, char delim) if (path == NULL) return; - if (delim == '/') + if (delim == '/') old_delim = '\\'; else old_delim = '/'; @@ -2321,9 +2321,10 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, user = ses->userName; domain = ses->domainName; smb_buffer = cifs_buf_get(); - if (smb_buffer == NULL) { + + if (smb_buffer == NULL) return -ENOMEM; - } + smb_buffer_response = smb_buffer; pSMBr = pSMB = (SESSION_SETUP_ANDX *) smb_buffer; diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 00f4cff400b3..8703d68f5b20 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -141,11 +141,11 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst) int ret = 0; /* calculate length by finding first slash or NULL */ - if (address_family == AF_INET) { + if (address_family == AF_INET) ret = in4_pton(cp, -1 /* len */, dst, '\\', NULL); - } else if (address_family == AF_INET6) { + else if (address_family == AF_INET6) ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL); - } + cFYI(DBG2, ("address conversion returned %d for %s", ret, cp)); if (ret > 0) ret = 1; diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index 7170a9b70f1e..c377d8065d99 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -64,7 +64,7 @@ typedef struct _SECURITY_BUFFER { } __attribute__((packed)) SECURITY_BUFFER; typedef struct _NEGOTIATE_MESSAGE { - __u8 Signature[sizeof (NTLMSSP_SIGNATURE)]; + __u8 Signature[sizeof(NTLMSSP_SIGNATURE)]; __le32 MessageType; /* 1 */ __le32 NegotiateFlags; SECURITY_BUFFER DomainName; /* RFC 1001 style and ASCII */ @@ -74,7 +74,7 @@ typedef struct _NEGOTIATE_MESSAGE { } __attribute__((packed)) NEGOTIATE_MESSAGE, *PNEGOTIATE_MESSAGE; typedef struct _CHALLENGE_MESSAGE { - __u8 Signature[sizeof (NTLMSSP_SIGNATURE)]; + __u8 Signature[sizeof(NTLMSSP_SIGNATURE)]; __le32 MessageType; /* 2 */ SECURITY_BUFFER TargetName; __le32 NegotiateFlags; -- cgit v1.2.3-70-g09d2 From 77c57ec89682c73785d12d51a6d1f873b292fa42 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 13 May 2008 21:39:32 +0000 Subject: [CIFS] don't explicitly do a FindClose on rewind when directory search has ended Do the following series of operations on a CIFS share: opendir(dir) readdir(dir) unlink(file in dir) rewinddir(dir) readdir(dir) If the readdir read all entries in the directory this will make CIFS throw an error like this: CIFS VFS: Send error in FindClose = -9 CIFS requests "Close at end of search" of the server by setting this bit when issuing FindFirst or FindNext. Therefore when all search entries are returned, the server may return "end of search" and close the search implicitly when this bit is set by the client on the request. We check for this when a readdir is explicitly closed - but when the client notices that a directory has changed after the last operation, we attempt to close the directory before reopening by reissuing a second FindFirst. But, the directory may already been implicitly closed (due to end of search) because the first readdir finished. So we only want to issue a FindClose call in this case when we don't expect it to already be closed. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/readdir.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 34ec32100c72..713c25110197 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -670,8 +670,11 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, (index_to_find < first_entry_in_buffer)) { /* close and restart search */ cFYI(1, ("search backing up - close and restart search")); - cifsFile->invalidHandle = true; - CIFSFindClose(xid, pTcon, cifsFile->netfid); + if (!cifsFile->srch_inf.endOfSearch && + !cifsFile->invalidHandle) { + cifsFile->invalidHandle = true; + CIFSFindClose(xid, pTcon, cifsFile->netfid); + } kfree(cifsFile->search_resume_name); cifsFile->search_resume_name = NULL; if (cifsFile->srch_inf.ntwrk_buf_start) { -- cgit v1.2.3-70-g09d2 From e10f7b551d2a79b113d5ce66b5dc9f3657035445 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 14 May 2008 10:21:33 -0700 Subject: clarify return value of cifs_convert_flags() cifs_convert_flags returns 0x20197 in the default case. It's not immediately evident where that number comes from, so change it to be an or'ed set of flags. The compiler will boil it down anyway. (Thanks to Guenter Kukkukk for clarifying the flags). Signed-off-by: Steve French --- fs/cifs/file.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 31a0a33b9d95..8636cec2642c 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -75,7 +75,11 @@ static inline int cifs_convert_flags(unsigned int flags) return (GENERIC_READ | GENERIC_WRITE); } - return 0x20197; + return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES | + FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA | + FILE_READ_DATA); + + } static inline int cifs_get_disposition(unsigned int flags) -- cgit v1.2.3-70-g09d2 From 35fc37d5175091c36d034a28c057da0f9594ee7e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 14 May 2008 10:22:03 -0700 Subject: add function to convert access flags to legacy open mode SMBLegacyOpen always opens a file as r/w. This could be problematic for files with ATTR_READONLY set. Have it interpret the access_mode into a sane open mode. Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 22 +++++++++++++++------- fs/cifs/inode.c | 3 +-- 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 3c05c2de50e1..9c04ad404553 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1166,6 +1166,20 @@ static __u16 convert_disposition(int disposition) return ofun; } +static int +access_flags_to_smbopen_mode(const int access_flags) +{ + int masked_flags = access_flags & (GENERIC_READ | GENERIC_WRITE); + + if (masked_flags == GENERIC_READ) + return SMBOPEN_READ; + else if (masked_flags == GENERIC_WRITE) + return SMBOPEN_WRITE; + + /* just go for read/write */ + return SMBOPEN_READWRITE; +} + int SMBLegacyOpen(const int xid, struct cifsTconInfo *tcon, const char *fileName, const int openDisposition, @@ -1207,13 +1221,7 @@ OldOpenRetry: pSMB->OpenFlags = cpu_to_le16(REQ_BATCHOPLOCK); pSMB->OpenFlags |= cpu_to_le16(REQ_MORE_INFO); - /* BB fixme add conversion for access_flags to bits 0 - 2 of mode */ - /* 0 = read - 1 = write - 2 = rw - 3 = execute - */ - pSMB->Mode = cpu_to_le16(2); + pSMB->Mode = cpu_to_le16(access_flags_to_smbopen_mode(access_flags)); pSMB->Mode |= cpu_to_le16(0x40); /* deny none */ /* set file as system file if special file such as fifo and server expecting SFU style and diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index fcbdbb6ad7bf..2d53b436d511 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1502,8 +1502,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) int oplock = 0; rc = SMBLegacyOpen(xid, pTcon, full_path, - FILE_OPEN, - SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, + FILE_OPEN, GENERIC_WRITE, CREATE_NOT_DIR, &netfid, &oplock, NULL, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & -- cgit v1.2.3-70-g09d2 From 646dd539878a194bc14b104621c0b2b33587e40f Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 15 May 2008 01:50:56 +0000 Subject: [CIFS] Fix paths when share is in DFS to include proper prefix Some versions of Samba (3.2-pre e.g.) are stricter about checking to make sure that paths in DFS name spaces are sent in the form \\server\share\dir\subdir ... instead of \dir\subdir Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 3 --- fs/cifs/connect.c | 26 +++------------------ fs/cifs/dir.c | 28 ++++++++++++++++++----- fs/cifs/inode.c | 65 +++++++---------------------------------------------- fs/cifs/link.c | 42 +++------------------------------- 5 files changed, 37 insertions(+), 127 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 08248e85b788..845b18e1abe5 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -150,9 +150,6 @@ extern int CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses, unsigned int *number_of_UNC_in_array, const struct nls_table *nls_codepage, int remap); -extern int connect_to_dfs_path(int xid, struct cifsSesInfo *pSesInfo, - const char *old_path, - const struct nls_table *nls_codepage, int remap); extern int get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, const struct nls_table *nls_codepage, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7c2e5ea03305..d5747e30f1c9 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1418,27 +1418,6 @@ find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) return NULL; } -int -connect_to_dfs_path(int xid, struct cifsSesInfo *pSesInfo, - const char *old_path, const struct nls_table *nls_codepage, - int remap) -{ - struct dfs_info3_param *referrals = NULL; - unsigned int num_referrals; - int rc = 0; - - rc = get_dfs_path(xid, pSesInfo, old_path, nls_codepage, - &num_referrals, &referrals, remap); - - /* BB Add in code to: if valid refrl, if not ip address contact - the helper that resolves tcp names, mount to it, try to - tcon to it unmount it if fail */ - - kfree(referrals); - - return rc; -} - int get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, const struct nls_table *nls_codepage, unsigned int *pnum_referrals, @@ -2161,10 +2140,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, if ((strchr(volume_info.UNC + 3, '\\') == NULL) && (strchr(volume_info.UNC + 3, '/') == NULL)) { - rc = connect_to_dfs_path(xid, pSesInfo, +/* rc = connect_to_dfs_path(xid, pSesInfo, "", cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + CIFS_MOUNT_MAP_SPECIAL_CHR);*/ + cFYI(1, ("DFS root not supported")); rc = -ENODEV; goto out; } else { diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index e4e0078a0526..05afe33ea644 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -49,18 +49,25 @@ build_path_from_dentry(struct dentry *direntry) struct dentry *temp; int namelen; int pplen; + int dfsplen; char *full_path; char dirsep; + struct cifs_sb_info *cifs_sb; if (direntry == NULL) return NULL; /* not much we can do if dentry is freed and we need to reopen the file after it was closed implicitly when the server crashed */ - dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb)); - pplen = CIFS_SB(direntry->d_sb)->prepathlen; + cifs_sb = CIFS_SB(direntry->d_sb); + dirsep = CIFS_DIR_SEP(cifs_sb); + pplen = cifs_sb->prepathlen; + if (cifs_sb->tcon && (cifs_sb->tcon->Flags & SMB_SHARE_IS_IN_DFS)) + dfsplen = strnlen(cifs_sb->tcon->treeName, MAX_TREE_SIZE + 1); + else + dfsplen = 0; cifs_bp_rename_retry: - namelen = pplen; + namelen = pplen + dfsplen; for (temp = direntry; !IS_ROOT(temp);) { namelen += (1 + temp->d_name.len); temp = temp->d_parent; @@ -91,7 +98,7 @@ cifs_bp_rename_retry: return NULL; } } - if (namelen != pplen) { + if (namelen != pplen + dfsplen) { cERROR(1, ("did not end path lookup where expected namelen is %d", namelen)); @@ -107,7 +114,18 @@ cifs_bp_rename_retry: since the '\' is a valid posix character so we can not switch those safely to '/' if any are found in the middle of the prepath */ /* BB test paths to Windows with '/' in the midst of prepath */ - strncpy(full_path, CIFS_SB(direntry->d_sb)->prepath, pplen); + + if (dfsplen) { + strncpy(full_path, cifs_sb->tcon->treeName, dfsplen); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) { + int i; + for (i = 0; i < dfsplen; i++) { + if (full_path[i] == '\\') + full_path[i] = '/'; + } + } + } + strncpy(full_path + dfsplen, CIFS_SB(direntry->d_sb)->prepath, pplen); return full_path; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 2d53b436d511..9d9b56a9c08e 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -161,52 +161,18 @@ static void cifs_unix_info_to_inode(struct inode *inode, spin_unlock(&inode->i_lock); } -static const unsigned char *cifs_get_search_path(struct cifs_sb_info *cifs_sb, - const char *search_path) -{ - int tree_len; - int path_len; - int i; - char *tmp_path; - struct cifsTconInfo *pTcon = cifs_sb->tcon; - - if (!(pTcon->Flags & SMB_SHARE_IS_IN_DFS)) - return search_path; - - /* use full path name for working with DFS */ - tree_len = strnlen(pTcon->treeName, MAX_TREE_SIZE + 1); - path_len = strnlen(search_path, MAX_PATHCONF); - - tmp_path = kmalloc(tree_len+path_len+1, GFP_KERNEL); - if (tmp_path == NULL) - return search_path; - - strncpy(tmp_path, pTcon->treeName, tree_len); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) - for (i = 0; i < tree_len; i++) { - if (tmp_path[i] == '\\') - tmp_path[i] = '/'; - } - strncpy(tmp_path+tree_len, search_path, path_len); - tmp_path[tree_len+path_len] = 0; - return tmp_path; -} - int cifs_get_inode_info_unix(struct inode **pinode, - const unsigned char *search_path, struct super_block *sb, int xid) + const unsigned char *full_path, struct super_block *sb, int xid) { int rc = 0; FILE_UNIX_BASIC_INFO findData; struct cifsTconInfo *pTcon; struct inode *inode; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); - const unsigned char *full_path; bool is_dfs_referral = false; pTcon = cifs_sb->tcon; - cFYI(1, ("Getting info on %s", search_path)); - - full_path = cifs_get_search_path(cifs_sb, search_path); + cFYI(1, ("Getting info on %s", full_path)); try_again_CIFSSMBUnixQPathInfo: /* could have done a find first instead but this returns more info */ @@ -218,10 +184,6 @@ try_again_CIFSSMBUnixQPathInfo: if (rc) { if (rc == -EREMOTE && !is_dfs_referral) { is_dfs_referral = true; - if (full_path != search_path) { - kfree(full_path); - full_path = search_path; - } goto try_again_CIFSSMBUnixQPathInfo; } goto cgiiu_exit; @@ -271,8 +233,6 @@ try_again_CIFSSMBUnixQPathInfo: cifs_set_ops(inode, is_dfs_referral); } cgiiu_exit: - if (full_path != search_path) - kfree(full_path); return rc; } @@ -380,20 +340,19 @@ static int get_sfu_mode(struct inode *inode, } int cifs_get_inode_info(struct inode **pinode, - const unsigned char *search_path, FILE_ALL_INFO *pfindData, + const unsigned char *full_path, FILE_ALL_INFO *pfindData, struct super_block *sb, int xid, const __u16 *pfid) { int rc = 0; struct cifsTconInfo *pTcon; struct inode *inode; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); - const unsigned char *full_path = NULL; char *buf = NULL; bool adjustTZ = false; bool is_dfs_referral = false; pTcon = cifs_sb->tcon; - cFYI(1, ("Getting info on %s", search_path)); + cFYI(1, ("Getting info on %s", full_path)); if ((pfindData == NULL) && (*pinode != NULL)) { if (CIFS_I(*pinode)->clientCanCacheRead) { @@ -409,8 +368,6 @@ int cifs_get_inode_info(struct inode **pinode, return -ENOMEM; pfindData = (FILE_ALL_INFO *)buf; - full_path = cifs_get_search_path(cifs_sb, search_path); - try_again_CIFSSMBQPathInfo: /* could do find first instead but this returns more info */ rc = CIFSSMBQPathInfo(xid, pTcon, full_path, pfindData, @@ -432,10 +389,6 @@ try_again_CIFSSMBQPathInfo: if (rc) { if (rc == -EREMOTE && !is_dfs_referral) { is_dfs_referral = true; - if (full_path != search_path) { - kfree(full_path); - full_path = search_path; - } goto try_again_CIFSSMBQPathInfo; } goto cgii_exit; @@ -470,7 +423,7 @@ try_again_CIFSSMBQPathInfo: __u64 inode_num; rc1 = CIFSGetSrvInodeNumber(xid, pTcon, - search_path, &inode_num, + full_path, &inode_num, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); @@ -539,7 +492,7 @@ try_again_CIFSSMBQPathInfo: (cifsInfo->cifsAttrs & ATTR_SYSTEM)) { if (decode_sfu_inode(inode, le64_to_cpu(pfindData->EndOfFile), - search_path, + full_path, cifs_sb, xid)) cFYI(1, ("Unrecognized sfu inode type")); @@ -582,12 +535,12 @@ try_again_CIFSSMBQPathInfo: /* fill in 0777 bits from ACL */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { cFYI(1, ("Getting mode bits from ACL")); - acl_to_uid_mode(inode, search_path, pfid); + acl_to_uid_mode(inode, full_path, pfid); } #endif if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { /* fill in remaining high mode bits e.g. SUID, VTX */ - get_sfu_mode(inode, search_path, cifs_sb, xid); + get_sfu_mode(inode, full_path, cifs_sb, xid); } else if (atomic_read(&cifsInfo->inUse) == 0) { inode->i_uid = cifs_sb->mnt_uid; inode->i_gid = cifs_sb->mnt_gid; @@ -599,8 +552,6 @@ try_again_CIFSSMBQPathInfo: cifs_set_ops(inode, is_dfs_referral); } cgii_exit: - if (full_path != search_path) - kfree(full_path); kfree(buf); return rc; } diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 1c2c3ce5020b..316f9830ce3b 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -295,45 +295,9 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) cFYI(1, ("Error closing junction point " "(open for ioctl)")); } - /* BB unwind this long, nested function, or remove BB */ - if (rc == -EIO) { - /* Query if DFS Junction */ - unsigned int num_referrals = 0; - struct dfs_info3_param *refs = NULL; - tmp_path = - kmalloc(MAX_TREE_SIZE + MAX_PATHCONF + 1, - GFP_KERNEL); - if (tmp_path) { - strncpy(tmp_path, pTcon->treeName, - MAX_TREE_SIZE); - strncat(tmp_path, full_path, - MAX_PATHCONF); - rc = get_dfs_path(xid, pTcon->ses, - tmp_path, - cifs_sb->local_nls, - &num_referrals, &refs, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - cFYI(1, ("Get DFS for %s rc = %d ", - tmp_path, rc)); - if ((num_referrals == 0) && (rc == 0)) - rc = -EACCES; - else { - cFYI(1, ("num referral: %d", - num_referrals)); - if (refs && refs->path_name) { - strncpy(tmpbuffer, - refs->path_name, - len-1); - } - } - kfree(refs); - kfree(tmp_path); -} - /* BB add code like else decode referrals - then memcpy to tmpbuffer and free referrals - string array BB */ - } + /* If it is a DFS junction earlier we would have gotten + PATH_NOT_COVERED returned from server so we do + not need to request the DFS info here */ } } /* BB Anything else to do to handle recursive links? */ -- cgit v1.2.3-70-g09d2 From 0599ad53fee2d084f9ba26247d7452f06a40d298 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 14 May 2008 22:34:16 -0700 Subject: sysfs: remove error messages for -EEXIST case It is possible that the entry in sysfs already exists, one case of this is when a network device is renamed to bonding_masters. Anyway, in this case the proper error path is for device_rename to return an error code, not to generate bogus backtrace and errors. Also, to avoid possible races, the create link should be done before the remove link. This makes a device rename atomic operation like other renames. Signed-off-by: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- drivers/base/core.c | 8 +++----- fs/sysfs/dir.c | 6 +----- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/drivers/base/core.c b/drivers/base/core.c index be288b5e4180..3eeac5a78581 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1218,13 +1218,11 @@ int device_rename(struct device *dev, char *new_name) } #else if (dev->class) { - sysfs_remove_link(&dev->class->subsys.kobj, old_device_name); error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, dev->bus_id); - if (error) { - dev_err(dev, "%s: sysfs_create_symlink failed (%d)\n", - __func__, error); - } + if (error) + goto out; + sysfs_remove_link(&dev->class->subsys.kobj, old_device_name); } #endif diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index a1c3a1fab7f0..8c0e4b92574f 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -419,12 +419,8 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, */ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) { - if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) { - printk(KERN_WARNING "sysfs: duplicate filename '%s' " - "can not be created\n", sd->s_name); - WARN_ON(1); + if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) return -EEXIST; - } sd->s_parent = sysfs_get(acxt->parent_sd); -- cgit v1.2.3-70-g09d2 From c32916374b2b4f4d2b7ccdb357fe7989f3b407a6 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 15 May 2008 05:41:54 +0000 Subject: [CIFS] suppress duplicate warning fs/cifs/dir.c: In function 'cifs_ci_compare': fs/cifs/dir.c:582: warning: passing argument 1 of 'memcpy' discards qualifiers from pointer target type Signed-off-by: Andrew Morton Signed-off-by: Steve French --- fs/cifs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 05afe33ea644..f0b5b5f3dd2e 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -608,7 +608,7 @@ static int cifs_ci_compare(struct dentry *dentry, struct qstr *a, * case take precedence. If a is not a negative dentry, this * should have no side effects */ - memcpy(a->name, b->name, a->len); + memcpy((void *)a->name, b->name, a->len); return 0; } return 1; -- cgit v1.2.3-70-g09d2 From f9ddcca4cf7d95238beb295484d1de7c0bf490dd Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 15 May 2008 05:51:55 +0000 Subject: [CIFS] BKL-removal: convert CIFS over to unlocked_ioctl cifs_ioctl doesn't seem to need the BKL for anything, so convert it over to use unlocked_ioctl. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 10 +++++----- fs/cifs/cifsfs.h | 3 +-- fs/cifs/ioctl.c | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 427a7c695896..b6436b888cf9 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -657,7 +657,7 @@ const struct file_operations cifs_file_ops = { .splice_read = generic_file_splice_read, .llseek = cifs_llseek, #ifdef CONFIG_CIFS_POSIX - .ioctl = cifs_ioctl, + .unlocked_ioctl = cifs_ioctl, #endif /* CONFIG_CIFS_POSIX */ #ifdef CONFIG_CIFS_EXPERIMENTAL @@ -677,7 +677,7 @@ const struct file_operations cifs_file_direct_ops = { .flush = cifs_flush, .splice_read = generic_file_splice_read, #ifdef CONFIG_CIFS_POSIX - .ioctl = cifs_ioctl, + .unlocked_ioctl = cifs_ioctl, #endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, #ifdef CONFIG_CIFS_EXPERIMENTAL @@ -697,7 +697,7 @@ const struct file_operations cifs_file_nobrl_ops = { .splice_read = generic_file_splice_read, .llseek = cifs_llseek, #ifdef CONFIG_CIFS_POSIX - .ioctl = cifs_ioctl, + .unlocked_ioctl = cifs_ioctl, #endif /* CONFIG_CIFS_POSIX */ #ifdef CONFIG_CIFS_EXPERIMENTAL @@ -716,7 +716,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .flush = cifs_flush, .splice_read = generic_file_splice_read, #ifdef CONFIG_CIFS_POSIX - .ioctl = cifs_ioctl, + .unlocked_ioctl = cifs_ioctl, #endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, #ifdef CONFIG_CIFS_EXPERIMENTAL @@ -731,7 +731,7 @@ const struct file_operations cifs_dir_ops = { #ifdef CONFIG_CIFS_EXPERIMENTAL .dir_notify = cifs_dir_notify, #endif /* CONFIG_CIFS_EXPERIMENTAL */ - .ioctl = cifs_ioctl, + .unlocked_ioctl = cifs_ioctl, }; static void diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index cd1301a09b3b..25a6cbd15529 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -95,8 +95,7 @@ extern int cifs_setxattr(struct dentry *, const char *, const void *, size_t, int); extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); -extern int cifs_ioctl(struct inode *inode, struct file *filep, - unsigned int command, unsigned long arg); +extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); #ifdef CONFIG_CIFS_EXPERIMENTAL extern const struct export_operations cifs_export_ops; diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 5c792df13d62..0088a5b52564 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -30,9 +30,9 @@ #define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2) -int cifs_ioctl(struct inode *inode, struct file *filep, - unsigned int command, unsigned long arg) +long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) { + struct inode *inode = filep->f_dentry->d_inode; int rc = -ENOTTY; /* strange error - but the precedent */ int xid; struct cifs_sb_info *cifs_sb; -- cgit v1.2.3-70-g09d2 From c2cf07d591ef7bc25c220249822d9bdf0f44c75c Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 15 May 2008 06:20:02 +0000 Subject: [CIFS] Finishup DFS code Fixup GetDFSRefer to prepare for cleanup of SMB response processing Fix build warning in link.c Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 4 +- fs/cifs/cifssmb.c | 169 ++++++++++++++++++++++++---------------------------- fs/cifs/connect.c | 3 +- fs/cifs/link.c | 1 - 4 files changed, 80 insertions(+), 97 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 845b18e1abe5..b9f5e935f821 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -146,8 +146,8 @@ extern int CIFSSMBUnixQPathInfo(const int xid, extern int CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses, const unsigned char *searchName, - unsigned char **targetUNCs, - unsigned int *number_of_UNC_in_array, + struct dfs_info3_param **target_nodes, + unsigned int *number_of_nodes_in_array, const struct nls_table *nls_codepage, int remap); extern int get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 9c04ad404553..fc297383cb0e 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3870,8 +3870,8 @@ GetInodeNumOut: int CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses, const unsigned char *searchName, - unsigned char **targetUNCs, - unsigned int *number_of_UNC_in_array, + struct dfs_info3_param **target_nodes, + unsigned int *num_of_nodes, const struct nls_table *nls_codepage, int remap) { /* TRANS2_GET_DFS_REFERRAL */ @@ -3884,8 +3884,8 @@ CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses, unsigned int i; char *temp; __u16 params, byte_count; - *number_of_UNC_in_array = 0; - *targetUNCs = NULL; + *num_of_nodes = 0; + *target_nodes = NULL; cFYI(1, ("In GetDFSRefer the path %s", searchName)); if (ses == NULL) @@ -3955,99 +3955,84 @@ getDFSRetry: (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cFYI(1, ("Send error in GetDFSRefer = %d", rc)); - } else { /* decode response */ -/* BB Add logic to parse referrals here */ - rc = validate_t2((struct smb_t2_rsp *)pSMBr); - - /* BB Also check if enough total bytes returned? */ - if (rc || (pSMBr->ByteCount < 17)) - rc = -EIO; /* bad smb */ - else { - __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); - __u16 data_count = le16_to_cpu(pSMBr->t2.DataCount); - - cFYI(1, - ("Decoding GetDFSRefer response BCC: %d Offset %d", - pSMBr->ByteCount, data_offset)); - referrals = - (struct dfs_referral_level_3 *) - (8 /* sizeof start of data block */ + - data_offset + - (char *) &pSMBr->hdr.Protocol); - cFYI(1, ("num_referrals: %d dfs flags: 0x%x ... \n" - "for referral one refer size: 0x%x srv " - "type: 0x%x refer flags: 0x%x ttl: 0x%x", - le16_to_cpu(pSMBr->NumberOfReferrals), - le16_to_cpu(pSMBr->DFSFlags), - le16_to_cpu(referrals->ReferralSize), - le16_to_cpu(referrals->ServerType), - le16_to_cpu(referrals->ReferralFlags), - le16_to_cpu(referrals->TimeToLive))); - /* BB This field is actually two bytes in from start of - data block so we could do safety check that DataBlock - begins at address of pSMBr->NumberOfReferrals */ - *number_of_UNC_in_array = - le16_to_cpu(pSMBr->NumberOfReferrals); - - /* BB Fix below so can return more than one referral */ - if (*number_of_UNC_in_array > 1) - *number_of_UNC_in_array = 1; - - /* get the length of the strings describing refs */ - name_len = 0; - for (i = 0; i < *number_of_UNC_in_array; i++) { - /* make sure that DfsPathOffset not past end */ - __u16 offset = - le16_to_cpu(referrals->DfsPathOffset); - if (offset > data_count) { - /* if invalid referral, stop here and do - not try to copy any more */ - *number_of_UNC_in_array = i; - break; - } - temp = ((char *)referrals) + offset; + goto GetDFSRefExit; + } + rc = validate_t2((struct smb_t2_rsp *)pSMBr); - if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len += UniStrnlen((wchar_t *)temp, - data_count); - } else { - name_len += strnlen(temp, data_count); - } - referrals++; - /* BB add check that referral pointer does - not fall off end PDU */ - } - /* BB add check for name_len bigger than bcc */ - *targetUNCs = - kmalloc(name_len+1+(*number_of_UNC_in_array), - GFP_KERNEL); - if (*targetUNCs == NULL) { - rc = -ENOMEM; - goto GetDFSRefExit; + /* BB Also check if enough total bytes returned? */ + if (rc || (pSMBr->ByteCount < 17)) + rc = -EIO; /* bad smb */ + else { + __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); + __u16 data_count = le16_to_cpu(pSMBr->t2.DataCount); + + cFYI(1, ("Decoding GetDFSRefer response BCC: %d Offset %d", + pSMBr->ByteCount, data_offset)); + referrals = + (struct dfs_referral_level_3 *) + (8 /* sizeof start of data block */ + + data_offset + + (char *) &pSMBr->hdr.Protocol); + cFYI(1, ("num_referrals: %d dfs flags: 0x%x ... \n" + "for referral one refer size: 0x%x srv " + "type: 0x%x refer flags: 0x%x ttl: 0x%x", + le16_to_cpu(pSMBr->NumberOfReferrals), + le16_to_cpu(pSMBr->DFSFlags), + le16_to_cpu(referrals->ReferralSize), + le16_to_cpu(referrals->ServerType), + le16_to_cpu(referrals->ReferralFlags), + le16_to_cpu(referrals->TimeToLive))); + /* BB This field is actually two bytes in from start of + data block so we could do safety check that DataBlock + begins at address of pSMBr->NumberOfReferrals */ + *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals); + + /* BB Fix below so can return more than one referral */ + if (*num_of_nodes > 1) + *num_of_nodes = 1; + + /* get the length of the strings describing refs */ + name_len = 0; + for (i = 0; i < *num_of_nodes; i++) { + /* make sure that DfsPathOffset not past end */ + __u16 offset = le16_to_cpu(referrals->DfsPathOffset); + if (offset > data_count) { + /* if invalid referral, stop here and do + not try to copy any more */ + *num_of_nodes = i; + break; } - /* copy the ref strings */ - referrals = (struct dfs_referral_level_3 *) - (8 /* sizeof data hdr */ + data_offset + - (char *) &pSMBr->hdr.Protocol); - - for (i = 0; i < *number_of_UNC_in_array; i++) { - temp = ((char *)referrals) + - le16_to_cpu(referrals->DfsPathOffset); - if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { - cifs_strfromUCS_le(*targetUNCs, - (__le16 *) temp, - name_len, - nls_codepage); - } else { - strncpy(*targetUNCs, temp, name_len); - } - /* BB update target_uncs pointers */ - referrals++; + temp = ((char *)referrals) + offset; + + if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { + name_len += UniStrnlen((wchar_t *)temp, + data_count); + } else { + name_len += strnlen(temp, data_count); } - temp = *targetUNCs; - temp[name_len] = 0; + referrals++; + /* BB add check that referral pointer does + not fall off end PDU */ + } + /* BB add check for name_len bigger than bcc */ + *target_nodes = + kmalloc(name_len+1+(*num_of_nodes), + GFP_KERNEL); + if (*target_nodes == NULL) { + rc = -ENOMEM; + goto GetDFSRefExit; } + referrals = (struct dfs_referral_level_3 *) + (8 /* sizeof data hdr */ + data_offset + + (char *) &pSMBr->hdr.Protocol); + + for (i = 0; i < *num_of_nodes; i++) { + temp = ((char *)referrals) + + le16_to_cpu(referrals->DfsPathOffset); + /* BB update target_uncs pointers */ + referrals++; + } } GetDFSRefExit: if (pSMB) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d5747e30f1c9..c397fcfd9f1a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1425,7 +1425,6 @@ get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, { char *temp_unc; int rc = 0; - unsigned char *targetUNCs; *pnum_referrals = 0; *preferrals = NULL; @@ -1448,7 +1447,7 @@ get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, kfree(temp_unc); } if (rc == 0) - rc = CIFSGetDFSRefer(xid, pSesInfo, old_path, &targetUNCs, + rc = CIFSGetDFSRefer(xid, pSesInfo, old_path, preferrals, pnum_referrals, nls_codepage, remap); /* BB map targetUNCs to dfs_info3 structures, here or in CIFSGetDFSRefer BB */ diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 316f9830ce3b..63f644000ce5 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -234,7 +234,6 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; char *full_path = NULL; - char *tmp_path = NULL; char *tmpbuffer; int len; __u16 fid; -- cgit v1.2.3-70-g09d2 From 95b1cb90b79896c4bf5ea484bee2b41d7d293f43 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 15 May 2008 16:44:38 +0000 Subject: [CIFS] enable parsing for transport encryption mount parm Samba now supports transport encryption on particular exports (mounted tree ids can be encrypted for servers which support the unix extensions). This adds parsing support to cifs mount option parsing for this. Signed-off-by: Steve French --- fs/cifs/README | 5 +++++ fs/cifs/cifsglob.h | 1 + fs/cifs/connect.c | 25 +++++++++++++++++-------- 3 files changed, 23 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/cifs/README b/fs/cifs/README index 621aa1a85971..2bd6fe556f88 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -483,6 +483,11 @@ A partial list of the supported mount options follows: sign Must use packet signing (helps avoid unwanted data modification by intermediate systems in the route). Note that signing does not work with lanman or plaintext authentication. + seal Must seal (encrypt) all data on this mounted share before + sending on the network. Requires support for Unix Extensions. + Note that this differs from the sign mount option in that it + causes encryption of data sent over this mounted share but other + shares mounted to the same server are unaffected. sec Security mode. Allowed values are: none attempt to connection as a null user (no name) krb5 Use Kerberos version 5 authentication diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index b7d9f698e63e..08914053242b 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -281,6 +281,7 @@ struct cifsTconInfo { bool ipc:1; /* set if connection to IPC$ eg for RPC/PIPES */ bool retry:1; bool nocase:1; + bool seal:1; /* transport encryption for this mounted share */ bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol for this mount even if server would support */ /* BB add field for back pointer to sb struct(s)? */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index c397fcfd9f1a..023434f72c15 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -60,7 +60,7 @@ struct smb_vol { char *domainname; char *UNC; char *UNCip; - char *in6_addr; /* ipv6 address as human readable form of in6_addr */ + char *in6_addr; /* ipv6 address as human readable form of in6_addr */ char *iocharset; /* local code page for mapping to and from Unicode */ char source_rfc1001_name[16]; /* netbios name of client */ char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */ @@ -82,13 +82,14 @@ struct smb_vol { bool no_xattr:1; /* set if xattr (EA) support should be disabled*/ bool server_ino:1; /* use inode numbers from server ie UniqueId */ bool direct_io:1; - bool remap:1; /* set to remap seven reserved chars in filenames */ - bool posix_paths:1; /* unset to not ask for posix pathnames. */ + bool remap:1; /* set to remap seven reserved chars in filenames */ + bool posix_paths:1; /* unset to not ask for posix pathnames. */ bool no_linux_ext:1; bool sfu_emul:1; - bool nullauth:1; /* attempt to authenticate with null user */ - unsigned nocase; /* request case insensitive filenames */ - unsigned nobrl; /* disable sending byte range locks to srv */ + bool nullauth:1; /* attempt to authenticate with null user */ + bool nocase:1; /* request case insensitive filenames */ + bool nobrl:1; /* disable sending byte range locks to srv */ + bool seal:1; /* request transport encryption on share */ unsigned int rsize; unsigned int wsize; unsigned int sockopt; @@ -1273,8 +1274,12 @@ cifs_parse_mount_options(char *options, const char *devname, vol->no_psx_acl = 1; } else if (strnicmp(data, "sign", 4) == 0) { vol->secFlg |= CIFSSEC_MUST_SIGN; -/* } else if (strnicmp(data, "seal",4) == 0) { - vol->secFlg |= CIFSSEC_MUST_SEAL; */ + } else if (strnicmp(data, "seal", 4) == 0) { + /* we do not do the following in secFlags because seal + is a per tree connection (mount) not a per socket + or per-smb connection option in the protocol */ + /* vol->secFlg |= CIFSSEC_MUST_SEAL; */ + vol->seal = 1; } else if (strnicmp(data, "direct", 6) == 0) { vol->direct_io = 1; } else if (strnicmp(data, "forcedirectio", 13) == 0) { @@ -2126,6 +2131,9 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, for the retry flag is used */ tcon->retry = volume_info.retry; tcon->nocase = volume_info.nocase; + if (tcon->seal != volume_info.seal) + cERROR(1, ("transport encryption setting " + "conflicts with existing tid")); } else { tcon = tconInfoAlloc(); if (tcon == NULL) @@ -2159,6 +2167,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, atomic_inc(&pSesInfo->inUse); tcon->retry = volume_info.retry; tcon->nocase = volume_info.nocase; + tcon->seal = volume_info.seal; } } } -- cgit v1.2.3-70-g09d2 From de2db8d790b058fcd75d603780b913bd824972b3 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 16 May 2008 13:10:32 +0400 Subject: Fixed DFS code to work with new 'build_path_from_dentry', that returns full path if share in the dfs, now. Signed-off-by: Igor Mammedov Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 49 +------------------------------------------------ 1 file changed, 1 insertion(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index f6fdecf6598c..d82374c9e329 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -219,53 +219,6 @@ static struct vfsmount *cifs_dfs_do_refmount(const struct vfsmount *mnt_parent, } -static char *build_full_dfs_path_from_dentry(struct dentry *dentry) -{ - char *full_path = NULL; - char *search_path; - char *tmp_path; - size_t l_max_len; - struct cifs_sb_info *cifs_sb; - - if (dentry->d_inode == NULL) - return NULL; - - cifs_sb = CIFS_SB(dentry->d_inode->i_sb); - - if (cifs_sb->tcon == NULL) - return NULL; - - search_path = build_path_from_dentry(dentry); - if (search_path == NULL) - return NULL; - - if (cifs_sb->tcon->Flags & SMB_SHARE_IS_IN_DFS) { - int i; - /* we should use full path name for correct working with DFS */ - l_max_len = strnlen(cifs_sb->tcon->treeName, MAX_TREE_SIZE+1) + - strnlen(search_path, MAX_PATHCONF) + 1; - tmp_path = kmalloc(l_max_len, GFP_KERNEL); - if (tmp_path == NULL) { - kfree(search_path); - return NULL; - } - strncpy(tmp_path, cifs_sb->tcon->treeName, l_max_len); - tmp_path[l_max_len-1] = 0; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) - for (i = 0; i < l_max_len; i++) { - if (tmp_path[i] == '\\') - tmp_path[i] = '/'; - } - strncat(tmp_path, search_path, l_max_len - strlen(tmp_path)); - - full_path = tmp_path; - kfree(search_path); - } else { - full_path = search_path; - } - return full_path; -} - static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd, struct list_head *mntlist) { @@ -333,7 +286,7 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) goto out_err; } - full_path = build_full_dfs_path_from_dentry(dentry); + full_path = build_path_from_dentry(dentry); if (full_path == NULL) { rc = -ENOMEM; goto out_err; -- cgit v1.2.3-70-g09d2 From b0b539739fe9b7d75002412a787cfdf4efddbc33 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 May 2008 11:45:41 -0400 Subject: NFS: Ensure that 'noac' and/or 'actimeo=0' turn off attribute caching Both the 'noac' and 'actimeo=0' mount options should ensure that attributes are not cached, however a bug in nfs_attribute_timeout() means that currently, the attributes may in fact get cached for up to one jiffy. This has been seen to cause corruption in some applications. The reason for the bug is that the time_in_range() test returns 'true' as long as the current time lies between nfsi->read_cache_jiffies and nfsi->read_cache_jiffies + nfsi->attrtimeo. In other words, if jiffies equals nfsi->read_cache_jiffies, then we still cache the attribute data. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5cb3345eb694..2501b864f7c3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -707,6 +707,13 @@ int nfs_attribute_timeout(struct inode *inode) if (nfs_have_delegation(inode, FMODE_READ)) return 0; + /* + * Special case: if the attribute timeout is set to 0, then always + * treat the cache as having expired (unless holding + * a delegation). + */ + if (nfsi->attrtimeo == 0) + return 1; return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); } -- cgit v1.2.3-70-g09d2 From 38def50fabc479dc96ea6bd2cb2526e0dfc36fa4 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 1 May 2008 20:03:22 +0300 Subject: nfs: fix race in nfs_dirty_request When called from nfs_flush_incompatible, the req is not locked, so req->wb_page might be set to NULL before it is used by PageWriteback. Signed-off-by: Fred Isaman Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1ade11d1ba07..6d8ace3e3259 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -415,7 +415,7 @@ nfs_dirty_request(struct nfs_page *req) if (page == NULL || test_bit(PG_NEED_COMMIT, &req->wb_flags)) return 0; - return !PageWriteback(req->wb_page); + return !PageWriteback(page); } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -- cgit v1.2.3-70-g09d2 From 3a6258e1fb5ff717dcefa04afc35f81aaae3f3e0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 6 May 2008 13:32:40 -0400 Subject: NFSv4: Check the return value of decode_compound_hdr_arg() If decode_compound_hdr_arg() returns a resource error, then we cannot proceed to process the callback. Return a 'GARBAGE_ARGS' rpc-level error to the caller instead. If, however, the minor version field is incorrect, then we need to propagate the resulting NFS4ERR_MINOR_VERS_MISMATCH error back as the compound status field (setting the nops field to 0). Finally, if encode_compound_hdr_res() returns an error, we need to return an RPC_SYSTEM_ERR to the caller. Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 13619d24f023..646d4d85072e 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -401,12 +401,12 @@ static __be32 process_op(struct svc_rqst *rqstp, */ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp) { - struct cb_compound_hdr_arg hdr_arg; - struct cb_compound_hdr_res hdr_res; + struct cb_compound_hdr_arg hdr_arg = { 0 }; + struct cb_compound_hdr_res hdr_res = { NULL }; struct xdr_stream xdr_in, xdr_out; __be32 *p; __be32 status; - unsigned int nops = 1; + unsigned int nops = 0; dprintk("%s: start\n", __FUNCTION__); @@ -415,20 +415,20 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); xdr_init_encode(&xdr_out, &rqstp->rq_res, p); - decode_compound_hdr_arg(&xdr_in, &hdr_arg); + status = decode_compound_hdr_arg(&xdr_in, &hdr_arg); + if (status == __constant_htonl(NFS4ERR_RESOURCE)) + return rpc_garbage_args; + hdr_res.taglen = hdr_arg.taglen; hdr_res.tag = hdr_arg.tag; - hdr_res.nops = NULL; - encode_compound_hdr_res(&xdr_out, &hdr_res); + if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) + return rpc_system_err; - for (;;) { + while (status == 0 && nops != hdr_arg.nops) { status = process_op(rqstp, &xdr_in, argp, &xdr_out, resp); - if (status != 0) - break; - if (nops == hdr_arg.nops) - break; nops++; } + *hdr_res.status = status; *hdr_res.nops = htonl(nops); dprintk("%s: done, status = %u\n", __FUNCTION__, ntohl(status)); -- cgit v1.2.3-70-g09d2 From 46c8ac74250a396aca855e494f49a960797a6b5e Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 2 May 2008 13:42:42 -0700 Subject: nfs/lsm: make NFSv4 set LSM mount options NFSv3 get_sb operations call into the LSM layer to set security options passed from userspace. NFSv4 hooks were not originally added since it was reasonably late in the merge window and NFSv3 was the only thing that had regressed (v4 has never supported any LSM options) This patch makes NFSv4 call into the LSM to set security options rather than just blindly dropping them with no notice to the user as happens today. This patch was tested in a simple NFSv4 environment with the context= option and appeared to work as expected. Signed-off-by: Eric Paris Cc: Trond Myklebust Cc: "J. Bruce Fields" Cc: Stephen Smalley Acked-by: James Morris Cc: Casey Schaufler Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 7226a506f3ca..5ed86ac0fd9b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2015,6 +2015,10 @@ static int nfs4_get_sb(struct file_system_type *fs_type, goto error_splat_super; } + error = security_sb_set_mnt_opts(s, &data.lsm_opts); + if (error) + goto error_splat_root; + s->s_flags |= MS_ACTIVE; mnt->mnt_sb = s; mnt->mnt_root = mntroot; @@ -2031,6 +2035,8 @@ out_free: nfs_free_server(server); goto out; +error_splat_root: + dput(mntroot); error_splat_super: up_write(&s->s_umount); deactivate_super(s); @@ -2114,6 +2120,8 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, mnt->mnt_sb = s; mnt->mnt_root = mntroot; + security_sb_clone_mnt_opts(data->sb, s); + dprintk("<-- nfs4_xdev_get_sb() = 0\n"); return 0; @@ -2197,6 +2205,8 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, mnt->mnt_sb = s; mnt->mnt_root = mntroot; + security_sb_clone_mnt_opts(data->sb, s); + dprintk("<-- nfs4_referral_get_sb() = 0\n"); return 0; -- cgit v1.2.3-70-g09d2 From 3110ff8048fb757b36112b044b384aea9c44d6e4 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 2 May 2008 13:42:44 -0700 Subject: nfs: replace remaining __FUNCTION__ occurrences __FUNCTION__ is gcc-specific, use __func__ Signed-off-by: Harvey Harrison Cc: Trond Myklebust Cc: "J. Bruce Fields" Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 2 +- fs/nfs/callback_proc.c | 4 +- fs/nfs/callback_xdr.c | 18 ++++----- fs/nfs/client.c | 8 ++-- fs/nfs/delegation.c | 4 +- fs/nfs/dir.c | 18 ++++----- fs/nfs/file.c | 2 +- fs/nfs/inode.c | 4 +- fs/nfs/namespace.c | 8 ++-- fs/nfs/nfs3proc.c | 6 +-- fs/nfs/nfs4namespace.c | 12 +++--- fs/nfs/nfs4proc.c | 32 ++++++++-------- fs/nfs/nfs4renewd.c | 10 ++--- fs/nfs/nfs4state.c | 6 +-- fs/nfs/nfs4xdr.c | 100 ++++++++++++++++++++++++------------------------- fs/nfs/proc.c | 8 ++-- fs/nfs/read.c | 2 +- fs/nfs/super.c | 2 +- 18 files changed, 123 insertions(+), 123 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 5606ae3d72d3..c1e7c8300629 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -182,7 +182,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp) if (clp == NULL) return SVC_DROP; - dprintk("%s: %s NFSv4 callback!\n", __FUNCTION__, + dprintk("%s: %s NFSv4 callback!\n", __func__, svc_print_addr(rqstp, buf, sizeof(buf))); nfs_put_client(clp); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 15f7785048d3..f7e83e23cf9f 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -57,7 +57,7 @@ out_iput: out_putclient: nfs_put_client(clp); out: - dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status)); + dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status)); return res->status; } @@ -98,6 +98,6 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) nfs_put_client(prev); } while (clp != NULL); out: - dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res)); + dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); return res; } diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 646d4d85072e..dd0ef34b5845 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -141,7 +141,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound /* We do not like overly long tags! */ if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { printk("NFSv4 CALLBACK %s: client sent tag of length %u\n", - __FUNCTION__, hdr->taglen); + __func__, hdr->taglen); return htonl(NFS4ERR_RESOURCE); } p = read_buf(xdr, 12); @@ -151,7 +151,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound /* Check minor version is zero. */ if (minor_version != 0) { printk(KERN_WARNING "%s: NFSv4 server callback with illegal minor version %u!\n", - __FUNCTION__, minor_version); + __func__, minor_version); return htonl(NFS4ERR_MINOR_VERS_MISMATCH); } hdr->callback_ident = ntohl(*p++); @@ -179,7 +179,7 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr args->addr = svc_addr(rqstp); status = decode_bitmap(xdr, args->bitmap); out: - dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(status)); + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; } @@ -200,7 +200,7 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, args->truncate = ntohl(*p); status = decode_fh(xdr, &args->fh); out: - dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(status)); + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; } @@ -349,7 +349,7 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, status = encode_attr_mtime(xdr, res->bitmap, &res->mtime); *savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1))); out: - dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(status)); + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; } @@ -363,7 +363,7 @@ static __be32 process_op(struct svc_rqst *rqstp, long maxlen; __be32 res; - dprintk("%s: start\n", __FUNCTION__); + dprintk("%s: start\n", __func__); status = decode_op_hdr(xdr_in, &op_nr); if (likely(status == 0)) { switch (op_nr) { @@ -392,7 +392,7 @@ static __be32 process_op(struct svc_rqst *rqstp, status = res; if (op->encode_res != NULL && status == 0) status = op->encode_res(rqstp, xdr_out, resp); - dprintk("%s: done, status = %d\n", __FUNCTION__, ntohl(status)); + dprintk("%s: done, status = %d\n", __func__, ntohl(status)); return status; } @@ -408,7 +408,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r __be32 status; unsigned int nops = 0; - dprintk("%s: start\n", __FUNCTION__); + dprintk("%s: start\n", __func__); xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); @@ -431,7 +431,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r *hdr_res.status = status; *hdr_res.nops = htonl(nops); - dprintk("%s: done, status = %u\n", __FUNCTION__, ntohl(status)); + dprintk("%s: done, status = %u\n", __func__, ntohl(status)); return rpc_success; } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 89ac5bb0401c..f2a092ca69b5 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -488,7 +488,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, clnt = rpc_create(&args); if (IS_ERR(clnt)) { dprintk("%s: cannot create RPC client. Error = %ld\n", - __FUNCTION__, PTR_ERR(clnt)); + __func__, PTR_ERR(clnt)); return PTR_ERR(clnt); } @@ -576,7 +576,7 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, server->client = rpc_clone_client(clp->cl_rpcclient); if (IS_ERR(server->client)) { - dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__); + dprintk("%s: couldn't create rpc_client!\n", __func__); return PTR_ERR(server->client); } @@ -590,7 +590,7 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, auth = rpcauth_create(pseudoflavour, server->client); if (IS_ERR(auth)) { - dprintk("%s: couldn't create credcache!\n", __FUNCTION__); + dprintk("%s: couldn't create credcache!\n", __func__); return PTR_ERR(auth); } } @@ -985,7 +985,7 @@ static int nfs4_init_client(struct nfs_client *clp, error = nfs_idmap_new(clp); if (error < 0) { dprintk("%s: failed to create idmapper. Error = %d\n", - __FUNCTION__, error); + __func__, error); goto error; } __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 00a5e4405e16..cc563cfa6940 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -60,7 +60,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ switch (status) { default: printk(KERN_ERR "%s: unhandled error %d.\n", - __FUNCTION__, status); + __func__, status); case -NFS4ERR_EXPIRED: /* kill_proc(fl->fl_pid, SIGLOST, 1); */ case -NFS4ERR_STALE_CLIENTID: @@ -186,7 +186,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct */ dfprintk(FILE, "%s: server %s handed out " "a duplicate delegation!\n", - __FUNCTION__, clp->cl_hostname); + __func__, clp->cl_hostname); if (delegation->type <= nfsi->delegation->type) { freeme = delegation; delegation = NULL; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f288b3ecab4a..58d43daec084 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -180,7 +180,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) int error; dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", - __FUNCTION__, (long long)desc->entry->cookie, + __func__, (long long)desc->entry->cookie, page->index); again: @@ -256,7 +256,7 @@ int find_dirent(nfs_readdir_descriptor_t *desc) while((status = dir_decode(desc)) == 0) { dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n", - __FUNCTION__, (unsigned long long)entry->cookie); + __func__, (unsigned long long)entry->cookie); if (entry->prev_cookie == *desc->dir_cookie) break; if (loop_count++ > 200) { @@ -315,7 +315,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) int status; dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n", - __FUNCTION__, desc->page_index, + __func__, desc->page_index, (long long) *desc->dir_cookie); /* If we find the page in the page_cache, we cannot be sure @@ -339,7 +339,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) if (status < 0) dir_page_release(desc); out: - dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status); + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); return status; } @@ -380,7 +380,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) } } - dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, res); + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, res); return res; } @@ -506,7 +506,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->entry->eof = 0; out: dfprintk(DIRCACHE, "NFS: %s: returns %d\n", - __FUNCTION__, status); + __func__, status); return status; out_release: dir_page_release(desc); @@ -780,7 +780,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) if (is_bad_inode(inode)) { dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", - __FUNCTION__, dentry->d_parent->d_name.name, + __func__, dentry->d_parent->d_name.name, dentry->d_name.name); goto out_bad; } @@ -808,7 +808,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) unlock_kernel(); dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", - __FUNCTION__, dentry->d_parent->d_name.name, + __func__, dentry->d_parent->d_name.name, dentry->d_name.name); return 1; out_zap_parent: @@ -827,7 +827,7 @@ out_zap_parent: unlock_kernel(); dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", - __FUNCTION__, dentry->d_parent->d_name.name, + __func__, dentry->d_parent->d_name.name, dentry->d_name.name); return 0; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 3536b01164f9..d84a3d8f32af 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -526,7 +526,7 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) if (res < 0) dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager" " - error %d!\n", - __FUNCTION__, res); + __func__, res); return res; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2501b864f7c3..421d338c698c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1002,7 +1002,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) unsigned long now = jiffies; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", - __FUNCTION__, inode->i_sb->s_id, inode->i_ino, + __func__, inode->i_sb->s_id, inode->i_ino, atomic_read(&inode->i_count), fattr->valid); if (nfsi->fileid != fattr->fileid) @@ -1126,7 +1126,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) * Big trouble! The inode has become a different object. */ printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", - __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode); + __func__, inode->i_ino, inode->i_mode, fattr->mode); out_err: /* * No need to worry about unhashing the dentry, as the diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index af4d0f1e402c..fca518006a52 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -106,7 +106,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) dprintk("--> nfs_follow_mountpoint()\n"); BUG_ON(IS_ROOT(dentry)); - dprintk("%s: enter\n", __FUNCTION__); + dprintk("%s: enter\n", __func__); dput(nd->path.dentry); nd->path.dentry = dget(dentry); @@ -143,7 +143,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) nd->path.dentry = dget(mnt->mnt_root); schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); out: - dprintk("%s: done, returned %d\n", __FUNCTION__, err); + dprintk("%s: done, returned %d\n", __func__, err); dprintk("<-- nfs_follow_mountpoint() = %d\n", err); return ERR_PTR(err); @@ -230,7 +230,7 @@ static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, dprintk("--> nfs_do_submount()\n"); - dprintk("%s: submounting on %s/%s\n", __FUNCTION__, + dprintk("%s: submounting on %s/%s\n", __func__, dentry->d_parent->d_name.name, dentry->d_name.name); if (page == NULL) @@ -243,7 +243,7 @@ static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, free_page: free_page((unsigned long)page); out: - dprintk("%s: done\n", __FUNCTION__); + dprintk("%s: done\n", __func__); dprintk("<-- nfs_do_submount() = %p\n", mnt); return mnt; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 549dbce714a4..c3523ad03ed1 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -63,15 +63,15 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, }; int status; - dprintk("%s: call fsinfo\n", __FUNCTION__); + dprintk("%s: call fsinfo\n", __func__); nfs_fattr_init(info->fattr); status = rpc_call_sync(client, &msg, 0); - dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status); + dprintk("%s: reply fsinfo: %d\n", __func__, status); if (!(info->fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_resp = info->fattr; status = rpc_call_sync(client, &msg, 0); - dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); + dprintk("%s: reply getattr: %d\n", __func__, status); } return status; } diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 5f9ba41ed5bf..b112857301f7 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -86,7 +86,7 @@ static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, if (strncmp(path, fs_path, strlen(fs_path)) != 0) { dprintk("%s: path %s does not begin with fsroot %s\n", - __FUNCTION__, path, fs_path); + __func__, path, fs_path); return -ENOENT; } @@ -134,7 +134,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, if (locations == NULL || locations->nlocations <= 0) goto out; - dprintk("%s: referral at %s/%s\n", __FUNCTION__, + dprintk("%s: referral at %s/%s\n", __func__, dentry->d_parent->d_name.name, dentry->d_name.name); page = (char *) __get_free_page(GFP_USER); @@ -204,7 +204,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, out: free_page((unsigned long) page); free_page((unsigned long) page2); - dprintk("%s: done\n", __FUNCTION__); + dprintk("%s: done\n", __func__); return mnt; } @@ -223,7 +223,7 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr int err; /* BUG_ON(IS_ROOT(dentry)); */ - dprintk("%s: enter\n", __FUNCTION__); + dprintk("%s: enter\n", __func__); page = alloc_page(GFP_KERNEL); if (page == NULL) @@ -238,7 +238,7 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr parent = dget_parent(dentry); dprintk("%s: getting locations for %s/%s\n", - __FUNCTION__, parent->d_name.name, dentry->d_name.name); + __func__, parent->d_name.name, dentry->d_name.name); err = nfs4_proc_fs_locations(parent->d_inode, &dentry->d_name, fs_locations, page); dput(parent); @@ -252,6 +252,6 @@ out_free: __free_page(page); kfree(fs_locations); out: - dprintk("%s: done\n", __FUNCTION__); + dprintk("%s: done\n", __func__); return mnt; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index dbc09271af02..f533318b005f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -73,7 +73,7 @@ int nfs4_map_errors(int err) { if (err < -1000) { dprintk("%s could not handle NFSv4 error %d\n", - __FUNCTION__, -err); + __func__, -err); return -EIO; } return err; @@ -1578,7 +1578,7 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct goto out; /* Make sure server returned a different fsid for the referral */ if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { - dprintk("%s: server did not return a different fsid for a referral at %s\n", __FUNCTION__, name->name); + dprintk("%s: server did not return a different fsid for a referral at %s\n", __func__, name->name); status = -EIO; goto out; } @@ -2211,7 +2211,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, }; int status; - dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __FUNCTION__, + dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __func__, dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long long)cookie); @@ -2223,7 +2223,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, nfs_invalidate_atime(dir); - dprintk("%s: returns %d\n", __FUNCTION__, status); + dprintk("%s: returns %d\n", __func__, status); return status; } @@ -3342,7 +3342,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) struct nfs4_lockdata *data = calldata; struct nfs4_state *state = data->lsp->ls_state; - dprintk("%s: begin!\n", __FUNCTION__); + dprintk("%s: begin!\n", __func__); if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) return; /* Do we need to do an open_to_lock_owner? */ @@ -3356,14 +3356,14 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) data->arg.new_lock_owner = 0; data->timestamp = jiffies; rpc_call_start(task); - dprintk("%s: done!, ret = %d\n", __FUNCTION__, data->rpc_status); + dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); } static void nfs4_lock_done(struct rpc_task *task, void *calldata) { struct nfs4_lockdata *data = calldata; - dprintk("%s: begin!\n", __FUNCTION__); + dprintk("%s: begin!\n", __func__); data->rpc_status = task->tk_status; if (RPC_ASSASSINATED(task)) @@ -3381,14 +3381,14 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp); } out: - dprintk("%s: done, ret = %d!\n", __FUNCTION__, data->rpc_status); + dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); } static void nfs4_lock_release(void *calldata) { struct nfs4_lockdata *data = calldata; - dprintk("%s: begin!\n", __FUNCTION__); + dprintk("%s: begin!\n", __func__); nfs_free_seqid(data->arg.open_seqid); if (data->cancelled != 0) { struct rpc_task *task; @@ -3396,13 +3396,13 @@ static void nfs4_lock_release(void *calldata) data->arg.lock_seqid); if (!IS_ERR(task)) rpc_put_task(task); - dprintk("%s: cancelling lock!\n", __FUNCTION__); + dprintk("%s: cancelling lock!\n", __func__); } else nfs_free_seqid(data->arg.lock_seqid); nfs4_put_lock_state(data->lsp); put_nfs_open_context(data->ctx); kfree(data); - dprintk("%s: done!\n", __FUNCTION__); + dprintk("%s: done!\n", __func__); } static const struct rpc_call_ops nfs4_lock_ops = { @@ -3428,7 +3428,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f }; int ret; - dprintk("%s: begin!\n", __FUNCTION__); + dprintk("%s: begin!\n", __func__); data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), fl->fl_u.nfs4_fl.owner); if (data == NULL) @@ -3451,7 +3451,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f } else data->cancelled = 1; rpc_put_task(task); - dprintk("%s: done, ret = %d!\n", __FUNCTION__, ret); + dprintk("%s: done, ret = %d!\n", __func__, ret); return ret; } @@ -3527,7 +3527,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock /* Note: we always want to sleep here! */ request->fl_flags = fl_flags | FL_SLEEP; if (do_vfs_lock(request->fl_file, request) < 0) - printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__); out_unlock: up_read(&clp->cl_sem); out: @@ -3665,12 +3665,12 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, }; int status; - dprintk("%s: start\n", __FUNCTION__); + dprintk("%s: start\n", __func__); nfs_fattr_init(&fs_locations->fattr); fs_locations->server = server; fs_locations->nlocations = 0; status = rpc_call_sync(server->client, &msg, 0); - dprintk("%s: returned status = %d\n", __FUNCTION__, status); + dprintk("%s: returned status = %d\n", __func__, status); return status; } diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 5e2e4af1a0e6..3305acbbe2ae 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -66,7 +66,7 @@ nfs4_renew_state(struct work_struct *work) unsigned long last, now; down_read(&clp->cl_sem); - dprintk("%s: start\n", __FUNCTION__); + dprintk("%s: start\n", __func__); /* Are there any active superblocks? */ if (list_empty(&clp->cl_superblocks)) goto out; @@ -92,17 +92,17 @@ nfs4_renew_state(struct work_struct *work) spin_lock(&clp->cl_lock); } else dprintk("%s: failed to call renewd. Reason: lease not expired \n", - __FUNCTION__); + __func__); if (timeout < 5 * HZ) /* safeguard */ timeout = 5 * HZ; dprintk("%s: requeueing work. Lease period = %ld\n", - __FUNCTION__, (timeout + HZ - 1) / HZ); + __func__, (timeout + HZ - 1) / HZ); cancel_delayed_work(&clp->cl_renewd); schedule_delayed_work(&clp->cl_renewd, timeout); spin_unlock(&clp->cl_lock); out: up_read(&clp->cl_sem); - dprintk("%s: done\n", __FUNCTION__); + dprintk("%s: done\n", __func__); } /* Must be called with clp->cl_sem locked for writes */ @@ -117,7 +117,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp) if (timeout < 5 * HZ) timeout = 5 * HZ; dprintk("%s: requeueing work. Lease period = %ld\n", - __FUNCTION__, (timeout + HZ - 1) / HZ); + __func__, (timeout + HZ - 1) / HZ); cancel_delayed_work(&clp->cl_renewd); schedule_delayed_work(&clp->cl_renewd, timeout); set_bit(NFS_CS_RENEWD, &clp->cl_res_state); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 46eb624e4f16..5a1e02c8b75e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -828,7 +828,7 @@ static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_s switch (status) { default: printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", - __FUNCTION__, status); + __func__, status); case -NFS4ERR_EXPIRED: case -NFS4ERR_NO_GRACE: case -NFS4ERR_RECLAIM_BAD: @@ -869,14 +869,14 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n list_for_each_entry(lock, &state->lock_states, ls_locks) { if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) printk("%s: Lock reclaim failed!\n", - __FUNCTION__); + __func__); } continue; } switch (status) { default: printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", - __FUNCTION__, status); + __func__, status); case -ENOENT: case -NFS4ERR_RECLAIM_BAD: case -NFS4ERR_RECLAIM_CONFLICT: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5a2d64927b35..b916297d2334 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1831,7 +1831,7 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->count); dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", - __FUNCTION__, replen, args->pages, + __func__, replen, args->pages, args->pgbase, args->count); out: @@ -2192,9 +2192,9 @@ out: p = xdr_inline_decode(xdr, nbytes); \ if (unlikely(!p)) { \ dprintk("nfs: %s: prematurely hit end of receive" \ - " buffer\n", __FUNCTION__); \ + " buffer\n", __func__); \ dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \ - __FUNCTION__, xdr->p, nbytes, xdr->end); \ + __func__, xdr->p, nbytes, xdr->end); \ return -EIO; \ } \ } while (0) @@ -2306,12 +2306,12 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t * READ_BUF(4); READ32(*type); if (*type < NF4REG || *type > NF4NAMEDATTR) { - dprintk("%s: bad type %d\n", __FUNCTION__, *type); + dprintk("%s: bad type %d\n", __func__, *type); return -EIO; } bitmap[0] &= ~FATTR4_WORD0_TYPE; } - dprintk("%s: type=0%o\n", __FUNCTION__, nfs_type2fmt[*type].nfs2type); + dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type].nfs2type); return 0; } @@ -2327,7 +2327,7 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t READ64(*change); bitmap[0] &= ~FATTR4_WORD0_CHANGE; } - dprintk("%s: change attribute=%Lu\n", __FUNCTION__, + dprintk("%s: change attribute=%Lu\n", __func__, (unsigned long long)*change); return 0; } @@ -2344,7 +2344,7 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t * READ64(*size); bitmap[0] &= ~FATTR4_WORD0_SIZE; } - dprintk("%s: file size=%Lu\n", __FUNCTION__, (unsigned long long)*size); + dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size); return 0; } @@ -2360,7 +2360,7 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui READ32(*res); bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT; } - dprintk("%s: link support=%s\n", __FUNCTION__, *res == 0 ? "false" : "true"); + dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true"); return 0; } @@ -2376,7 +2376,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, READ32(*res); bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT; } - dprintk("%s: symlink support=%s\n", __FUNCTION__, *res == 0 ? "false" : "true"); + dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true"); return 0; } @@ -2394,7 +2394,7 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs READ64(fsid->minor); bitmap[0] &= ~FATTR4_WORD0_FSID; } - dprintk("%s: fsid=(0x%Lx/0x%Lx)\n", __FUNCTION__, + dprintk("%s: fsid=(0x%Lx/0x%Lx)\n", __func__, (unsigned long long)fsid->major, (unsigned long long)fsid->minor); return 0; @@ -2412,7 +2412,7 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint READ32(*res); bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME; } - dprintk("%s: file size=%u\n", __FUNCTION__, (unsigned int)*res); + dprintk("%s: file size=%u\n", __func__, (unsigned int)*res); return 0; } @@ -2428,7 +2428,7 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint READ32(*res); bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT; } - dprintk("%s: ACLs supported=%u\n", __FUNCTION__, (unsigned int)*res); + dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res); return 0; } @@ -2444,7 +2444,7 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t READ64(*fileid); bitmap[0] &= ~FATTR4_WORD0_FILEID; } - dprintk("%s: fileid=%Lu\n", __FUNCTION__, (unsigned long long)*fileid); + dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); return 0; } @@ -2460,7 +2460,7 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma READ64(*fileid); bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; } - dprintk("%s: fileid=%Lu\n", __FUNCTION__, (unsigned long long)*fileid); + dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); return 0; } @@ -2477,7 +2477,7 @@ static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin READ64(*res); bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL; } - dprintk("%s: files avail=%Lu\n", __FUNCTION__, (unsigned long long)*res); + dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res); return status; } @@ -2494,7 +2494,7 @@ static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint READ64(*res); bitmap[0] &= ~FATTR4_WORD0_FILES_FREE; } - dprintk("%s: files free=%Lu\n", __FUNCTION__, (unsigned long long)*res); + dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res); return status; } @@ -2511,7 +2511,7 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin READ64(*res); bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL; } - dprintk("%s: files total=%Lu\n", __FUNCTION__, (unsigned long long)*res); + dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res); return status; } @@ -2569,7 +2569,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st status = 0; if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) goto out; - dprintk("%s: fsroot ", __FUNCTION__); + dprintk("%s: fsroot ", __func__); status = decode_pathname(xdr, &res->fs_path); if (unlikely(status != 0)) goto out; @@ -2586,7 +2586,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st READ32(m); loc->nservers = 0; - dprintk("%s: servers ", __FUNCTION__); + dprintk("%s: servers ", __func__); while (loc->nservers < m) { struct nfs4_string *server = &loc->servers[loc->nservers]; status = decode_opaque_inline(xdr, &server->len, &server->data); @@ -2599,7 +2599,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st unsigned int i; dprintk("%s: using first %u of %u servers " "returned for location %u\n", - __FUNCTION__, + __func__, NFS4_FS_LOCATION_MAXSERVERS, m, res->nlocations); for (i = loc->nservers; i < m; i++) { @@ -2618,7 +2618,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st res->nlocations++; } out: - dprintk("%s: fs_locations done, error = %d\n", __FUNCTION__, status); + dprintk("%s: fs_locations done, error = %d\n", __func__, status); return status; out_eio: status = -EIO; @@ -2638,7 +2638,7 @@ static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uin READ64(*res); bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE; } - dprintk("%s: maxfilesize=%Lu\n", __FUNCTION__, (unsigned long long)*res); + dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res); return status; } @@ -2655,7 +2655,7 @@ static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_ READ32(*maxlink); bitmap[0] &= ~FATTR4_WORD0_MAXLINK; } - dprintk("%s: maxlink=%u\n", __FUNCTION__, *maxlink); + dprintk("%s: maxlink=%u\n", __func__, *maxlink); return status; } @@ -2672,7 +2672,7 @@ static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_ READ32(*maxname); bitmap[0] &= ~FATTR4_WORD0_MAXNAME; } - dprintk("%s: maxname=%u\n", __FUNCTION__, *maxname); + dprintk("%s: maxname=%u\n", __func__, *maxname); return status; } @@ -2693,7 +2693,7 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_ *res = (uint32_t)maxread; bitmap[0] &= ~FATTR4_WORD0_MAXREAD; } - dprintk("%s: maxread=%lu\n", __FUNCTION__, (unsigned long)*res); + dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res); return status; } @@ -2714,7 +2714,7 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32 *res = (uint32_t)maxwrite; bitmap[0] &= ~FATTR4_WORD0_MAXWRITE; } - dprintk("%s: maxwrite=%lu\n", __FUNCTION__, (unsigned long)*res); + dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res); return status; } @@ -2731,7 +2731,7 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t * *mode &= ~S_IFMT; bitmap[1] &= ~FATTR4_WORD1_MODE; } - dprintk("%s: file mode=0%o\n", __FUNCTION__, (unsigned int)*mode); + dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode); return 0; } @@ -2747,7 +2747,7 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t READ32(*nlink); bitmap[1] &= ~FATTR4_WORD1_NUMLINKS; } - dprintk("%s: nlink=%u\n", __FUNCTION__, (unsigned int)*nlink); + dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink); return 0; } @@ -2766,13 +2766,13 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf if (len < XDR_MAX_NETOBJ) { if (nfs_map_name_to_uid(clp, (char *)p, len, uid) != 0) dprintk("%s: nfs_map_name_to_uid failed!\n", - __FUNCTION__); + __func__); } else dprintk("%s: name too long (%u)!\n", - __FUNCTION__, len); + __func__, len); bitmap[1] &= ~FATTR4_WORD1_OWNER; } - dprintk("%s: uid=%d\n", __FUNCTION__, (int)*uid); + dprintk("%s: uid=%d\n", __func__, (int)*uid); return 0; } @@ -2791,13 +2791,13 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf if (len < XDR_MAX_NETOBJ) { if (nfs_map_group_to_gid(clp, (char *)p, len, gid) != 0) dprintk("%s: nfs_map_group_to_gid failed!\n", - __FUNCTION__); + __func__); } else dprintk("%s: name too long (%u)!\n", - __FUNCTION__, len); + __func__, len); bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; } - dprintk("%s: gid=%d\n", __FUNCTION__, (int)*gid); + dprintk("%s: gid=%d\n", __func__, (int)*gid); return 0; } @@ -2820,7 +2820,7 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde *rdev = tmp; bitmap[1] &= ~ FATTR4_WORD1_RAWDEV; } - dprintk("%s: rdev=(0x%x:0x%x)\n", __FUNCTION__, major, minor); + dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor); return 0; } @@ -2837,7 +2837,7 @@ static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin READ64(*res); bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL; } - dprintk("%s: space avail=%Lu\n", __FUNCTION__, (unsigned long long)*res); + dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res); return status; } @@ -2854,7 +2854,7 @@ static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint READ64(*res); bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE; } - dprintk("%s: space free=%Lu\n", __FUNCTION__, (unsigned long long)*res); + dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res); return status; } @@ -2871,7 +2871,7 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin READ64(*res); bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL; } - dprintk("%s: space total=%Lu\n", __FUNCTION__, (unsigned long long)*res); + dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res); return status; } @@ -2887,7 +2887,7 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint READ64(*used); bitmap[1] &= ~FATTR4_WORD1_SPACE_USED; } - dprintk("%s: space used=%Lu\n", __FUNCTION__, + dprintk("%s: space used=%Lu\n", __func__, (unsigned long long)*used); return 0; } @@ -2918,7 +2918,7 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str status = decode_attr_time(xdr, time); bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS; } - dprintk("%s: atime=%ld\n", __FUNCTION__, (long)time->tv_sec); + dprintk("%s: atime=%ld\n", __func__, (long)time->tv_sec); return status; } @@ -2934,7 +2934,7 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s status = decode_attr_time(xdr, time); bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA; } - dprintk("%s: ctime=%ld\n", __FUNCTION__, (long)time->tv_sec); + dprintk("%s: ctime=%ld\n", __func__, (long)time->tv_sec); return status; } @@ -2950,7 +2950,7 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str status = decode_attr_time(xdr, time); bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY; } - dprintk("%s: mtime=%ld\n", __FUNCTION__, (long)time->tv_sec); + dprintk("%s: mtime=%ld\n", __func__, (long)time->tv_sec); return status; } @@ -2962,7 +2962,7 @@ static int verify_attr_len(struct xdr_stream *xdr, __be32 *savep, uint32_t attrl if (unlikely(attrwords != nwords)) { dprintk("%s: server returned incorrect attribute length: " "%u %c %u\n", - __FUNCTION__, + __func__, attrwords << 2, (attrwords < nwords) ? '<' : '>', nwords << 2); @@ -3067,7 +3067,7 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re goto xdr_error; status = verify_attr_len(xdr, savep, attrlen); xdr_error: - dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status); + dprintk("%s: xdr returned %d!\n", __func__, -status); return status; } @@ -3100,7 +3100,7 @@ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) status = verify_attr_len(xdr, savep, attrlen); xdr_error: - dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status); + dprintk("%s: xdr returned %d!\n", __func__, -status); return status; } @@ -3125,7 +3125,7 @@ static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf status = verify_attr_len(xdr, savep, attrlen); xdr_error: - dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status); + dprintk("%s: xdr returned %d!\n", __func__, -status); return status; } @@ -3193,7 +3193,7 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4; xdr_error: - dprintk("%s: xdr returned %d\n", __FUNCTION__, -status); + dprintk("%s: xdr returned %d\n", __func__, -status); return status; } @@ -3226,7 +3226,7 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) status = verify_attr_len(xdr, savep, attrlen); xdr_error: - dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status); + dprintk("%s: xdr returned %d!\n", __func__, -status); return status; } @@ -3418,7 +3418,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) return decode_delegation(xdr, res); xdr_error: - dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen); + dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen); return -EIO; } @@ -3575,7 +3575,7 @@ short_pkt: * the call was successful, but incomplete. The caller can retry the * readdir starting at the last cookie. */ - dprintk("%s: short packet at entry %d\n", __FUNCTION__, nr); + dprintk("%s: short packet at entry %d\n", __func__, nr); entry[0] = entry[1] = 0; if (nr) goto out; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 5ccf7faee19c..03599bfe81cf 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -63,17 +63,17 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, }; int status; - dprintk("%s: call getattr\n", __FUNCTION__); + dprintk("%s: call getattr\n", __func__); nfs_fattr_init(fattr); status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); - dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); + dprintk("%s: reply getattr: %d\n", __func__, status); if (status) return status; - dprintk("%s: call statfs\n", __FUNCTION__); + dprintk("%s: call statfs\n", __func__); msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; msg.rpc_resp = &fsinfo; status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); - dprintk("%s: reply statfs: %d\n", __FUNCTION__, status); + dprintk("%s: reply statfs: %d\n", __func__, status); if (status) return status; info->rtmax = NFS_MAXDATA; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 16f57e0af999..40d17987d0e8 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -329,7 +329,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) { int status; - dprintk("NFS: %s: %5u, (status %d)\n", __FUNCTION__, task->tk_pid, + dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, task->tk_status); status = NFS_PROTO(data->inode)->read_done(task, data); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5ed86ac0fd9b..2a4a024a4e7b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -405,7 +405,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; out_err: - dprintk("%s: statfs error = %d\n", __FUNCTION__, -error); + dprintk("%s: statfs error = %d\n", __func__, -error); unlock_kernel(); return error; } -- cgit v1.2.3-70-g09d2 From 31f31db1a15671513df9cd9fbe56ef45ee1e9a2a Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Fri, 2 May 2008 13:42:45 -0700 Subject: nfs: path_{get,put}() cleanups Here are some more places where path_{get,put}() can be used instead of dput()/mntput() pair. Signed-off-by: Jan Blunck Cc: Trond Myklebust Cc: "J. Bruce Fields" Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 3 +-- fs/nfs/namespace.c | 3 +-- fs/nfs/nfs4proc.c | 6 ++---- 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 421d338c698c..596c5d8e86f4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -541,8 +541,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait) } if (ctx->cred != NULL) put_rpccred(ctx->cred); - dput(ctx->path.dentry); - mntput(ctx->path.mnt); + path_put(&ctx->path); kfree(ctx); } diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index fca518006a52..2f285ef76399 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -137,8 +137,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) goto out_follow; goto out_err; } - mntput(nd->path.mnt); - dput(nd->path.dentry); + path_put(&nd->path); nd->path.mnt = mnt; nd->path.dentry = dget(mnt->mnt_root); schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f533318b005f..1293e0acd82b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -306,8 +306,7 @@ static void nfs4_opendata_free(struct kref *kref) nfs4_put_open_state(p->state); nfs4_put_state_owner(p->owner); dput(p->dir); - dput(p->path.dentry); - mntput(p->path.mnt); + path_put(&p->path); kfree(p); } @@ -1210,8 +1209,7 @@ static void nfs4_free_closedata(void *data) nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); - dput(calldata->path.dentry); - mntput(calldata->path.mnt); + path_put(&calldata->path); kfree(calldata); } -- cgit v1.2.3-70-g09d2 From 1d2e88e73ee0af52b0ed63b5fb8f42a919a4d9de Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 2 May 2008 13:42:45 -0700 Subject: nfs: make nfs4_drop_state_owner() static nfs4_drop_state_owner() can now become static. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 - fs/nfs/nfs4state.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index bd1b9d663fb9..ea790645fda6 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -206,7 +206,6 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); -extern void nfs4_drop_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5a1e02c8b75e..856a8934f610 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -282,7 +282,7 @@ nfs4_alloc_state_owner(void) return sp; } -void +static void nfs4_drop_state_owner(struct nfs4_state_owner *sp) { if (!RB_EMPTY_NODE(&sp->so_client_node)) { -- cgit v1.2.3-70-g09d2 From fec4585fd71cc5ec35d134e8c3854f6e8c4503f0 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 16 May 2008 13:06:30 +0400 Subject: CIFSGetDFSRefer cleanup + dfs_referral_level_3 fixed to conform REFERRAL_V3 the MS-DFSC spec. Signed-off-by: Igor Mammedov Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 16 ++--- fs/cifs/cifssmb.c | 205 +++++++++++++++++++++++++++++++++++------------------- 2 files changed, 139 insertions(+), 82 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index c43bf4b7a556..93d5ee02a25b 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1906,17 +1906,15 @@ typedef struct smb_com_transaction2_get_dfs_refer_req { typedef struct dfs_referral_level_3 { __le16 VersionNumber; - __le16 ReferralSize; - __le16 ServerType; /* 0x0001 = CIFS server */ - __le16 ReferralFlags; /* or proximity - not clear which since it is - always set to zero - SNIA spec says 0x01 - means strip off PathConsumed chars before - submitting RequestFileName to remote node */ - __le16 TimeToLive; - __le16 Proximity; + __le16 Size; + __le16 ServerType; /* 0x0001 = root targets; 0x0000 = link targets */ + __le16 ReferralEntryFlags; /* 0x0200 bit set only for domain + or DC referral responce */ + __le32 TimeToLive; __le16 DfsPathOffset; __le16 DfsAlternatePathOffset; - __le16 NetworkAddressOffset; + __le16 NetworkAddressOffset; /* offset of the link target */ + __le16 ServiceSiteGuid; } __attribute__((packed)) REFERRAL3; typedef struct smb_com_transaction_get_dfs_refer_rsp { diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index fc297383cb0e..6f8ed93a4ae8 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -81,6 +81,39 @@ static struct { #endif /* CONFIG_CIFS_WEAK_PW_HASH */ #endif /* CIFS_POSIX */ +/* Allocates buffer into dst and copies smb string from src to it. + * caller is responsible for freeing dst if function returned 0. + * returns: + * on success - 0 + * on failure - errno + */ +static int +cifs_strncpy_to_host(char **dst, const char *src, const int maxlen, + const bool is_unicode, const struct nls_table *nls_codepage) +{ + int plen; + + if (is_unicode) { + plen = UniStrnlen((wchar_t *)src, maxlen); + *dst = kmalloc(plen + 2, GFP_KERNEL); + if (!*dst) + goto cifs_strncpy_to_host_ErrExit; + cifs_strfromUCS_le(*dst, (__le16 *)src, plen, nls_codepage); + } else { + plen = strnlen(src, maxlen); + *dst = kmalloc(plen + 2, GFP_KERNEL); + if (!*dst) + goto cifs_strncpy_to_host_ErrExit; + strncpy(*dst, src, plen); + } + (*dst)[plen] = 0; + return 0; + +cifs_strncpy_to_host_ErrExit: + cERROR(1, ("Failed to allocate buffer for string\n")); + return -ENOMEM; +} + /* Mark as invalid, all open files on tree connections since they were closed when session to server was lost */ @@ -3867,6 +3900,96 @@ GetInodeNumOut: return rc; } +/* parses DFS refferal V3 structure + * caller is responsible for freeing target_nodes + * returns: + * on success - 0 + * on failure - errno + */ +static int +parse_DFS_REFERRALS(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, + unsigned int *num_of_nodes, + struct dfs_info3_param **target_nodes, + const struct nls_table *nls_codepage) +{ + int i, rc = 0; + char *data_end; + bool is_unicode; + struct dfs_referral_level_3 *ref; + + is_unicode = pSMBr->hdr.Flags2 & SMBFLG2_UNICODE; + *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals); + + if (*num_of_nodes < 1) { + cERROR(1, ("num_referrals: must be at least > 0," + "but we get num_referrals = %d\n", *num_of_nodes)); + rc = -EINVAL; + goto parse_DFS_REFERRALS_exit; + } + + ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals); + if (ref->VersionNumber != 3) { + cERROR(1, ("Referrals of V%d version are not supported," + "should be V3", ref->VersionNumber)); + rc = -EINVAL; + goto parse_DFS_REFERRALS_exit; + } + + /* get the upper boundary of the resp buffer */ + data_end = (char *)(&(pSMBr->PathConsumed)) + + le16_to_cpu(pSMBr->t2.DataCount); + + cFYI(1, ("num_referrals: %d dfs flags: 0x%x ... \n", + *num_of_nodes, + le16_to_cpu(pSMBr->DFSFlags))); + + *target_nodes = kzalloc(sizeof(struct dfs_info3_param) * + *num_of_nodes, GFP_KERNEL); + if (*target_nodes == NULL) { + cERROR(1, ("Failed to allocate buffer for target_nodes\n")); + rc = -ENOMEM; + goto parse_DFS_REFERRALS_exit; + } + + /* collect neccessary data from referrals */ + for (i = 0; i < *num_of_nodes; i++) { + char *temp; + int max_len; + struct dfs_info3_param *node = (*target_nodes)+i; + + node->flags = le16_to_cpu(pSMBr->DFSFlags); + node->path_consumed = le16_to_cpu(pSMBr->PathConsumed); + node->server_type = le16_to_cpu(ref->ServerType); + node->ref_flag = le16_to_cpu(ref->ReferralEntryFlags); + + /* copy DfsPath */ + temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset); + max_len = data_end - temp; + rc = cifs_strncpy_to_host(&(node->path_name), temp, + max_len, is_unicode, nls_codepage); + if (rc) + goto parse_DFS_REFERRALS_exit; + + /* copy link target UNC */ + temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset); + max_len = data_end - temp; + rc = cifs_strncpy_to_host(&(node->node_name), temp, + max_len, is_unicode, nls_codepage); + if (rc) + goto parse_DFS_REFERRALS_exit; + + ref += ref->Size; + } + +parse_DFS_REFERRALS_exit: + if (rc) { + free_dfs_info_array(*target_nodes, *num_of_nodes); + *target_nodes = NULL; + *num_of_nodes = 0; + } + return rc; +} + int CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses, const unsigned char *searchName, @@ -3877,12 +4000,9 @@ CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses, /* TRANS2_GET_DFS_REFERRAL */ TRANSACTION2_GET_DFS_REFER_REQ *pSMB = NULL; TRANSACTION2_GET_DFS_REFER_RSP *pSMBr = NULL; - struct dfs_referral_level_3 *referrals = NULL; int rc = 0; int bytes_returned; int name_len; - unsigned int i; - char *temp; __u16 params, byte_count; *num_of_nodes = 0; *target_nodes = NULL; @@ -3960,80 +4080,19 @@ getDFSRetry: rc = validate_t2((struct smb_t2_rsp *)pSMBr); /* BB Also check if enough total bytes returned? */ - if (rc || (pSMBr->ByteCount < 17)) + if (rc || (pSMBr->ByteCount < 17)) { rc = -EIO; /* bad smb */ - else { - __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); - __u16 data_count = le16_to_cpu(pSMBr->t2.DataCount); - - cFYI(1, ("Decoding GetDFSRefer response BCC: %d Offset %d", - pSMBr->ByteCount, data_offset)); - referrals = - (struct dfs_referral_level_3 *) - (8 /* sizeof start of data block */ + - data_offset + - (char *) &pSMBr->hdr.Protocol); - cFYI(1, ("num_referrals: %d dfs flags: 0x%x ... \n" - "for referral one refer size: 0x%x srv " - "type: 0x%x refer flags: 0x%x ttl: 0x%x", - le16_to_cpu(pSMBr->NumberOfReferrals), - le16_to_cpu(pSMBr->DFSFlags), - le16_to_cpu(referrals->ReferralSize), - le16_to_cpu(referrals->ServerType), - le16_to_cpu(referrals->ReferralFlags), - le16_to_cpu(referrals->TimeToLive))); - /* BB This field is actually two bytes in from start of - data block so we could do safety check that DataBlock - begins at address of pSMBr->NumberOfReferrals */ - *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals); - - /* BB Fix below so can return more than one referral */ - if (*num_of_nodes > 1) - *num_of_nodes = 1; - - /* get the length of the strings describing refs */ - name_len = 0; - for (i = 0; i < *num_of_nodes; i++) { - /* make sure that DfsPathOffset not past end */ - __u16 offset = le16_to_cpu(referrals->DfsPathOffset); - if (offset > data_count) { - /* if invalid referral, stop here and do - not try to copy any more */ - *num_of_nodes = i; - break; - } - temp = ((char *)referrals) + offset; + goto GetDFSRefExit; + } - if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len += UniStrnlen((wchar_t *)temp, - data_count); - } else { - name_len += strnlen(temp, data_count); - } - referrals++; - /* BB add check that referral pointer does - not fall off end PDU */ - } - /* BB add check for name_len bigger than bcc */ - *target_nodes = - kmalloc(name_len+1+(*num_of_nodes), - GFP_KERNEL); - if (*target_nodes == NULL) { - rc = -ENOMEM; - goto GetDFSRefExit; - } + cFYI(1, ("Decoding GetDFSRefer response BCC: %d Offset %d", + pSMBr->ByteCount, + le16_to_cpu(pSMBr->t2.DataOffset))); - referrals = (struct dfs_referral_level_3 *) - (8 /* sizeof data hdr */ + data_offset + - (char *) &pSMBr->hdr.Protocol); + /* parse returned result into more usable form */ + rc = parse_DFS_REFERRALS(pSMBr, num_of_nodes, + target_nodes, nls_codepage); - for (i = 0; i < *num_of_nodes; i++) { - temp = ((char *)referrals) + - le16_to_cpu(referrals->DfsPathOffset); - /* BB update target_uncs pointers */ - referrals++; - } - } GetDFSRefExit: if (pSMB) cifs_buf_release(pSMB); -- cgit v1.2.3-70-g09d2 From a1fe78f16eac7d03d3c391dd5d54559826574982 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 16 May 2008 18:48:38 +0000 Subject: [CIFS] Add missing defines for DFS Also has minor cleanup of previous patch CC: Igor Mammedov Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 9 +++++++++ fs/cifs/cifssmb.c | 17 +++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 93d5ee02a25b..65d58b4e6a61 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1904,6 +1904,15 @@ typedef struct smb_com_transaction2_get_dfs_refer_req { char RequestFileName[1]; } __attribute__((packed)) TRANSACTION2_GET_DFS_REFER_REQ; +#define DFS_VERSION cpu_to_le16(0x0003) + +/* DFS server target type */ +#define DFS_TYPE_LINK 0x0000 /* also for sysvol targets */ +#define DFS_TYPE_ROOT 0x0001 + +/* Referral Entry Flags */ +#define DFS_NAME_LIST_REF 0x0200 + typedef struct dfs_referral_level_3 { __le16 VersionNumber; __le16 Size; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 6f8ed93a4ae8..7b9938445b07 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -107,6 +107,7 @@ cifs_strncpy_to_host(char **dst, const char *src, const int maxlen, strncpy(*dst, src, plen); } (*dst)[plen] = 0; + (*dst)[plen+1] = 0; /* harmless for ASCII case, needed for Unicode */ return 0; cifs_strncpy_to_host_ErrExit: @@ -3907,7 +3908,7 @@ GetInodeNumOut: * on failure - errno */ static int -parse_DFS_REFERRALS(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, +parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, unsigned int *num_of_nodes, struct dfs_info3_param **target_nodes, const struct nls_table *nls_codepage) @@ -3924,7 +3925,7 @@ parse_DFS_REFERRALS(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, cERROR(1, ("num_referrals: must be at least > 0," "but we get num_referrals = %d\n", *num_of_nodes)); rc = -EINVAL; - goto parse_DFS_REFERRALS_exit; + goto parse_DFS_referrals_exit; } ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals); @@ -3932,7 +3933,7 @@ parse_DFS_REFERRALS(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, cERROR(1, ("Referrals of V%d version are not supported," "should be V3", ref->VersionNumber)); rc = -EINVAL; - goto parse_DFS_REFERRALS_exit; + goto parse_DFS_referrals_exit; } /* get the upper boundary of the resp buffer */ @@ -3948,7 +3949,7 @@ parse_DFS_REFERRALS(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, if (*target_nodes == NULL) { cERROR(1, ("Failed to allocate buffer for target_nodes\n")); rc = -ENOMEM; - goto parse_DFS_REFERRALS_exit; + goto parse_DFS_referrals_exit; } /* collect neccessary data from referrals */ @@ -3968,7 +3969,7 @@ parse_DFS_REFERRALS(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, rc = cifs_strncpy_to_host(&(node->path_name), temp, max_len, is_unicode, nls_codepage); if (rc) - goto parse_DFS_REFERRALS_exit; + goto parse_DFS_referrals_exit; /* copy link target UNC */ temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset); @@ -3976,12 +3977,12 @@ parse_DFS_REFERRALS(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, rc = cifs_strncpy_to_host(&(node->node_name), temp, max_len, is_unicode, nls_codepage); if (rc) - goto parse_DFS_REFERRALS_exit; + goto parse_DFS_referrals_exit; ref += ref->Size; } -parse_DFS_REFERRALS_exit: +parse_DFS_referrals_exit: if (rc) { free_dfs_info_array(*target_nodes, *num_of_nodes); *target_nodes = NULL; @@ -4090,7 +4091,7 @@ getDFSRetry: le16_to_cpu(pSMBr->t2.DataOffset))); /* parse returned result into more usable form */ - rc = parse_DFS_REFERRALS(pSMBr, num_of_nodes, + rc = parse_DFS_referrals(pSMBr, num_of_nodes, target_nodes, nls_codepage); GetDFSRefExit: -- cgit v1.2.3-70-g09d2 From f52111b1546943545e67573c4dde1c7613ca33d3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 18:19:16 -0400 Subject: [PATCH] take init_files to fs/file.c Signed-off-by: Al Viro --- arch/alpha/kernel/init_task.c | 1 - arch/arm/kernel/init_task.c | 1 - arch/avr32/kernel/init_task.c | 1 - arch/blackfin/kernel/init_task.c | 1 - arch/cris/kernel/process.c | 1 - arch/frv/kernel/init_task.c | 1 - arch/h8300/kernel/init_task.c | 1 - arch/ia64/kernel/init_task.c | 1 - arch/m32r/kernel/init_task.c | 1 - arch/m68k/kernel/process.c | 1 - arch/m68knommu/kernel/init_task.c | 1 - arch/mips/kernel/init_task.c | 1 - arch/mn10300/kernel/init_task.c | 1 - arch/parisc/kernel/init_task.c | 1 - arch/powerpc/kernel/init_task.c | 1 - arch/s390/kernel/init_task.c | 1 - arch/sh/kernel/init_task.c | 1 - arch/sparc/kernel/init_task.c | 1 - arch/sparc64/kernel/init_task.c | 1 - arch/um/kernel/init_task.c | 1 - arch/v850/kernel/init_task.c | 1 - arch/x86/kernel/init_task.c | 1 - arch/xtensa/kernel/init_task.c | 1 - fs/file.c | 13 +++++++++++++ include/linux/init_task.h | 23 +---------------------- 25 files changed, 14 insertions(+), 45 deletions(-) (limited to 'fs') diff --git a/arch/alpha/kernel/init_task.c b/arch/alpha/kernel/init_task.c index 835d09a7b332..1f762189fa64 100644 --- a/arch/alpha/kernel/init_task.c +++ b/arch/alpha/kernel/init_task.c @@ -9,7 +9,6 @@ static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c index bd4ef53bc6b9..8b8c9d38a761 100644 --- a/arch/arm/kernel/init_task.c +++ b/arch/arm/kernel/init_task.c @@ -13,7 +13,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c index effcacf9d1a2..44058469c6ec 100644 --- a/arch/avr32/kernel/init_task.c +++ b/arch/avr32/kernel/init_task.c @@ -14,7 +14,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/blackfin/kernel/init_task.c b/arch/blackfin/kernel/init_task.c index c640154030e2..6bdba7b21109 100644 --- a/arch/blackfin/kernel/init_task.c +++ b/arch/blackfin/kernel/init_task.c @@ -34,7 +34,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index ef2db8fd102a..5933656db5a2 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -38,7 +38,6 @@ */ static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/frv/kernel/init_task.c b/arch/frv/kernel/init_task.c index 22993932b3fc..e2198815b630 100644 --- a/arch/frv/kernel/init_task.c +++ b/arch/frv/kernel/init_task.c @@ -11,7 +11,6 @@ static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/h8300/kernel/init_task.c b/arch/h8300/kernel/init_task.c index 19272c2ac56a..93a4899e46c2 100644 --- a/arch/h8300/kernel/init_task.c +++ b/arch/h8300/kernel/init_task.c @@ -13,7 +13,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index bc8efcad28b8..9d7e1c66faf4 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -18,7 +18,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/m32r/kernel/init_task.c b/arch/m32r/kernel/init_task.c index 9e508fd9d970..0d658dbb6766 100644 --- a/arch/m32r/kernel/init_task.c +++ b/arch/m32r/kernel/init_task.c @@ -12,7 +12,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 5de4e4ed76ab..7888cdf91f5d 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -41,7 +41,6 @@ * setup. */ static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68knommu/kernel/init_task.c index 3897043a126a..344c01aede08 100644 --- a/arch/m68knommu/kernel/init_task.c +++ b/arch/m68knommu/kernel/init_task.c @@ -13,7 +13,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/mips/kernel/init_task.c b/arch/mips/kernel/init_task.c index aeda7f58391b..d72487ad7c15 100644 --- a/arch/mips/kernel/init_task.c +++ b/arch/mips/kernel/init_task.c @@ -10,7 +10,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c index 39fe6882dd1d..af16f6e5c918 100644 --- a/arch/mn10300/kernel/init_task.c +++ b/arch/mn10300/kernel/init_task.c @@ -19,7 +19,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c index 26198a074d67..f5941c086551 100644 --- a/arch/parisc/kernel/init_task.c +++ b/arch/parisc/kernel/init_task.c @@ -35,7 +35,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c index 941043ae040f..4c85b8d56478 100644 --- a/arch/powerpc/kernel/init_task.c +++ b/arch/powerpc/kernel/init_task.c @@ -8,7 +8,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c index d494161b05b4..7ad003969251 100644 --- a/arch/s390/kernel/init_task.c +++ b/arch/s390/kernel/init_task.c @@ -17,7 +17,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/sh/kernel/init_task.c b/arch/sh/kernel/init_task.c index f9bcc606127e..b151a25cb14d 100644 --- a/arch/sh/kernel/init_task.c +++ b/arch/sh/kernel/init_task.c @@ -8,7 +8,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct pt_regs fake_swapper_regs; diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c index d9d4f96360c7..8e64ebc445ef 100644 --- a/arch/sparc/kernel/init_task.c +++ b/arch/sparc/kernel/init_task.c @@ -9,7 +9,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/sparc64/kernel/init_task.c b/arch/sparc64/kernel/init_task.c index 90007cf88bac..d2b312381c19 100644 --- a/arch/sparc64/kernel/init_task.c +++ b/arch/sparc64/kernel/init_task.c @@ -10,7 +10,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c index dcfceca95052..910eda8fca18 100644 --- a/arch/um/kernel/init_task.c +++ b/arch/um/kernel/init_task.c @@ -12,7 +12,6 @@ static struct fs_struct init_fs = INIT_FS; struct mm_struct init_mm = INIT_MM(init_mm); -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); EXPORT_SYMBOL(init_mm); diff --git a/arch/v850/kernel/init_task.c b/arch/v850/kernel/init_task.c index ed2f93cf7c66..44b274dff33f 100644 --- a/arch/v850/kernel/init_task.c +++ b/arch/v850/kernel/init_task.c @@ -21,7 +21,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS (init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM (init_mm); diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 3d01e47777db..a4f93b4120c1 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -11,7 +11,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/xtensa/kernel/init_task.c b/arch/xtensa/kernel/init_task.c index 021b4f46ff94..3df469dbe814 100644 --- a/arch/xtensa/kernel/init_task.c +++ b/arch/xtensa/kernel/init_task.c @@ -22,7 +22,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/fs/file.c b/fs/file.c index 4c6f0ea12c41..754cd05b06af 100644 --- a/fs/file.c +++ b/fs/file.c @@ -275,3 +275,16 @@ void __init files_defer_init(void) for_each_possible_cpu(i) fdtable_defer_list_init(i); } + +struct files_struct init_files = { + .count = ATOMIC_INIT(1), + .fdt = &init_files.fdtab, + .fdtab = { + .max_fds = NR_OPEN_DEFAULT, + .fd = &init_files.fd_array[0], + .close_on_exec = (fd_set *)&init_files.close_on_exec_init, + .open_fds = (fd_set *)&init_files.open_fds_init, + .rcu = RCU_HEAD_INIT, + }, + .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), +}; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index b24c2875aa05..9927a88674a3 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -1,7 +1,6 @@ #ifndef _LINUX__INIT_TASK_H #define _LINUX__INIT_TASK_H -#include #include #include #include @@ -12,27 +11,7 @@ #include #include -#define INIT_FDTABLE \ -{ \ - .max_fds = NR_OPEN_DEFAULT, \ - .fd = &init_files.fd_array[0], \ - .close_on_exec = (fd_set *)&init_files.close_on_exec_init, \ - .open_fds = (fd_set *)&init_files.open_fds_init, \ - .rcu = RCU_HEAD_INIT, \ - .next = NULL, \ -} - -#define INIT_FILES \ -{ \ - .count = ATOMIC_INIT(1), \ - .fdt = &init_files.fdtab, \ - .fdtab = INIT_FDTABLE, \ - .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), \ - .next_fd = 0, \ - .close_on_exec_init = { { 0, } }, \ - .open_fds_init = { { 0, } }, \ - .fd_array = { NULL, } \ -} +extern struct files_struct init_files; #define INIT_KIOCTX(name, which_mm) \ { \ -- cgit v1.2.3-70-g09d2 From 02afc6267f6d55d47aba9fcafdbd1b7230d2294a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 19:42:56 -0400 Subject: [PATCH] dup_fd() fixes, part 1 Move the sucker to fs/file.c in preparation to the rest Signed-off-by: Al Viro --- fs/file.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fdtable.h | 1 + kernel/fork.c | 130 ------------------------------------------------ 3 files changed, 131 insertions(+), 130 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 754cd05b06af..7dbadaaf00f0 100644 --- a/fs/file.c +++ b/fs/file.c @@ -261,6 +261,136 @@ int expand_files(struct files_struct *files, int nr) return expand_fdtable(files, nr); } +static int count_open_files(struct fdtable *fdt) +{ + int size = fdt->max_fds; + int i; + + /* Find the last open fd */ + for (i = size/(8*sizeof(long)); i > 0; ) { + if (fdt->open_fds->fds_bits[--i]) + break; + } + i = (i+1) * 8 * sizeof(long); + return i; +} + +static struct files_struct *alloc_files(void) +{ + struct files_struct *newf; + struct fdtable *fdt; + + newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); + if (!newf) + goto out; + + atomic_set(&newf->count, 1); + + spin_lock_init(&newf->file_lock); + newf->next_fd = 0; + fdt = &newf->fdtab; + fdt->max_fds = NR_OPEN_DEFAULT; + fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; + fdt->open_fds = (fd_set *)&newf->open_fds_init; + fdt->fd = &newf->fd_array[0]; + INIT_RCU_HEAD(&fdt->rcu); + fdt->next = NULL; + rcu_assign_pointer(newf->fdt, fdt); +out: + return newf; +} + +/* + * Allocate a new files structure and copy contents from the + * passed in files structure. + * errorp will be valid only when the returned files_struct is NULL. + */ +struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) +{ + struct files_struct *newf; + struct file **old_fds, **new_fds; + int open_files, size, i; + struct fdtable *old_fdt, *new_fdt; + + *errorp = -ENOMEM; + newf = alloc_files(); + if (!newf) + goto out; + + spin_lock(&oldf->file_lock); + old_fdt = files_fdtable(oldf); + new_fdt = files_fdtable(newf); + open_files = count_open_files(old_fdt); + + /* + * Check whether we need to allocate a larger fd array and fd set. + * Note: we're not a clone task, so the open count won't change. + */ + if (open_files > new_fdt->max_fds) { + new_fdt->max_fds = 0; + spin_unlock(&oldf->file_lock); + spin_lock(&newf->file_lock); + *errorp = expand_files(newf, open_files-1); + spin_unlock(&newf->file_lock); + if (*errorp < 0) + goto out_release; + new_fdt = files_fdtable(newf); + /* + * Reacquire the oldf lock and a pointer to its fd table + * who knows it may have a new bigger fd table. We need + * the latest pointer. + */ + spin_lock(&oldf->file_lock); + old_fdt = files_fdtable(oldf); + } + + old_fds = old_fdt->fd; + new_fds = new_fdt->fd; + + memcpy(new_fdt->open_fds->fds_bits, + old_fdt->open_fds->fds_bits, open_files/8); + memcpy(new_fdt->close_on_exec->fds_bits, + old_fdt->close_on_exec->fds_bits, open_files/8); + + for (i = open_files; i != 0; i--) { + struct file *f = *old_fds++; + if (f) { + get_file(f); + } else { + /* + * The fd may be claimed in the fd bitmap but not yet + * instantiated in the files array if a sibling thread + * is partway through open(). So make sure that this + * fd is available to the new process. + */ + FD_CLR(open_files - i, new_fdt->open_fds); + } + rcu_assign_pointer(*new_fds++, f); + } + spin_unlock(&oldf->file_lock); + + /* compute the remainder to be cleared */ + size = (new_fdt->max_fds - open_files) * sizeof(struct file *); + + /* This is long word aligned thus could use a optimized version */ + memset(new_fds, 0, size); + + if (new_fdt->max_fds > open_files) { + int left = (new_fdt->max_fds-open_files)/8; + int start = open_files / (8 * sizeof(unsigned long)); + + memset(&new_fdt->open_fds->fds_bits[start], 0, left); + memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); + } + + return newf; + +out_release: + kmem_cache_free(files_cachep, newf); +out: + return NULL; +} + static void __devinit fdtable_defer_list_init(int cpu) { struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index a118f3c0b240..4aab6f12cfab 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -93,6 +93,7 @@ struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); +struct files_struct *dup_fd(struct files_struct *, int *); extern struct kmem_cache *files_cachep; diff --git a/kernel/fork.c b/kernel/fork.c index 933e60ebccae..19908b26cf80 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -660,136 +660,6 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) return 0; } -static int count_open_files(struct fdtable *fdt) -{ - int size = fdt->max_fds; - int i; - - /* Find the last open fd */ - for (i = size/(8*sizeof(long)); i > 0; ) { - if (fdt->open_fds->fds_bits[--i]) - break; - } - i = (i+1) * 8 * sizeof(long); - return i; -} - -static struct files_struct *alloc_files(void) -{ - struct files_struct *newf; - struct fdtable *fdt; - - newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); - if (!newf) - goto out; - - atomic_set(&newf->count, 1); - - spin_lock_init(&newf->file_lock); - newf->next_fd = 0; - fdt = &newf->fdtab; - fdt->max_fds = NR_OPEN_DEFAULT; - fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; - fdt->open_fds = (fd_set *)&newf->open_fds_init; - fdt->fd = &newf->fd_array[0]; - INIT_RCU_HEAD(&fdt->rcu); - fdt->next = NULL; - rcu_assign_pointer(newf->fdt, fdt); -out: - return newf; -} - -/* - * Allocate a new files structure and copy contents from the - * passed in files structure. - * errorp will be valid only when the returned files_struct is NULL. - */ -static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) -{ - struct files_struct *newf; - struct file **old_fds, **new_fds; - int open_files, size, i; - struct fdtable *old_fdt, *new_fdt; - - *errorp = -ENOMEM; - newf = alloc_files(); - if (!newf) - goto out; - - spin_lock(&oldf->file_lock); - old_fdt = files_fdtable(oldf); - new_fdt = files_fdtable(newf); - open_files = count_open_files(old_fdt); - - /* - * Check whether we need to allocate a larger fd array and fd set. - * Note: we're not a clone task, so the open count won't change. - */ - if (open_files > new_fdt->max_fds) { - new_fdt->max_fds = 0; - spin_unlock(&oldf->file_lock); - spin_lock(&newf->file_lock); - *errorp = expand_files(newf, open_files-1); - spin_unlock(&newf->file_lock); - if (*errorp < 0) - goto out_release; - new_fdt = files_fdtable(newf); - /* - * Reacquire the oldf lock and a pointer to its fd table - * who knows it may have a new bigger fd table. We need - * the latest pointer. - */ - spin_lock(&oldf->file_lock); - old_fdt = files_fdtable(oldf); - } - - old_fds = old_fdt->fd; - new_fds = new_fdt->fd; - - memcpy(new_fdt->open_fds->fds_bits, - old_fdt->open_fds->fds_bits, open_files/8); - memcpy(new_fdt->close_on_exec->fds_bits, - old_fdt->close_on_exec->fds_bits, open_files/8); - - for (i = open_files; i != 0; i--) { - struct file *f = *old_fds++; - if (f) { - get_file(f); - } else { - /* - * The fd may be claimed in the fd bitmap but not yet - * instantiated in the files array if a sibling thread - * is partway through open(). So make sure that this - * fd is available to the new process. - */ - FD_CLR(open_files - i, new_fdt->open_fds); - } - rcu_assign_pointer(*new_fds++, f); - } - spin_unlock(&oldf->file_lock); - - /* compute the remainder to be cleared */ - size = (new_fdt->max_fds - open_files) * sizeof(struct file *); - - /* This is long word aligned thus could use a optimized version */ - memset(new_fds, 0, size); - - if (new_fdt->max_fds > open_files) { - int left = (new_fdt->max_fds-open_files)/8; - int start = open_files / (8 * sizeof(unsigned long)); - - memset(&new_fdt->open_fds->fds_bits[start], 0, left); - memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); - } - - return newf; - -out_release: - kmem_cache_free(files_cachep, newf); -out: - return NULL; -} - static int copy_files(unsigned long clone_flags, struct task_struct * tsk) { struct files_struct *oldf, *newf; -- cgit v1.2.3-70-g09d2 From 9dec3c4d306b09b31331e475e895bb9674e16d81 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 21:02:45 -0400 Subject: [PATCH] dup_fd() part 2 use alloc_fdtable() instead of expand_files(), get rid of pointless grabbing newf->file_lock, kill magic in copy_fdtable() that used to be there only to skip copying when called from dup_fd(). Signed-off-by: Al Viro --- fs/file.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 7dbadaaf00f0..6491b2b5bc38 100644 --- a/fs/file.c +++ b/fs/file.c @@ -119,8 +119,6 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) unsigned int cpy, set; BUG_ON(nfdt->max_fds < ofdt->max_fds); - if (ofdt->max_fds == 0) - return; cpy = ofdt->max_fds * sizeof(struct file *); set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); @@ -327,14 +325,24 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) * Note: we're not a clone task, so the open count won't change. */ if (open_files > new_fdt->max_fds) { - new_fdt->max_fds = 0; spin_unlock(&oldf->file_lock); - spin_lock(&newf->file_lock); - *errorp = expand_files(newf, open_files-1); - spin_unlock(&newf->file_lock); - if (*errorp < 0) + + new_fdt = alloc_fdtable(open_files - 1); + if (!new_fdt) { + *errorp = -ENOMEM; + goto out_release; + } + + /* beyond sysctl_nr_open; nothing to do */ + if (unlikely(new_fdt->max_fds < open_files)) { + free_fdarr(new_fdt); + free_fdset(new_fdt); + kfree(new_fdt); + *errorp = -EMFILE; goto out_release; - new_fdt = files_fdtable(newf); + } + rcu_assign_pointer(files->fdt, new_fdt); + /* * Reacquire the oldf lock and a pointer to its fd table * who knows it may have a new bigger fd table. We need -- cgit v1.2.3-70-g09d2 From afbec7fff4928c273a1f1bb14dfdfdf62688a193 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 21:11:17 -0400 Subject: [PATCH] dup_fd() - part 3 merge alloc_files() into dup_fd(), leave setting newf->fdt until the end Signed-off-by: Al Viro --- fs/file.c | 43 +++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 6491b2b5bc38..689d2b6947e3 100644 --- a/fs/file.c +++ b/fs/file.c @@ -273,31 +273,6 @@ static int count_open_files(struct fdtable *fdt) return i; } -static struct files_struct *alloc_files(void) -{ - struct files_struct *newf; - struct fdtable *fdt; - - newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); - if (!newf) - goto out; - - atomic_set(&newf->count, 1); - - spin_lock_init(&newf->file_lock); - newf->next_fd = 0; - fdt = &newf->fdtab; - fdt->max_fds = NR_OPEN_DEFAULT; - fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; - fdt->open_fds = (fd_set *)&newf->open_fds_init; - fdt->fd = &newf->fd_array[0]; - INIT_RCU_HEAD(&fdt->rcu); - fdt->next = NULL; - rcu_assign_pointer(newf->fdt, fdt); -out: - return newf; -} - /* * Allocate a new files structure and copy contents from the * passed in files structure. @@ -311,13 +286,24 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) struct fdtable *old_fdt, *new_fdt; *errorp = -ENOMEM; - newf = alloc_files(); + newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); if (!newf) goto out; + atomic_set(&newf->count, 1); + + spin_lock_init(&newf->file_lock); + newf->next_fd = 0; + new_fdt = &newf->fdtab; + new_fdt->max_fds = NR_OPEN_DEFAULT; + new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; + new_fdt->open_fds = (fd_set *)&newf->open_fds_init; + new_fdt->fd = &newf->fd_array[0]; + INIT_RCU_HEAD(&new_fdt->rcu); + new_fdt->next = NULL; + spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); - new_fdt = files_fdtable(newf); open_files = count_open_files(old_fdt); /* @@ -341,7 +327,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) *errorp = -EMFILE; goto out_release; } - rcu_assign_pointer(files->fdt, new_fdt); /* * Reacquire the oldf lock and a pointer to its fd table @@ -391,6 +376,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); } + rcu_assign_pointer(newf->fdt, new_fdt); + return newf; out_release: -- cgit v1.2.3-70-g09d2 From adbecb128cd2cc5d14b0ebef6d020ced0efd0ec6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 21:19:42 -0400 Subject: [PATCH] dup_fd() part 4 - race fix Parent _can_ be a clone task, contrary to the comment. Moreover, more files could be opened while we allocate a copy, in which case we end up copying only part into new descriptor table. Since what we get _is_ affected by all changes in the old range, we can get rather weird effects - e.g. dup2(0, 1024); close(0); in parallel with fork() resulting in child that sees the effect of close(), but not that of dup2() done just before that close(). What we need is to recalculate the open_count after having reacquired ->file_lock and if external fdtable we'd just allocated is too small for it, free the sucker and redo allocation. Signed-off-by: Al Viro --- fs/file.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 689d2b6947e3..0f705c7cfefe 100644 --- a/fs/file.c +++ b/fs/file.c @@ -308,11 +308,16 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) /* * Check whether we need to allocate a larger fd array and fd set. - * Note: we're not a clone task, so the open count won't change. */ - if (open_files > new_fdt->max_fds) { + while (unlikely(open_files > new_fdt->max_fds)) { spin_unlock(&oldf->file_lock); + if (new_fdt != &newf->fdtab) { + free_fdarr(new_fdt); + free_fdset(new_fdt); + kfree(new_fdt); + } + new_fdt = alloc_fdtable(open_files - 1); if (!new_fdt) { *errorp = -ENOMEM; @@ -335,6 +340,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) */ spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); + open_files = count_open_files(old_fdt); } old_fds = old_fdt->fd; -- cgit v1.2.3-70-g09d2 From eceea0b3df05ed262ae32e0c6340cc7a3626632d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 May 2008 10:08:32 -0400 Subject: [PATCH] avoid multiplication overflows and signedness issues for max_fds Limit sysctl_nr_open - we don't want ->max_fds to exceed MAX_INT and we don't want size calculation for ->fd[] to overflow. Signed-off-by: Al Viro --- fs/file.c | 4 ++++ kernel/sysctl.c | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 0f705c7cfefe..7b3887e054d0 100644 --- a/fs/file.c +++ b/fs/file.c @@ -26,6 +26,8 @@ struct fdtable_defer { }; int sysctl_nr_open __read_mostly = 1024*1024; +int sysctl_nr_open_min = BITS_PER_LONG; +int sysctl_nr_open_max = 1024 * 1024; /* raised later */ /* * We use this list to defer free fdtables that have vmalloced @@ -405,6 +407,8 @@ void __init files_defer_init(void) int i; for_each_possible_cpu(i) fdtable_defer_list_init(i); + sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & + -BITS_PER_LONG; } struct files_struct init_files = { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d7ffdc59816a..29116652dca8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -81,6 +81,7 @@ extern int compat_log; extern int maps_protect; extern int sysctl_stat_interval; extern int latencytop_enabled; +extern int sysctl_nr_open_min, sysctl_nr_open_max; /* Constants used for minimum and maximum */ #if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) @@ -1190,7 +1191,9 @@ static struct ctl_table fs_table[] = { .data = &sysctl_nr_open, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &sysctl_nr_open_min, + .extra2 = &sysctl_nr_open_max, }, { .ctl_name = FS_DENTRY, -- cgit v1.2.3-70-g09d2 From 5f719558edf9c84bfbb1f7ad37e84c483282d09f Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 6 May 2008 12:45:35 +0800 Subject: [Patch] fs/binfmt_elf.c: fix a wrong free In kmalloc failing path, we shouldn't free pointers in 'info', because the struct 'info' is uninitilized when kmalloc is called. And when kmalloc returns NULL, it's needless to kfree it. Signed-off-by: WANG Cong Cc: Alexander Viro Reviewed-by: Pekka Enberg -- Signed-off-by: Al Viro --- fs/binfmt_elf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b25707fee2cc..bd08332079cf 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1900,7 +1900,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un /* alloc memory for large data structures: too large to be on stack */ elf = kmalloc(sizeof(*elf), GFP_KERNEL); if (!elf) - goto cleanup; + goto out; segs = current->mm->map_count; #ifdef ELF_CORE_EXTRA_PHDRS @@ -2034,8 +2034,9 @@ end_coredump: set_fs(fs); cleanup: - kfree(elf); free_note_info(&info); + kfree(elf); +out: return has_dumped; } -- cgit v1.2.3-70-g09d2 From 08a6fac1c63233c87eec129938022f1a9a4d51f6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 May 2008 16:38:25 -0400 Subject: [PATCH] get rid of leak in compat_execve() Even though copy_compat_strings() doesn't cache the pages, copy_strings_kernel() and stuff indirectly called by e.g. ->load_binary() is doing that, so we need to drop the cache contents in the end. [found by WANG Cong ] Signed-off-by: Al Viro --- fs/compat.c | 4 ++-- fs/exec.c | 12 ++++++++---- include/linux/binfmts.h | 1 + 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index 332a869d2c53..ed43e17a5dc6 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1405,7 +1405,7 @@ int compat_do_execve(char * filename, /* execve success */ security_bprm_free(bprm); acct_update_integrals(current); - kfree(bprm); + free_bprm(bprm); return retval; } @@ -1424,7 +1424,7 @@ out_file: } out_kfree: - kfree(bprm); + free_bprm(bprm); out_ret: return retval; diff --git a/fs/exec.c b/fs/exec.c index 1f8a24aa1f8b..3c2ba7ce11d4 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1251,6 +1251,12 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) EXPORT_SYMBOL(search_binary_handler); +void free_bprm(struct linux_binprm *bprm) +{ + free_arg_pages(bprm); + kfree(bprm); +} + /* * sys_execve() executes a new program. */ @@ -1320,17 +1326,15 @@ int do_execve(char * filename, retval = search_binary_handler(bprm,regs); if (retval >= 0) { /* execve success */ - free_arg_pages(bprm); security_bprm_free(bprm); acct_update_integrals(current); - kfree(bprm); + free_bprm(bprm); if (displaced) put_files_struct(displaced); return retval; } out: - free_arg_pages(bprm); if (bprm->security) security_bprm_free(bprm); @@ -1344,7 +1348,7 @@ out_file: fput(bprm->file); } out_kfree: - kfree(bprm); + free_bprm(bprm); out_files: if (displaced) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index b512e48f6d8e..ee0ed48e8348 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -99,6 +99,7 @@ extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); extern void compute_creds(struct linux_binprm *binprm); extern int do_coredump(long signr, int exit_code, struct pt_regs * regs); extern int set_binfmt(struct linux_binfmt *new); +extern void free_bprm(struct linux_binprm *); #endif /* __KERNEL__ */ #endif /* _LINUX_BINFMTS_H */ -- cgit v1.2.3-70-g09d2 From 23c4971e3d97de4e1b7961ca6eacee35aa15ce5f Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 8 May 2008 21:52:33 +0800 Subject: [Patch] fs/binfmt_elf.c: fix wrong return values create_elf_tables() returns 0 on success. But when strnlen_user() "fails", it returns 0 directly. So this is wrong. Signed-off-by: WANG Cong Cc: Alexander Viro Signed-off-by: Al Viro --- fs/binfmt_elf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index bd08332079cf..0fa95b198e6e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -256,7 +256,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, return -EFAULT; len = strnlen_user((void __user *)p, MAX_ARG_STRLEN); if (!len || len > MAX_ARG_STRLEN) - return 0; + return -EINVAL; p += len; } if (__put_user(0, argv)) @@ -268,7 +268,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, return -EFAULT; len = strnlen_user((void __user *)p, MAX_ARG_STRLEN); if (!len || len > MAX_ARG_STRLEN) - return 0; + return -EINVAL; p += len; } if (__put_user(0, envp)) -- cgit v1.2.3-70-g09d2 From e9baf6e59842285bcf9570f5094e4c27674a0f7c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 15 May 2008 04:49:12 -0400 Subject: [PATCH] return to old errno choice in mkdir() et.al. In case when both EEXIST and EROFS would apply we used to return the former in mkdir(2) and friends. Lest anyone suspects us of being consistent, in the same situation knfsd gave clients nfs_erofs... ro-bind series had switched the syscall side of things to returning -EROFS and immediately broke an application - namely, mkdir -p. Patch restores the original behaviour... Signed-off-by: Al Viro --- fs/namei.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 32fd9655485b..c7e43536c49a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2003,18 +2003,22 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir) if (IS_ERR(dentry)) goto fail; + if (dentry->d_inode) + goto eexist; /* * Special case - lookup gave negative, but... we had foo/bar/ * From the vfs_mknod() POV we just have a negative dentry - * all is fine. Let's be bastards - you had / on the end, you've * been asking for (non-existent) directory. -ENOENT for you. */ - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) - goto enoent; + if (unlikely(!is_dir && nd->last.name[nd->last.len])) { + dput(dentry); + dentry = ERR_PTR(-ENOENT); + } return dentry; -enoent: +eexist: dput(dentry); - dentry = ERR_PTR(-ENOENT); + dentry = ERR_PTR(-EEXIST); fail: return dentry; } -- cgit v1.2.3-70-g09d2 From 2b280fab12b6697b6a7a24a13aaf9f4339edd075 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 17 May 2008 03:12:45 +0000 Subject: [CIFS] add more complete mount options to cifs_show_options adds various options to cifs_show_options (displayed when you cat /proc/mounts with a cifs mount). I limited the new ones to values that are associated with the mount with the exception of "seal" (which is a per tree connection property, but I thought was important enough to show through). Eventually cifs's parse_mount_options also needs to be rewritten to use the match_token API but that would be a big enough change that I would prefer that changing parse_mount_options wait until next release. Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index b6436b888cf9..57e40c49d3b6 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1,7 +1,7 @@ /* * fs/cifs/cifsfs.c * - * Copyright (C) International Business Machines Corp., 2002,2007 + * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) * * Common Internet FileSystem (CIFS) client @@ -353,9 +353,38 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m) if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) || !(cifs_sb->tcon->unix_ext)) seq_printf(s, ",gid=%d", cifs_sb->mnt_gid); + if (!cifs_sb->tcon->unix_ext) { + seq_printf(s, ",file_mode=0%o,dir_mode=0%o", + cifs_sb->mnt_file_mode, + cifs_sb->mnt_dir_mode); + } + if (cifs_sb->tcon->seal) + seq_printf(s, ",seal"); + seq_printf(s, ",nocase"); } if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) seq_printf(s, ",posixpaths"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) + seq_printf(s, ",setuids"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) + seq_printf(s, ",serverino"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) + seq_printf(s, ",directio"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + seq_printf(s, ",nouser_xattr"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) + seq_printf(s, ",mapchars"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) + seq_printf(s, ",sfu"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + seq_printf(s, ",nobrl"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) + seq_printf(s, ",cifsacl"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + seq_printf(s, ",dynperm"); + if (m->mnt_sb->s_flags & MS_POSIXACL) + seq_printf(s, ",acl"); + seq_printf(s, ",rsize=%d", cifs_sb->rsize); seq_printf(s, ",wsize=%d", cifs_sb->wsize); } -- cgit v1.2.3-70-g09d2 From 6ee650467d5bf972d10441e99688e9b48171f99c Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Tue, 29 Apr 2008 15:01:13 -0400 Subject: [PATCH] open sessionid permissions The current permissions on sessionid are a little too restrictive. Signed-off-by: Steve Grubb Signed-off-by: Al Viro --- fs/proc/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 808cbdc193d3..c447e0743a3c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2441,7 +2441,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, loginuid), - REG("sessionid", S_IRUSR, sessionid), + REG("sessionid", S_IRUGO, sessionid), #endif #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), -- cgit v1.2.3-70-g09d2 From 88dd0be3874566796fa4ffbdf927a53c4a6a2f4b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 5 May 2008 19:47:29 -0400 Subject: nfsd: reorder printk in do_probe_callback to avoid use-after-free We're currently dereferencing the client after we drop our reference count to it. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 0b3ffa9840c2..4d4760e687c3 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -419,9 +419,9 @@ static int do_probe_callback(void *data) out_release_client: rpc_shutdown_client(client); out_err: - put_nfs4_client(clp); dprintk("NFSD: warning: no callback path to client %.*s\n", (int)clp->cl_name.len, clp->cl_name.data); + put_nfs4_client(clp); return status; } -- cgit v1.2.3-70-g09d2 From 7a936ce71eed7b887b8a0d6c54dd8a9072f71c9f Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 12 May 2008 10:04:51 -0500 Subject: dlm: convert connections_lock in a mutex The semaphore connections_lock is used as a mutex. Convert it to the mutex API. Signed-off-by: Matthias Kaehlcke Cc: Christine Caulfield Cc: David Teigland Cc: Steven Whitehouse Signed-off-by: Andrew Morton Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 7c1e5e5cccd8..c7d232a9ae12 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -138,7 +139,7 @@ static struct workqueue_struct *recv_workqueue; static struct workqueue_struct *send_workqueue; static DEFINE_IDR(connections_idr); -static DECLARE_MUTEX(connections_lock); +static DEFINE_MUTEX(connections_lock); static int max_nodeid; static struct kmem_cache *con_cache; @@ -205,9 +206,9 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation) { struct connection *con; - down(&connections_lock); + mutex_lock(&connections_lock); con = __nodeid2con(nodeid, allocation); - up(&connections_lock); + mutex_unlock(&connections_lock); return con; } @@ -218,15 +219,15 @@ static struct connection *assoc2con(int assoc_id) int i; struct connection *con; - down(&connections_lock); + mutex_lock(&connections_lock); for (i=0; i<=max_nodeid; i++) { con = __nodeid2con(i, 0); if (con && con->sctp_assoc == assoc_id) { - up(&connections_lock); + mutex_unlock(&connections_lock); return con; } } - up(&connections_lock); + mutex_unlock(&connections_lock); return NULL; } @@ -381,7 +382,7 @@ static void sctp_init_failed(void) int i; struct connection *con; - down(&connections_lock); + mutex_lock(&connections_lock); for (i=1; i<=max_nodeid; i++) { con = __nodeid2con(i, 0); if (!con) @@ -393,7 +394,7 @@ static void sctp_init_failed(void) } } } - up(&connections_lock); + mutex_unlock(&connections_lock); } /* Something happened to an association */ @@ -1417,7 +1418,7 @@ void dlm_lowcomms_stop(void) /* Set all the flags to prevent any socket activity. */ - down(&connections_lock); + mutex_lock(&connections_lock); for (i = 0; i <= max_nodeid; i++) { con = __nodeid2con(i, 0); if (con) { @@ -1426,11 +1427,11 @@ void dlm_lowcomms_stop(void) con->sock->sk->sk_user_data = NULL; } } - up(&connections_lock); + mutex_unlock(&connections_lock); work_stop(); - down(&connections_lock); + mutex_lock(&connections_lock); clean_writequeues(); for (i = 0; i <= max_nodeid; i++) { @@ -1443,7 +1444,7 @@ void dlm_lowcomms_stop(void) } } max_nodeid = 0; - up(&connections_lock); + mutex_unlock(&connections_lock); kmem_cache_destroy(con_cache); idr_init(&connections_idr); } -- cgit v1.2.3-70-g09d2 From 88ad23195e4609cef73b6fcf2b4c08aaaef33204 Mon Sep 17 00:00:00 2001 From: Leonardo Potenza Date: Sun, 11 May 2008 19:15:34 +0200 Subject: dlm: section mismatch warning fix Removed the section mismatch message: WARNING: fs/dlm/dlm.o(.init.text+0x132): Section mismatch in reference from the function init_module() to the function .exit.text:dlm_netlink_exit() Since dlm_netlink_exit() is called in the init_dlm() error handling, the __exit annotation has been removed. Signed-off-by: Leonardo Potenza Signed-off-by: David Teigland --- fs/dlm/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 714593621f4f..18bda83cc892 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -95,7 +95,7 @@ int __init dlm_netlink_init(void) return rv; } -void __exit dlm_netlink_exit(void) +void dlm_netlink_exit(void) { genl_unregister_ops(&family, &dlm_nl_ops); genl_unregister_family(&family); -- cgit v1.2.3-70-g09d2 From 0035a4b14931eb62a5f8a7762284c18e7ab14289 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 11 May 2008 22:01:29 +0200 Subject: dlm: tcp_connect_to_sock should check for -EINVAL, not EINVAL Signed-off-by: Marcin Slusarz Cc: Christine Caulfield Cc: David Teigland Cc: cluster-devel@redhat.com Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index c7d232a9ae12..637018c891ef 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -931,7 +931,7 @@ out_err: * errors we try again until the max number of retries is reached. */ if (result != -EHOSTUNREACH && result != -ENETUNREACH && - result != -ENETDOWN && result != EINVAL + result != -ENETDOWN && result != -EINVAL && result != -EPROTONOSUPPORT) { lowcomms_connect_sock(con); result = 0; -- cgit v1.2.3-70-g09d2 From 817d10bad56f2fdfa321b4a864a21295226b123a Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 13 May 2008 14:28:26 -0500 Subject: dlm: fix plock dev_write return value The return value on writes to the plock device should be the number of bytes written. It was returning 0 instead when an nfs lock callback was involved. Reported-by: Nathan Straz Signed-off-by: David Teigland --- fs/dlm/plock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index d6d6e370f89c..78878c5781ca 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -379,7 +379,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, struct plock_xop *xop; xop = (struct plock_xop *)op; if (xop->callback) - count = dlm_plock_callback(op); + dlm_plock_callback(op); else wake_up(&recv_wq); } else -- cgit v1.2.3-70-g09d2 From 89562b777c50d100d1694db7b1b023279839b9ae Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 19 May 2008 22:26:42 +0000 Subject: [CIFS] add missing seq_printf to cifs_show_options for hard mount option Also Kari Hurtta noticed a missing check in the same function which is now fixed. CC: Kari Hurtta Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 57e40c49d3b6..5df93fd6303f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -360,7 +360,10 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m) } if (cifs_sb->tcon->seal) seq_printf(s, ",seal"); + if (cifs_sb->tcon->nocase) seq_printf(s, ",nocase"); + if (cifs_sb->tcon->retry) + seq_printf(s, ",hard"); } if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) seq_printf(s, ",posixpaths"); -- cgit v1.2.3-70-g09d2 From 0e4bbde94fdc33f5b3d793166b21bf768ca3e098 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 20 May 2008 19:50:46 +0000 Subject: [CIFS] Enable DFS support for Unix query path info Final piece for handling DFS in unix_query_path_info, constructing a fake inode for the junction directory which the submount will cover. Acked-by: Igor Mammedov Signed-off-by: Steve French --- fs/cifs/AUTHORS | 1 + fs/cifs/CHANGES | 2 + fs/cifs/TODO | 15 ++++---- fs/cifs/inode.c | 117 +++++++++++++++++++++++++++++++++++--------------------- 4 files changed, 85 insertions(+), 50 deletions(-) (limited to 'fs') diff --git a/fs/cifs/AUTHORS b/fs/cifs/AUTHORS index 8848e4dfa026..9c136d7803d9 100644 --- a/fs/cifs/AUTHORS +++ b/fs/cifs/AUTHORS @@ -36,6 +36,7 @@ Miklos Szeredi Kazeon team for various fixes especially for 2.4 version. Asser Ferno (Change Notify support) Shaggy (Dave Kleikamp) for inumerable small fs suggestions and some good cleanup +Igor Mammedov (DFS support) Test case and Bug Report contributors ------------------------------------- diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 502a4c2b8414..28e3d5c5fcac 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,5 +1,7 @@ Version 1.53 ------------ +DFS support added (Microsoft Distributed File System client support needed +for referrals which enable a hierarchical name space among servers). Version 1.52 ------------ diff --git a/fs/cifs/TODO b/fs/cifs/TODO index 92c9feac440f..5aff46c61e52 100644 --- a/fs/cifs/TODO +++ b/fs/cifs/TODO @@ -1,4 +1,4 @@ -Version 1.52 January 3, 2008 +Version 1.53 May 20, 2008 A Partial List of Missing Features ================================== @@ -20,20 +20,21 @@ d) Cleanup now unneeded SessSetup code in fs/cifs/connect.c and add back in NTLMSSP code if any servers need it -e) ms-dfs and ms-dfs host name resolution cleanup - -f) fix NTLMv2 signing when two mounts with different users to same +e) fix NTLMv2 signing when two mounts with different users to same server. -g) Directory entry caching relies on a 1 second timer, rather than +f) Directory entry caching relies on a 1 second timer, rather than using FindNotify or equivalent. - (started) -h) quota support (needs minor kernel change since quota calls +g) quota support (needs minor kernel change since quota calls to make it to network filesystems or deviceless filesystems) -i) investigate sync behavior (including syncpage) and check +h) investigate sync behavior (including syncpage) and check for proper behavior of intr/nointr +i) improve support for very old servers (OS/2 and Win9x for example) +Including support for changing the time remotely (utimes command). + j) hook lower into the sockets api (as NFS/SunRPC does) to avoid the extra copy in/out of the socket buffers in some cases. diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9d9b56a9c08e..422d4e219fa4 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -161,77 +161,108 @@ static void cifs_unix_info_to_inode(struct inode *inode, spin_unlock(&inode->i_lock); } +static void fill_fake_finddataunix(FILE_UNIX_BASIC_INFO *pfnd_dat, + struct super_block *sb) +{ + struct inode *pinode = NULL; + + memset(pfnd_dat, sizeof(FILE_UNIX_BASIC_INFO), 0); + +/* __le64 pfnd_dat->EndOfFile = cpu_to_le64(0); + __le64 pfnd_dat->NumOfBytes = cpu_to_le64(0); + __u64 UniqueId = 0; */ + pfnd_dat->LastStatusChange = + cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + pfnd_dat->LastAccessTime = + cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + pfnd_dat->LastModificationTime = + cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + pfnd_dat->Type = cpu_to_le32(UNIX_DIR); + pfnd_dat->Permissions = cpu_to_le64(S_IXUGO | S_IRWXU); + pfnd_dat->Nlinks = cpu_to_le64(2); + if (sb->s_root) + pinode = sb->s_root->d_inode; + if (pinode == NULL) + return; + + /* fill in default values for the remaining based on root + inode since we can not query the server for this inode info */ + pfnd_dat->DevMajor = cpu_to_le64(MAJOR(pinode->i_rdev)); + pfnd_dat->DevMinor = cpu_to_le64(MINOR(pinode->i_rdev)); + pfnd_dat->Uid = cpu_to_le64(pinode->i_uid); + pfnd_dat->Gid = cpu_to_le64(pinode->i_gid); +} + int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *full_path, struct super_block *sb, int xid) { int rc = 0; - FILE_UNIX_BASIC_INFO findData; + FILE_UNIX_BASIC_INFO find_data; struct cifsTconInfo *pTcon; struct inode *inode; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); bool is_dfs_referral = false; + struct cifsInodeInfo *cifsInfo; + __u64 num_of_bytes; + __u64 end_of_file; pTcon = cifs_sb->tcon; cFYI(1, ("Getting info on %s", full_path)); -try_again_CIFSSMBUnixQPathInfo: /* could have done a find first instead but this returns more info */ - rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &findData, + rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); -/* dump_mem("\nUnixQPathInfo return data", &findData, - sizeof(findData)); */ if (rc) { if (rc == -EREMOTE && !is_dfs_referral) { is_dfs_referral = true; - goto try_again_CIFSSMBUnixQPathInfo; + cERROR(1, ("DFS ref")); /* BB removeme BB */ + /* for DFS, server does not give us real inode data */ + fill_fake_finddataunix(&find_data, sb); + rc = 0; } - goto cgiiu_exit; - } else { - struct cifsInodeInfo *cifsInfo; - __u64 num_of_bytes = le64_to_cpu(findData.NumOfBytes); - __u64 end_of_file = le64_to_cpu(findData.EndOfFile); + } + num_of_bytes = le64_to_cpu(find_data.NumOfBytes); + end_of_file = le64_to_cpu(find_data.EndOfFile); - /* get new inode */ + /* get new inode */ + if (*pinode == NULL) { + *pinode = new_inode(sb); if (*pinode == NULL) { - *pinode = new_inode(sb); - if (*pinode == NULL) { - rc = -ENOMEM; - goto cgiiu_exit; - } - /* Is an i_ino of zero legal? */ - /* Are there sanity checks we can use to ensure that - the server is really filling in that field? */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { - (*pinode)->i_ino = - (unsigned long)findData.UniqueId; - } /* note ino incremented to unique num in new_inode */ - if (sb->s_flags & MS_NOATIME) - (*pinode)->i_flags |= S_NOATIME | S_NOCMTIME; - - insert_inode_hash(*pinode); + rc = -ENOMEM; + goto cgiiu_exit; } + /* Is an i_ino of zero legal? */ + /* note ino incremented to unique num in new_inode */ + /* Are there sanity checks we can use to ensure that + the server is really filling in that field? */ + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) + (*pinode)->i_ino = (unsigned long)find_data.UniqueId; - inode = *pinode; - cifsInfo = CIFS_I(inode); + if (sb->s_flags & MS_NOATIME) + (*pinode)->i_flags |= S_NOATIME | S_NOCMTIME; - cFYI(1, ("Old time %ld", cifsInfo->time)); - cifsInfo->time = jiffies; - cFYI(1, ("New time %ld", cifsInfo->time)); - /* this is ok to set on every inode revalidate */ - atomic_set(&cifsInfo->inUse, 1); + insert_inode_hash(*pinode); + } - cifs_unix_info_to_inode(inode, &findData, 0); + inode = *pinode; + cifsInfo = CIFS_I(inode); + cFYI(1, ("Old time %ld", cifsInfo->time)); + cifsInfo->time = jiffies; + cFYI(1, ("New time %ld", cifsInfo->time)); + /* this is ok to set on every inode revalidate */ + atomic_set(&cifsInfo->inUse, 1); - if (num_of_bytes < end_of_file) - cFYI(1, ("allocation size less than end of file")); - cFYI(1, ("Size %ld and blocks %llu", - (unsigned long) inode->i_size, - (unsigned long long)inode->i_blocks)); + cifs_unix_info_to_inode(inode, &find_data, 0); - cifs_set_ops(inode, is_dfs_referral); - } + if (num_of_bytes < end_of_file) + cFYI(1, ("allocation size less than end of file")); + cFYI(1, ("Size %ld and blocks %llu", + (unsigned long) inode->i_size, + (unsigned long long)inode->i_blocks)); + + cifs_set_ops(inode, is_dfs_referral); cgiiu_exit: return rc; } -- cgit v1.2.3-70-g09d2 From b9a3260f25ab5d2ba5c8b9508e7952848b9d704b Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 20 May 2008 21:52:32 +0000 Subject: [CIFS] Enable DFS support for Windows query path info Final piece for handling DFS in query_path_info, constructing a fake inode for the junction directory which the submount will cover. This handles the non-Unix (Windows etc.) code path. Signed-off-by: Steve French --- fs/cifs/inode.c | 324 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 178 insertions(+), 146 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 422d4e219fa4..1cf43e101943 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -161,6 +161,12 @@ static void cifs_unix_info_to_inode(struct inode *inode, spin_unlock(&inode->i_lock); } + +/* + * Needed to setup inode data for the directory which is the + * junction to the new submount (ie to setup the fake directory + * which represents a DFS referral) + */ static void fill_fake_finddataunix(FILE_UNIX_BASIC_INFO *pfnd_dat, struct super_block *sb) { @@ -370,11 +376,42 @@ static int get_sfu_mode(struct inode *inode, #endif } +/* + * Needed to setup inode data for the directory which is the + * junction to the new submount (ie to setup the fake directory + * which represents a DFS referral) + */ +static void fill_fake_finddata(FILE_ALL_INFO *pfnd_dat, + struct super_block *sb) +{ + memset(pfnd_dat, sizeof(FILE_ALL_INFO), 0); + +/* __le64 pfnd_dat->AllocationSize = cpu_to_le64(0); + __le64 pfnd_dat->EndOfFile = cpu_to_le64(0); + __u8 pfnd_dat->DeletePending = 0; + __u8 pfnd_data->Directory = 0; + __le32 pfnd_dat->EASize = 0; + __u64 pfnd_dat->IndexNumber = 0; + __u64 pfnd_dat->IndexNumber1 = 0; */ + pfnd_dat->CreationTime = + cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + pfnd_dat->LastAccessTime = + cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + pfnd_dat->LastWriteTime = + cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + pfnd_dat->ChangeTime = + cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + pfnd_dat->Attributes = cpu_to_le32(ATTR_DIRECTORY); + pfnd_dat->NumberOfLinks = cpu_to_le32(2); +} + int cifs_get_inode_info(struct inode **pinode, const unsigned char *full_path, FILE_ALL_INFO *pfindData, struct super_block *sb, int xid, const __u16 *pfid) { int rc = 0; + __u32 attr; + struct cifsInodeInfo *cifsInfo; struct cifsTconInfo *pTcon; struct inode *inode; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); @@ -399,7 +436,6 @@ int cifs_get_inode_info(struct inode **pinode, return -ENOMEM; pfindData = (FILE_ALL_INFO *)buf; -try_again_CIFSSMBQPathInfo: /* could do find first instead but this returns more info */ rc = CIFSSMBQPathInfo(xid, pTcon, full_path, pfindData, 0 /* not legacy */, @@ -417,171 +453,167 @@ try_again_CIFSSMBQPathInfo: } } /* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */ - if (rc) { - if (rc == -EREMOTE && !is_dfs_referral) { - is_dfs_referral = true; - goto try_again_CIFSSMBQPathInfo; - } + if (rc == -EREMOTE) { + is_dfs_referral = true; + fill_fake_finddata(pfindData, sb); + rc = 0; + } else if (rc) goto cgii_exit; - } else { - struct cifsInodeInfo *cifsInfo; - __u32 attr = le32_to_cpu(pfindData->Attributes); - /* get new inode */ + attr = le32_to_cpu(pfindData->Attributes); + + /* get new inode */ + if (*pinode == NULL) { + *pinode = new_inode(sb); if (*pinode == NULL) { - *pinode = new_inode(sb); - if (*pinode == NULL) { - rc = -ENOMEM; - goto cgii_exit; - } - /* Is an i_ino of zero legal? Can we use that to check - if the server supports returning inode numbers? Are - there other sanity checks we can use to ensure that - the server is really filling in that field? */ + rc = -ENOMEM; + goto cgii_exit; + } + /* Is an i_ino of zero legal? Can we use that to check + if the server supports returning inode numbers? Are + there other sanity checks we can use to ensure that + the server is really filling in that field? */ - /* We can not use the IndexNumber field by default from - Windows or Samba (in ALL_INFO buf) but we can request - it explicitly. It may not be unique presumably if - the server has multiple devices mounted under one - share */ + /* We can not use the IndexNumber field by default from + Windows or Samba (in ALL_INFO buf) but we can request + it explicitly. It may not be unique presumably if + the server has multiple devices mounted under one share */ - /* There may be higher info levels that work but are - there Windows server or network appliances for which - IndexNumber field is not guaranteed unique? */ + /* There may be higher info levels that work but are + there Windows server or network appliances for which + IndexNumber field is not guaranteed unique? */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { - int rc1 = 0; - __u64 inode_num; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { + int rc1 = 0; + __u64 inode_num; - rc1 = CIFSGetSrvInodeNumber(xid, pTcon, + rc1 = CIFSGetSrvInodeNumber(xid, pTcon, full_path, &inode_num, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc1) { - cFYI(1, ("GetSrvInodeNum rc %d", rc1)); - /* BB EOPNOSUPP disable SERVER_INUM? */ - } else /* do we need cast or hash to ino? */ - (*pinode)->i_ino = inode_num; - } /* else ino incremented to unique num in new_inode*/ - if (sb->s_flags & MS_NOATIME) - (*pinode)->i_flags |= S_NOATIME | S_NOCMTIME; - insert_inode_hash(*pinode); - } - inode = *pinode; - cifsInfo = CIFS_I(inode); - cifsInfo->cifsAttrs = attr; - cFYI(1, ("Old time %ld", cifsInfo->time)); - cifsInfo->time = jiffies; - cFYI(1, ("New time %ld", cifsInfo->time)); - - /* blksize needs to be multiple of two. So safer to default to - blksize and blkbits set in superblock so 2**blkbits and blksize - will match rather than setting to: - (pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/ - - /* Linux can not store file creation time so ignore it */ - if (pfindData->LastAccessTime) - inode->i_atime = cifs_NTtimeToUnix - (le64_to_cpu(pfindData->LastAccessTime)); - else /* do not need to use current_fs_time - time not stored */ - inode->i_atime = CURRENT_TIME; - inode->i_mtime = + if (rc1) { + cFYI(1, ("GetSrvInodeNum rc %d", rc1)); + /* BB EOPNOSUPP disable SERVER_INUM? */ + } else /* do we need cast or hash to ino? */ + (*pinode)->i_ino = inode_num; + } /* else ino incremented to unique num in new_inode*/ + if (sb->s_flags & MS_NOATIME) + (*pinode)->i_flags |= S_NOATIME | S_NOCMTIME; + insert_inode_hash(*pinode); + } + inode = *pinode; + cifsInfo = CIFS_I(inode); + cifsInfo->cifsAttrs = attr; + cFYI(1, ("Old time %ld", cifsInfo->time)); + cifsInfo->time = jiffies; + cFYI(1, ("New time %ld", cifsInfo->time)); + + /* blksize needs to be multiple of two. So safer to default to + blksize and blkbits set in superblock so 2**blkbits and blksize + will match rather than setting to: + (pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/ + + /* Linux can not store file creation time so ignore it */ + if (pfindData->LastAccessTime) + inode->i_atime = cifs_NTtimeToUnix + (le64_to_cpu(pfindData->LastAccessTime)); + else /* do not need to use current_fs_time - time not stored */ + inode->i_atime = CURRENT_TIME; + inode->i_mtime = cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime)); - inode->i_ctime = - cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime)); - cFYI(0, ("Attributes came in as 0x%x", attr)); - if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) { - inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj; - inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj; - } + inode->i_ctime = + cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime)); + cFYI(DBG2, ("Attributes came in as 0x%x", attr)); + if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) { + inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj; + inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj; + } - /* set default mode. will override for dirs below */ - if (atomic_read(&cifsInfo->inUse) == 0) - /* new inode, can safely set these fields */ - inode->i_mode = cifs_sb->mnt_file_mode; - else /* since we set the inode type below we need to mask off - to avoid strange results if type changes and both - get orred in */ - inode->i_mode &= ~S_IFMT; -/* if (attr & ATTR_REPARSE) */ - /* We no longer handle these as symlinks because we could not - follow them due to the absolute path with drive letter */ - if (attr & ATTR_DIRECTORY) { - /* override default perms since we do not do byte range locking - on dirs */ - inode->i_mode = cifs_sb->mnt_dir_mode; - inode->i_mode |= S_IFDIR; - } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && - (cifsInfo->cifsAttrs & ATTR_SYSTEM) && - /* No need to le64 convert size of zero */ - (pfindData->EndOfFile == 0)) { - inode->i_mode = cifs_sb->mnt_file_mode; - inode->i_mode |= S_IFIFO; + /* set default mode. will override for dirs below */ + if (atomic_read(&cifsInfo->inUse) == 0) + /* new inode, can safely set these fields */ + inode->i_mode = cifs_sb->mnt_file_mode; + else /* since we set the inode type below we need to mask off + to avoid strange results if type changes and both + get orred in */ + inode->i_mode &= ~S_IFMT; +/* if (attr & ATTR_REPARSE) */ + /* We no longer handle these as symlinks because we could not + follow them due to the absolute path with drive letter */ + if (attr & ATTR_DIRECTORY) { + /* override default perms since we do not do byte range locking + on dirs */ + inode->i_mode = cifs_sb->mnt_dir_mode; + inode->i_mode |= S_IFDIR; + } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && + (cifsInfo->cifsAttrs & ATTR_SYSTEM) && + /* No need to le64 convert size of zero */ + (pfindData->EndOfFile == 0)) { + inode->i_mode = cifs_sb->mnt_file_mode; + inode->i_mode |= S_IFIFO; /* BB Finish for SFU style symlinks and devices */ - } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && - (cifsInfo->cifsAttrs & ATTR_SYSTEM)) { - if (decode_sfu_inode(inode, - le64_to_cpu(pfindData->EndOfFile), - full_path, - cifs_sb, xid)) - cFYI(1, ("Unrecognized sfu inode type")); - - cFYI(1, ("sfu mode 0%o", inode->i_mode)); - } else { - inode->i_mode |= S_IFREG; - /* treat the dos attribute of read-only as read-only - mode e.g. 555 */ - if (cifsInfo->cifsAttrs & ATTR_READONLY) - inode->i_mode &= ~(S_IWUGO); - else if ((inode->i_mode & S_IWUGO) == 0) - /* the ATTR_READONLY flag may have been */ - /* changed on server -- set any w bits */ - /* allowed by mnt_file_mode */ - inode->i_mode |= (S_IWUGO & - cifs_sb->mnt_file_mode); - /* BB add code here - - validate if device or weird share or device type? */ - } + } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && + (cifsInfo->cifsAttrs & ATTR_SYSTEM)) { + if (decode_sfu_inode(inode, le64_to_cpu(pfindData->EndOfFile), + full_path, cifs_sb, xid)) + cFYI(1, ("Unrecognized sfu inode type")); - spin_lock(&inode->i_lock); - if (is_size_safe_to_change(cifsInfo, - le64_to_cpu(pfindData->EndOfFile))) { - /* can not safely shrink the file size here if the - client is writing to it due to potential races */ - i_size_write(inode, le64_to_cpu(pfindData->EndOfFile)); - - /* 512 bytes (2**9) is the fake blocksize that must be - used for this calculation */ - inode->i_blocks = (512 - 1 + le64_to_cpu( - pfindData->AllocationSize)) >> 9; - } - spin_unlock(&inode->i_lock); + cFYI(1, ("sfu mode 0%o", inode->i_mode)); + } else { + inode->i_mode |= S_IFREG; + /* treat dos attribute of read-only as read-only mode eg 555 */ + if (cifsInfo->cifsAttrs & ATTR_READONLY) + inode->i_mode &= ~(S_IWUGO); + else if ((inode->i_mode & S_IWUGO) == 0) + /* the ATTR_READONLY flag may have been */ + /* changed on server -- set any w bits */ + /* allowed by mnt_file_mode */ + inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode); + /* BB add code to validate if device or weird share or device type? */ + } + + spin_lock(&inode->i_lock); + if (is_size_safe_to_change(cifsInfo, + le64_to_cpu(pfindData->EndOfFile))) { + /* can not safely shrink the file size here if the + client is writing to it due to potential races */ + i_size_write(inode, le64_to_cpu(pfindData->EndOfFile)); + + /* 512 bytes (2**9) is the fake blocksize that must be + used for this calculation */ + inode->i_blocks = (512 - 1 + le64_to_cpu( + pfindData->AllocationSize)) >> 9; + } + spin_unlock(&inode->i_lock); - inode->i_nlink = le32_to_cpu(pfindData->NumberOfLinks); + inode->i_nlink = le32_to_cpu(pfindData->NumberOfLinks); - /* BB fill in uid and gid here? with help from winbind? - or retrieve from NTFS stream extended attribute */ + /* BB fill in uid and gid here? with help from winbind? + or retrieve from NTFS stream extended attribute */ #ifdef CONFIG_CIFS_EXPERIMENTAL - /* fill in 0777 bits from ACL */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { - cFYI(1, ("Getting mode bits from ACL")); - acl_to_uid_mode(inode, full_path, pfid); - } + /* fill in 0777 bits from ACL */ + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { + cFYI(1, ("Getting mode bits from ACL")); + acl_to_uid_mode(inode, full_path, pfid); + } #endif - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { - /* fill in remaining high mode bits e.g. SUID, VTX */ - get_sfu_mode(inode, full_path, cifs_sb, xid); - } else if (atomic_read(&cifsInfo->inUse) == 0) { - inode->i_uid = cifs_sb->mnt_uid; - inode->i_gid = cifs_sb->mnt_gid; - /* set so we do not keep refreshing these fields with - bad data after user has changed them in memory */ - atomic_set(&cifsInfo->inUse, 1); - } - - cifs_set_ops(inode, is_dfs_referral); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { + /* fill in remaining high mode bits e.g. SUID, VTX */ + get_sfu_mode(inode, full_path, cifs_sb, xid); + } else if (atomic_read(&cifsInfo->inUse) == 0) { + inode->i_uid = cifs_sb->mnt_uid; + inode->i_gid = cifs_sb->mnt_gid; + /* set so we do not keep refreshing these fields with + bad data after user has changed them in memory */ + atomic_set(&cifsInfo->inUse, 1); } + + cifs_set_ops(inode, is_dfs_referral); + + + + cgii_exit: kfree(buf); return rc; -- cgit v1.2.3-70-g09d2 From 5651ced3ab196b5e7dc485c5777f210aa41e2d8d Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 20 May 2008 13:02:01 +0400 Subject: Fix possible access to undefined memory region. Signed-off-by: Igor Mammedov Signed-off-by: Steve French --- fs/cifs/dns_resolve.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 939e256f8497..f730ef35499e 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -134,10 +134,6 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) rkey = request_key(&key_type_dns_resolver, name, ""); if (!IS_ERR(rkey)) { data = rkey->payload.data; - cFYI(1, ("%s: resolved: %s to %s", __func__, - rkey->description, - *ip_addr - )); } else { cERROR(1, ("%s: unable to resolve: %s", __func__, name)); goto out; @@ -150,6 +146,11 @@ skip_upcall: if (*ip_addr) { memcpy(*ip_addr, data, len); (*ip_addr)[len] = '\0'; + if (!IS_ERR(rkey)) + cFYI(1, ("%s: resolved: %s to %s", __func__, + name, + *ip_addr + )); rc = 0; } else { rc = -ENOMEM; -- cgit v1.2.3-70-g09d2 From 397d71ddfda5b11b85e396d6ea822011c132b962 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 21 May 2008 03:49:46 +0000 Subject: [CIFS] Remove debug statement Signed-off-by: Steve French --- fs/cifs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 1cf43e101943..00ced97bd53a 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -222,7 +222,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, if (rc) { if (rc == -EREMOTE && !is_dfs_referral) { is_dfs_referral = true; - cERROR(1, ("DFS ref")); /* BB removeme BB */ + cFYI(DBG2, ("DFS ref")); /* for DFS, server does not give us real inode data */ fill_fake_finddataunix(&find_data, sb); rc = 0; -- cgit v1.2.3-70-g09d2 From 13c48c490208d9e70d8d66d56f96c5054db69af7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 21 May 2008 06:32:11 +0100 Subject: fix hppfs Makefile breakage Fallout from commit 46d7b522ebf486edbd096965d534cc6465e9e309 ("uml: move hppfs_kern.c to hppfs.c") Signed-off-by: Al Viro Acked-by: Jeff Dike Signed-off-by: Linus Torvalds --- fs/hppfs/Makefile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/hppfs/Makefile b/fs/hppfs/Makefile index 8a1f50344368..3a982bd975d2 100644 --- a/fs/hppfs/Makefile +++ b/fs/hppfs/Makefile @@ -3,7 +3,4 @@ # Licensed under the GPL # -hppfs-objs := hppfs.o - -obj-y = -obj-$(CONFIG_HPPFS) += $(hppfs-objs) +obj-$(CONFIG_HPPFS) += hppfs.o -- cgit v1.2.3-70-g09d2 From 79bc12a0a09c2eb1ccbb01c192045f994567bda2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 21 May 2008 06:32:11 +0100 Subject: ecryptfs fixes memcpy() from userland pointer is a Bad Thing(tm) Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/ecryptfs/miscdev.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 6560da1a58ce..50c994a249a5 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -243,7 +243,6 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, struct ecryptfs_daemon *daemon; struct ecryptfs_msg_ctx *msg_ctx; size_t packet_length_size; - u32 counter_nbo; char packet_length[3]; size_t i; size_t total_length; @@ -328,20 +327,18 @@ check_list: "pending message\n", __func__, count, total_length); goto out_unlock_msg_ctx; } - i = 0; - buf[i++] = msg_ctx->type; - counter_nbo = cpu_to_be32(msg_ctx->counter); - memcpy(&buf[i], (char *)&counter_nbo, 4); - i += 4; + rc = -EFAULT; + if (put_user(msg_ctx->type, buf)) + goto out_unlock_msg_ctx; + if (put_user(cpu_to_be32(msg_ctx->counter), (__be32 __user *)(buf + 1))) + goto out_unlock_msg_ctx; + i = 5; if (msg_ctx->msg) { - memcpy(&buf[i], packet_length, packet_length_size); + if (copy_to_user(&buf[i], packet_length, packet_length_size)) + goto out_unlock_msg_ctx; i += packet_length_size; - rc = copy_to_user(&buf[i], msg_ctx->msg, msg_ctx->msg_size); - if (rc) { - printk(KERN_ERR "%s: copy_to_user returned error " - "[%d]\n", __func__, rc); + if (copy_to_user(&buf[i], msg_ctx->msg, msg_ctx->msg_size)) goto out_unlock_msg_ctx; - } i += msg_ctx->msg_size; } rc = i; @@ -452,7 +449,8 @@ static ssize_t ecryptfs_miscdev_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - u32 counter_nbo, seq; + __be32 counter_nbo; + u32 seq; size_t packet_size, packet_size_length, i; ssize_t sz = 0; char *data; @@ -485,7 +483,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, count); goto out_free; } - memcpy((char *)&counter_nbo, &data[i], 4); + memcpy(&counter_nbo, &data[i], 4); seq = be32_to_cpu(counter_nbo); i += 4; rc = ecryptfs_parse_packet_length(&data[i], &packet_size, -- cgit v1.2.3-70-g09d2 From 9d8df6aa9b1ca74127b11537d91de492dbea666a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 21 May 2008 06:32:11 +0100 Subject: ocfs2 endianness fixes Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 41f84c92094f..10bfb466e068 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -2788,7 +2788,7 @@ static int ocfs2_merge_rec_right(struct inode *inode, BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); left_rec = &el->l_recs[index]; - if (index == le16_to_cpu(el->l_next_free_rec - 1) && + if (index == le16_to_cpu(el->l_next_free_rec) - 1 && le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) { /* we meet with a cross extent block merge. */ ret = ocfs2_get_right_path(inode, left_path, &right_path); @@ -2802,7 +2802,7 @@ static int ocfs2_merge_rec_right(struct inode *inode, BUG_ON(next_free <= 0); right_rec = &right_el->l_recs[0]; if (ocfs2_is_empty_extent(right_rec)) { - BUG_ON(le16_to_cpu(next_free) <= 1); + BUG_ON(next_free <= 1); right_rec = &right_el->l_recs[1]; } -- cgit v1.2.3-70-g09d2 From 0d817bc0d6cdd92ff1ab2e98dd5878659a48659c Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 22 May 2008 02:02:03 +0000 Subject: [CIFS] Remove redundant NULL check Noticed by Coverity checker. Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 7b9938445b07..9b8b4cfdf993 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2159,8 +2159,7 @@ copyRetry: cFYI(1, ("Send error in copy = %d with %d files copied", rc, le16_to_cpu(pSMBr->CopyCount))); } - if (pSMB) - cifs_buf_release(pSMB); + cifs_buf_release(pSMB); if (rc == -EAGAIN) goto copyRetry; @@ -2249,8 +2248,7 @@ createSymLinkRetry: if (rc) cFYI(1, ("Send error in SetPathInfo create symlink = %d", rc)); - if (pSMB) - cifs_buf_release(pSMB); + cifs_buf_release(pSMB); if (rc == -EAGAIN) goto createSymLinkRetry; @@ -4095,8 +4093,7 @@ getDFSRetry: target_nodes, nls_codepage); GetDFSRefExit: - if (pSMB) - cifs_buf_release(pSMB); + cifs_buf_release(pSMB); if (rc == -EAGAIN) goto getDFSRetry; @@ -5117,8 +5114,7 @@ setPermsRetry: if (rc) cFYI(1, ("SetPathInfo (perms) returned %d", rc)); - if (pSMB) - cifs_buf_release(pSMB); + cifs_buf_release(pSMB); if (rc == -EAGAIN) goto setPermsRetry; return rc; @@ -5340,8 +5336,7 @@ QAllEAsRetry: } } } - if (pSMB) - cifs_buf_release(pSMB); + cifs_buf_release(pSMB); if (rc == -EAGAIN) goto QAllEAsRetry; @@ -5490,8 +5485,7 @@ QEARetry: } } } - if (pSMB) - cifs_buf_release(pSMB); + cifs_buf_release(pSMB); if (rc == -EAGAIN) goto QEARetry; -- cgit v1.2.3-70-g09d2 From e4058245ac0c4d9a517cda688a35aef065cb7f4e Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 2 Apr 2008 17:33:47 +0400 Subject: Adds username in the upcall key for unattended mounts with keytab Signed-off-by: Igor Mammedov Signed-off-by: Steve French --- fs/cifs/cifs_spnego.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 6653e29637a7..7013aaff6aed 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -119,6 +119,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) dp = description + strlen(description); sprintf(dp, ";uid=0x%x", sesInfo->linux_uid); + dp = description + strlen(description); + sprintf(dp, ";user=%s", sesInfo->userName); + cFYI(1, ("key description = %s", description)); spnego_key = request_key(&cifs_spnego_key_type, description, ""); -- cgit v1.2.3-70-g09d2 From 0a891adccc867c28b022128bc342a779e476c816 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 22 May 2008 14:20:21 +0000 Subject: [CIFS] Fix reversed memset arguments Signed-off-by: Dave Jones Signed-off-by: Steve French --- fs/cifs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 00ced97bd53a..129dbfe4dca7 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -172,7 +172,7 @@ static void fill_fake_finddataunix(FILE_UNIX_BASIC_INFO *pfnd_dat, { struct inode *pinode = NULL; - memset(pfnd_dat, sizeof(FILE_UNIX_BASIC_INFO), 0); + memset(pfnd_dat, 0, sizeof(FILE_UNIX_BASIC_INFO)); /* __le64 pfnd_dat->EndOfFile = cpu_to_le64(0); __le64 pfnd_dat->NumOfBytes = cpu_to_le64(0); @@ -384,7 +384,7 @@ static int get_sfu_mode(struct inode *inode, static void fill_fake_finddata(FILE_ALL_INFO *pfnd_dat, struct super_block *sb) { - memset(pfnd_dat, sizeof(FILE_ALL_INFO), 0); + memset(pfnd_dat, 0, sizeof(FILE_ALL_INFO)); /* __le64 pfnd_dat->AllocationSize = cpu_to_le64(0); __le64 pfnd_dat->EndOfFile = cpu_to_le64(0); -- cgit v1.2.3-70-g09d2 From 978b7237123d007b9fa983af6e0e2fa8f97f9934 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Mon, 19 May 2008 16:29:46 +1000 Subject: [XFS] Fix fsync() b0rkage. xfs_fsync() fails to wait for data I/O completion before checking if the inode is dirty or clean to decide whether to log the inode or not. This misses inode size updates when the data flushed by the fsync() is extending the file. Hence, like fdatasync(), we need to wait for I/o completion first, then check the inode for cleanliness. Doing so makes the behaviour of xfs_fsync() identical for fsync and fdatasync and we *always* use synchronous semantics if the inode is dirty. Therefore also kill the differences and remove the unused flags from the xfs_fsync function and callers. SGI-PV: 981296 SGI-Modid: xfs-linux-melb:xfs-kern:31033a Signed-off-by: David Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_file.c | 17 ++++--- fs/xfs/linux-2.6/xfs_vnode.h | 8 ---- fs/xfs/xfs_vnodeops.c | 112 ++++++++++++++++--------------------------- fs/xfs/xfs_vnodeops.h | 3 +- 4 files changed, 54 insertions(+), 86 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 65e78c13d4ae..5f60363b9343 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -184,19 +184,24 @@ xfs_file_release( return -xfs_release(XFS_I(inode)); } +/* + * We ignore the datasync flag here because a datasync is effectively + * identical to an fsync. That is, datasync implies that we need to write + * only the metadata needed to be able to access the data that is written + * if we crash after the call completes. Hence if we are writing beyond + * EOF we have to log the inode size change as well, which makes it a + * full fsync. If we don't write beyond EOF, the inode core will be + * clean in memory and so we don't need to log the inode, just like + * fsync. + */ STATIC int xfs_file_fsync( struct file *filp, struct dentry *dentry, int datasync) { - int flags = FSYNC_WAIT; - - if (datasync) - flags |= FSYNC_DATA; xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); - return -xfs_fsync(XFS_I(dentry->d_inode), flags, - (xfs_off_t)0, (xfs_off_t)-1); + return -xfs_fsync(XFS_I(dentry->d_inode)); } /* diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 9d73cb5c0fc7..25eb2a9e8d9b 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -229,14 +229,6 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) #define ATTR_NOLOCK 0x200 /* Don't grab any conflicting locks */ #define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */ -/* - * Flags to vop_fsync/reclaim. - */ -#define FSYNC_NOWAIT 0 /* asynchronous flush */ -#define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */ -#define FSYNC_INVAL 0x2 /* flush and invalidate cached data */ -#define FSYNC_DATA 0x4 /* synchronous fsync of data only */ - /* * Tracking vnode activity. */ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 70702a60b4bb..e475e3717eb3 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -856,18 +856,14 @@ xfs_readlink( /* * xfs_fsync * - * This is called to sync the inode and its data out to disk. - * We need to hold the I/O lock while flushing the data, and - * the inode lock while flushing the inode. The inode lock CANNOT - * be held while flushing the data, so acquire after we're done - * with that. + * This is called to sync the inode and its data out to disk. We need to hold + * the I/O lock while flushing the data, and the inode lock while flushing the + * inode. The inode lock CANNOT be held while flushing the data, so acquire + * after we're done with that. */ int xfs_fsync( - xfs_inode_t *ip, - int flag, - xfs_off_t start, - xfs_off_t stop) + xfs_inode_t *ip) { xfs_trans_t *tp; int error; @@ -875,103 +871,79 @@ xfs_fsync( xfs_itrace_entry(ip); - ASSERT(start >= 0 && stop >= -1); - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); - if (flag & FSYNC_DATA) - filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); + /* capture size updates in I/O completion before writing the inode. */ + error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); + if (error) + return XFS_ERROR(error); /* - * We always need to make sure that the required inode state - * is safe on disk. The vnode might be clean but because - * of committed transactions that haven't hit the disk yet. - * Likewise, there could be unflushed non-transactional - * changes to the inode core that have to go to disk. + * We always need to make sure that the required inode state is safe on + * disk. The vnode might be clean but we still might need to force the + * log because of committed transactions that haven't hit the disk yet. + * Likewise, there could be unflushed non-transactional changes to the + * inode core that have to go to disk and this requires us to issue + * a synchronous transaction to capture these changes correctly. * - * The following code depends on one assumption: that - * any transaction that changes an inode logs the core - * because it has to change some field in the inode core - * (typically nextents or nblocks). That assumption - * implies that any transactions against an inode will - * catch any non-transactional updates. If inode-altering - * transactions exist that violate this assumption, the - * code breaks. Right now, it figures that if the involved - * update_* field is clear and the inode is unpinned, the - * inode is clean. Either it's been flushed or it's been - * committed and the commit has hit the disk unpinning the inode. - * (Note that xfs_inode_item_format() called at commit clears - * the update_* fields.) + * This code relies on the assumption that if the update_* fields + * of the inode are clear and the inode is unpinned then it is clean + * and no action is required. */ xfs_ilock(ip, XFS_ILOCK_SHARED); - /* If we are flushing data then we care about update_size - * being set, otherwise we care about update_core - */ - if ((flag & FSYNC_DATA) ? - (ip->i_update_size == 0) : - (ip->i_update_core == 0)) { + if (!(ip->i_update_size || ip->i_update_core)) { /* - * Timestamps/size haven't changed since last inode - * flush or inode transaction commit. That means - * either nothing got written or a transaction - * committed which caught the updates. If the - * latter happened and the transaction hasn't - * hit the disk yet, the inode will be still - * be pinned. If it is, force the log. + * Timestamps/size haven't changed since last inode flush or + * inode transaction commit. That means either nothing got + * written or a transaction committed which caught the updates. + * If the latter happened and the transaction hasn't hit the + * disk yet, the inode will be still be pinned. If it is, + * force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { - _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, - XFS_LOG_FORCE | - ((flag & FSYNC_WAIT) - ? XFS_LOG_SYNC : 0), + error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, + XFS_LOG_FORCE | XFS_LOG_SYNC, &log_flushed); } else { /* - * If the inode is not pinned and nothing - * has changed we don't need to flush the - * cache. + * If the inode is not pinned and nothing has changed + * we don't need to flush the cache. */ changed = 0; } - error = 0; } else { /* - * Kick off a transaction to log the inode - * core to get the updates. Make it - * sync if FSYNC_WAIT is passed in (which - * is done by everybody but specfs). The - * sync transaction will also force the log. + * Kick off a transaction to log the inode core to get the + * updates. The sync transaction will also force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); - if ((error = xfs_trans_reserve(tp, 0, - XFS_FSYNC_TS_LOG_RES(ip->i_mount), - 0, 0, 0))) { + error = xfs_trans_reserve(tp, 0, + XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); + if (error) { xfs_trans_cancel(tp, 0); return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); /* - * Note - it's possible that we might have pushed - * ourselves out of the way during trans_reserve - * which would flush the inode. But there's no - * guarantee that the inode buffer has actually - * gone out yet (it's delwri). Plus the buffer - * could be pinned anyway if it's part of an - * inode in another recent transaction. So we - * play it safe and fire off the transaction anyway. + * Note - it's possible that we might have pushed ourselves out + * of the way during trans_reserve which would flush the inode. + * But there's no guarantee that the inode buffer has actually + * gone out yet (it's delwri). Plus the buffer could be pinned + * anyway if it's part of an inode in another recent + * transaction. So we play it safe and fire off the + * transaction anyway. */ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (flag & FSYNC_WAIT) - xfs_trans_set_sync(tp); + xfs_trans_set_sync(tp); error = _xfs_trans_commit(tp, 0, &log_flushed); xfs_iunlock(ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 8abe8f186e20..57335ba4ce53 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -18,8 +18,7 @@ int xfs_open(struct xfs_inode *ip); int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, struct cred *credp); int xfs_readlink(struct xfs_inode *ip, char *link); -int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start, - xfs_off_t stop); +int xfs_fsync(struct xfs_inode *ip); int xfs_release(struct xfs_inode *ip); int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, -- cgit v1.2.3-70-g09d2 From 49383b0e98ad1f69ff4c816eb1961f703df12318 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Mon, 19 May 2008 16:29:34 +1000 Subject: [XFS] Don't allow memory reclaim to wait on the filesystem in inode writeback If we allow memory reclaim to wait on the pages under writeback in inode cluster writeback we could deadlock because we are currently holding the ILOCK on the initial writeback inode which is needed in data I/O completion to change the file size or do unwritten extent conversion before the pages are taken out of writeback state. SGI-PV: 981091 SGI-Modid: xfs-linux-melb:xfs-kern:31015a Signed-off-by: David Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index cf0bb9c1d621..739ea45a9d10 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2986,7 +2986,7 @@ xfs_iflush_cluster( ASSERT(pag->pag_ici_init); ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *); - ilist = kmem_alloc(ilist_size, KM_MAYFAIL); + ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); if (!ilist) return 0; -- cgit v1.2.3-70-g09d2 From c8f5f12e46f079a954d4f7163ba59dadee08ca26 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Tue, 20 May 2008 11:30:15 +1000 Subject: [XFS] Fix inode list allocation size in writeback. We only need to allocate space for the number of inodes in the cluster when writing back inodes, not every byte in the inode cluster. This reduces the amount of memory needing to be allocated to 256 bytes instead of 64k. SGI-PV: 981949 SGI-Modid: xfs-linux-melb:xfs-kern:31182a Signed-off-by: David Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_inode.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 739ea45a9d10..e569bf5d6cf0 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2974,6 +2974,7 @@ xfs_iflush_cluster( xfs_mount_t *mp = ip->i_mount; xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); unsigned long first_index, mask; + unsigned long inodes_per_cluster; int ilist_size; xfs_inode_t **ilist; xfs_inode_t *iq; @@ -2985,7 +2986,8 @@ xfs_iflush_cluster( ASSERT(pag->pagi_inodeok); ASSERT(pag->pag_ici_init); - ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *); + inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; + ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); if (!ilist) return 0; @@ -2995,8 +2997,7 @@ xfs_iflush_cluster( read_lock(&pag->pag_ici_lock); /* really need a gang lookup range call here */ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, - first_index, - XFS_INODE_CLUSTER_SIZE(mp)); + first_index, inodes_per_cluster); if (nr_found == 0) goto out_free; -- cgit v1.2.3-70-g09d2 From 6ab455eeaff6893cd06da33843e840d888cdc04a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 May 2008 16:34:42 +1000 Subject: [XFS] Fix memory corruption with small buffer reads When we have multiple buffers in a single page for a blocksize == pagesize filesystem we might overwrite the page contents if two callers hit it shortly after each other. To prevent that we need to keep the page locked until I/O is completed and the page marked uptodate. Thanks to Eric Sandeen for triaging this bug and finding a reproducible testcase and Dave Chinner for additional advice. This should fix kernel.org bz #10421. Tested-by: Eric Sandeen SGI-PV: 981813 SGI-Modid: xfs-linux-melb:xfs-kern:31173a Signed-off-by: Christoph Hellwig Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_buf.c | 24 ++++++++++++++++++++---- fs/xfs/linux-2.6/xfs_buf.h | 19 +++++++++++++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 5105015a75ad..98e0e86093b4 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -387,6 +387,8 @@ _xfs_buf_lookup_pages( if (unlikely(page == NULL)) { if (flags & XBF_READ_AHEAD) { bp->b_page_count = i; + for (i = 0; i < bp->b_page_count; i++) + unlock_page(bp->b_pages[i]); return -ENOMEM; } @@ -416,17 +418,24 @@ _xfs_buf_lookup_pages( ASSERT(!PagePrivate(page)); if (!PageUptodate(page)) { page_count--; - if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) { + if (blocksize >= PAGE_CACHE_SIZE) { + if (flags & XBF_READ) + bp->b_flags |= _XBF_PAGE_LOCKED; + } else if (!PagePrivate(page)) { if (test_page_region(page, offset, nbytes)) page_count++; } } - unlock_page(page); bp->b_pages[i] = page; offset = 0; } + if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { + for (i = 0; i < bp->b_page_count; i++) + unlock_page(bp->b_pages[i]); + } + if (page_count == bp->b_page_count) bp->b_flags |= XBF_DONE; @@ -746,6 +755,7 @@ xfs_buf_associate_memory( bp->b_count_desired = len; bp->b_buffer_length = buflen; bp->b_flags |= XBF_MAPPED; + bp->b_flags &= ~_XBF_PAGE_LOCKED; return 0; } @@ -1093,8 +1103,10 @@ _xfs_buf_ioend( xfs_buf_t *bp, int schedule) { - if (atomic_dec_and_test(&bp->b_io_remaining) == 1) + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { + bp->b_flags &= ~_XBF_PAGE_LOCKED; xfs_buf_ioend(bp, schedule); + } } STATIC void @@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io( if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); + + if (bp->b_flags & _XBF_PAGE_LOCKED) + unlock_page(page); } while (bvec >= bio->bi_io_vec); _xfs_buf_ioend(bp, 1); @@ -1163,7 +1178,8 @@ _xfs_buf_ioapply( * filesystem block size is not smaller than the page size. */ if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && - (bp->b_flags & XBF_READ) && + ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) == + (XBF_READ|_XBF_PAGE_LOCKED)) && (blocksize >= PAGE_CACHE_SIZE)) { bio = bio_alloc(GFP_NOIO, 1); diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 841d7883528d..f948ec7ba9a4 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -66,6 +66,25 @@ typedef enum { _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ + + /* + * Special flag for supporting metadata blocks smaller than a FSB. + * + * In this case we can have multiple xfs_buf_t on a single page and + * need to lock out concurrent xfs_buf_t readers as they only + * serialise access to the buffer. + * + * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation + * between reads of the page. Hence we can have one thread read the + * page and modify it, but then race with another thread that thinks + * the page is not up-to-date and hence reads it again. + * + * The result is that the first modifcation to the page is lost. + * This sort of AGF/AGI reading race can happen when unlinking inodes + * that require truncation and results in the AGI unlinked list + * modifications being lost. + */ + _XBF_PAGE_LOCKED = (1 << 22), } xfs_buf_flags_t; typedef enum { -- cgit v1.2.3-70-g09d2 From aaa9bbe039febf1d3a0f3a374deea0680d9f5758 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 23 May 2008 17:38:32 +0000 Subject: [CIFS] remove unused variables CC: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 +-- fs/cifs/cifssmb.c | 6 ++---- fs/cifs/file.c | 7 ------- fs/cifs/misc.c | 3 +-- fs/cifs/readdir.c | 6 +----- 5 files changed, 5 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 08914053242b..9cfcf326ead3 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -333,7 +333,6 @@ struct cifsFileInfo { bool messageMode:1; /* for pipes: message vs byte mode */ atomic_t wrtPending; /* handle in use - defer close */ struct semaphore fh_sem; /* prevents reopen race after dead ses*/ - char *search_resume_name; /* BB removeme BB */ struct cifs_search_info srch_inf; }; @@ -626,7 +625,7 @@ GLOBAL_EXTERN atomic_t tcpSesAllocCount; GLOBAL_EXTERN atomic_t tcpSesReconnectCount; GLOBAL_EXTERN atomic_t tconInfoReconnectCount; -/* Various Debug counters to remove someday (BB) */ +/* Various Debug counters */ GLOBAL_EXTERN atomic_t bufAllocCount; /* current number allocated */ #ifdef CONFIG_CIFS_STATS2 GLOBAL_EXTERN atomic_t totBufAllocCount; /* total allocated over all time */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 9b8b4cfdf993..174bf8aca237 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1728,7 +1728,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, { int rc = 0; LOCK_REQ *pSMB = NULL; - LOCK_RSP *pSMBr = NULL; +/* LOCK_RSP *pSMBr = NULL; */ /* No response data other than rc to parse */ int bytes_returned; int timeout = 0; __u16 count; @@ -1739,8 +1739,6 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, if (rc) return rc; - pSMBr = (LOCK_RSP *)pSMB; /* BB removeme BB */ - if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) { timeout = CIFS_ASYNC_OP; /* no response expected */ pSMB->Timeout = 0; @@ -1774,7 +1772,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, if (waitFlag) { rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned); + (struct smb_hdr *) pSMB, &bytes_returned); cifs_small_buf_release(pSMB); } else { rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *)pSMB, diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8636cec2642c..0aac824371a5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -546,7 +546,6 @@ int cifs_close(struct inode *inode, struct file *file) msleep(timeout); timeout *= 8; } - kfree(pSMBFile->search_resume_name); kfree(file->private_data); file->private_data = NULL; } else @@ -605,12 +604,6 @@ int cifs_closedir(struct inode *inode, struct file *file) else cifs_buf_release(ptmp); } - ptmp = pCFileStruct->search_resume_name; - if (ptmp) { - cFYI(1, ("closedir free resume name")); - pCFileStruct->search_resume_name = NULL; - kfree(ptmp); - } kfree(file->private_data); file->private_data = NULL; } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1d69b8014e0b..4b17f8fe3157 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -519,8 +519,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) pnotify = (struct file_notify_information *) ((char *)&pSMBr->hdr.Protocol + data_offset); cFYI(1, ("dnotify on %s Action: 0x%x", - pnotify->FileName, - pnotify->Action)); /* BB removeme BB */ + pnotify->FileName, pnotify->Action)); /* cifs_dump_mem("Rcvd notify Data: ",buf, sizeof(struct smb_hdr)+60); */ return true; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 713c25110197..df2c3c466ee1 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -675,8 +675,6 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, cifsFile->invalidHandle = true; CIFSFindClose(xid, pTcon, cifsFile->netfid); } - kfree(cifsFile->search_resume_name); - cifsFile->search_resume_name = NULL; if (cifsFile->srch_inf.ntwrk_buf_start) { cFYI(1, ("freeing SMB ff cache buf on search rewind")); if (cifsFile->srch_inf.smallBuf) @@ -1043,9 +1041,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) } /* else { cifsFile->invalidHandle = true; CIFSFindClose(xid, pTcon, cifsFile->netfid); - } - kfree(cifsFile->search_resume_name); - cifsFile->search_resume_name = NULL; */ + } */ rc = find_cifs_entry(xid, pTcon, file, ¤t_entry, &num_to_fill); -- cgit v1.2.3-70-g09d2 From 4468eb3fd102cad559e51594a01cbc65b994d264 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 22 May 2008 09:31:40 -0400 Subject: on non-posix shares, clear write bits in mode when ATTR_READONLY is set When mounting a share with posix extensions disabled, cifs_get_inode_info turns off all the write bits in the mode for regular files if ATTR_READONLY is set. Directories and other inode types, however, can also have ATTR_READONLY set, but the mode gives no indication of this. This patch makes this apply to other inode types besides regular files. It also cleans up how modes are set in cifs_get_inode_info for both the "normal" and "dynperm" cases. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/inode.c | 76 ++++++++++++++++++++++++++----------------------------- fs/cifs/readdir.c | 71 ++++++++++++++++++++++++++++----------------------- 2 files changed, 75 insertions(+), 72 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 129dbfe4dca7..ae5bcaf2031c 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -418,6 +418,7 @@ int cifs_get_inode_info(struct inode **pinode, char *buf = NULL; bool adjustTZ = false; bool is_dfs_referral = false; + umode_t default_mode; pTcon = cifs_sb->tcon; cFYI(1, ("Getting info on %s", full_path)); @@ -530,47 +531,42 @@ int cifs_get_inode_info(struct inode **pinode, inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj; } - /* set default mode. will override for dirs below */ - if (atomic_read(&cifsInfo->inUse) == 0) - /* new inode, can safely set these fields */ - inode->i_mode = cifs_sb->mnt_file_mode; - else /* since we set the inode type below we need to mask off - to avoid strange results if type changes and both - get orred in */ - inode->i_mode &= ~S_IFMT; -/* if (attr & ATTR_REPARSE) */ - /* We no longer handle these as symlinks because we could not - follow them due to the absolute path with drive letter */ - if (attr & ATTR_DIRECTORY) { - /* override default perms since we do not do byte range locking - on dirs */ - inode->i_mode = cifs_sb->mnt_dir_mode; - inode->i_mode |= S_IFDIR; - } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && - (cifsInfo->cifsAttrs & ATTR_SYSTEM) && - /* No need to le64 convert size of zero */ - (pfindData->EndOfFile == 0)) { - inode->i_mode = cifs_sb->mnt_file_mode; - inode->i_mode |= S_IFIFO; -/* BB Finish for SFU style symlinks and devices */ - } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && - (cifsInfo->cifsAttrs & ATTR_SYSTEM)) { - if (decode_sfu_inode(inode, le64_to_cpu(pfindData->EndOfFile), - full_path, cifs_sb, xid)) - cFYI(1, ("Unrecognized sfu inode type")); - - cFYI(1, ("sfu mode 0%o", inode->i_mode)); + /* get default inode mode */ + if (attr & ATTR_DIRECTORY) + default_mode = cifs_sb->mnt_dir_mode; + else + default_mode = cifs_sb->mnt_file_mode; + + /* set permission bits */ + if (atomic_read(&cifsInfo->inUse) == 0 || + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) + inode->i_mode = default_mode; + else { + /* just reenable write bits if !ATTR_READONLY */ + if ((inode->i_mode & S_IWUGO) == 0 && + (attr & ATTR_READONLY) == 0) + inode->i_mode |= (S_IWUGO & default_mode); + inode->i_mode &= ~S_IFMT; + } + /* clear write bits if ATTR_READONLY is set */ + if (attr & ATTR_READONLY) + inode->i_mode &= ~S_IWUGO; + + /* set inode type */ + if ((attr & ATTR_SYSTEM) && + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { + /* no need to fix endianness on 0 */ + if (pfindData->EndOfFile == 0) + inode->i_mode |= S_IFIFO; + else if (decode_sfu_inode(inode, + le64_to_cpu(pfindData->EndOfFile), + full_path, cifs_sb, xid)) + cFYI(1, ("unknown SFU file type\n")); } else { - inode->i_mode |= S_IFREG; - /* treat dos attribute of read-only as read-only mode eg 555 */ - if (cifsInfo->cifsAttrs & ATTR_READONLY) - inode->i_mode &= ~(S_IWUGO); - else if ((inode->i_mode & S_IWUGO) == 0) - /* the ATTR_READONLY flag may have been */ - /* changed on server -- set any w bits */ - /* allowed by mnt_file_mode */ - inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode); - /* BB add code to validate if device or weird share or device type? */ + if (attr & ATTR_DIRECTORY) + inode->i_mode |= S_IFDIR; + else + inode->i_mode |= S_IFREG; } spin_lock(&inode->i_lock); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index df2c3c466ee1..83f306954883 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -132,6 +132,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, __u32 attr; __u64 allocation_size; __u64 end_of_file; + umode_t default_mode; /* save mtime and size */ local_mtime = tmp_inode->i_mtime; @@ -187,48 +188,54 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, if (atomic_read(&cifsInfo->inUse) == 0) { tmp_inode->i_uid = cifs_sb->mnt_uid; tmp_inode->i_gid = cifs_sb->mnt_gid; - /* set default mode. will override for dirs below */ - tmp_inode->i_mode = cifs_sb->mnt_file_mode; - } else { - /* mask off the type bits since it gets set - below and we do not want to get two type - bits set */ + } + + if (attr & ATTR_DIRECTORY) + default_mode = cifs_sb->mnt_dir_mode; + else + default_mode = cifs_sb->mnt_file_mode; + + /* set initial permissions */ + if ((atomic_read(&cifsInfo->inUse) == 0) || + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) + tmp_inode->i_mode = default_mode; + else { + /* just reenable write bits if !ATTR_READONLY */ + if ((tmp_inode->i_mode & S_IWUGO) == 0 && + (attr & ATTR_READONLY) == 0) + tmp_inode->i_mode |= (S_IWUGO & default_mode); + tmp_inode->i_mode &= ~S_IFMT; } - if (attr & ATTR_DIRECTORY) { - *pobject_type = DT_DIR; - /* override default perms since we do not lock dirs */ - if (atomic_read(&cifsInfo->inUse) == 0) - tmp_inode->i_mode = cifs_sb->mnt_dir_mode; - tmp_inode->i_mode |= S_IFDIR; - } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && - (attr & ATTR_SYSTEM)) { + /* clear write bits if ATTR_READONLY is set */ + if (attr & ATTR_READONLY) + tmp_inode->i_mode &= ~S_IWUGO; + + /* set inode type */ + if ((attr & ATTR_SYSTEM) && + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { if (end_of_file == 0) { - *pobject_type = DT_FIFO; tmp_inode->i_mode |= S_IFIFO; + *pobject_type = DT_FIFO; } else { - /* rather than get the type here, we mark the - inode as needing revalidate and get the real type - (blk vs chr vs. symlink) later ie in lookup */ - *pobject_type = DT_REG; + /* + * trying to get the type can be slow, so just call + * this a regular file for now, and mark for reval + */ tmp_inode->i_mode |= S_IFREG; + *pobject_type = DT_REG; cifsInfo->time = 0; } -/* we no longer mark these because we could not follow them */ -/* } else if (attr & ATTR_REPARSE) { - *pobject_type = DT_LNK; - tmp_inode->i_mode |= S_IFLNK; */ } else { - *pobject_type = DT_REG; - tmp_inode->i_mode |= S_IFREG; - if (attr & ATTR_READONLY) - tmp_inode->i_mode &= ~(S_IWUGO); - else if ((tmp_inode->i_mode & S_IWUGO) == 0) - /* the ATTR_READONLY flag may have been changed on */ - /* server -- set any w bits allowed by mnt_file_mode */ - tmp_inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode); - } /* could add code here - to validate if device or weird share type? */ + if (attr & ATTR_DIRECTORY) { + tmp_inode->i_mode |= S_IFDIR; + *pobject_type = DT_DIR; + } else { + tmp_inode->i_mode |= S_IFREG; + *pobject_type = DT_REG; + } + } /* can not fill in nlink here as in qpathinfo version and Unx search */ if (atomic_read(&cifsInfo->inUse) == 0) -- cgit v1.2.3-70-g09d2 From b0fd30d3e7e768aad5e398caaea6ae5a5c814eab Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 22 May 2008 09:33:34 -0400 Subject: when creating new inodes, use file_mode/dir_mode exclusively on mount without unix extensions When CIFS creates a new inode on a mount without unix extensions, it temporarily assigns the mode that was passed to it in the create/mkdir call. Eventually, when the inode is revalidated, it changes to have the file_mode or dir_mode for the mount. This is confusing to users who expect that the mode shouldn't change this way. It's also problematic since only the mode is treated this way, not the uid or gid. Suppose you have a CIFS mount that's mounted with: uid=0,gid=0,file_mode=0666,dir_mode=0777 ...if an unprivileged user comes along and does this on the mount: mkdir -m 0700 foo touch foo/bar ...there is a period of time where the touch will fail, since the dir will initially be owned by root and have mode 0700. If the user waits long enough, then "foo" will be revalidated and will get the correct dir_mode permissions. This patch changes cifs_mkdir and cifs_create to not overwrite the mode found by the initial cifs_get_inode_info call after the inode is created on the server. Legacy behavior can be reenabled with the new "dynperm" mount option. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/dir.c | 4 +++- fs/cifs/inode.c | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f0b5b5f3dd2e..fb69c1fa85c9 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -260,7 +260,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, buf, inode->i_sb, xid, &fileHandle); if (newinode) { - newinode->i_mode = mode; + if (cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_DYNPERM) + newinode->i_mode = mode; if ((oplock & CIFS_CREATE_ACTION) && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index ae5bcaf2031c..12667d6bf305 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1015,8 +1015,11 @@ mkdir_get_info: CIFS_MOUNT_MAP_SPECIAL_CHR); } if (direntry->d_inode) { - direntry->d_inode->i_mode = mode; - direntry->d_inode->i_mode |= S_IFDIR; + if (cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_DYNPERM) + direntry->d_inode->i_mode = + (mode | S_IFDIR); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { direntry->d_inode->i_uid = -- cgit v1.2.3-70-g09d2 From 4e94a105ed0df78e25b20ff8ed6761f5937662b1 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 23 May 2008 18:22:46 +0000 Subject: [CIFS] remove trailing whitespace Signed-off-by: Steve French --- fs/cifs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 12667d6bf305..ae6b725b3665 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1019,7 +1019,7 @@ mkdir_get_info: CIFS_MOUNT_DYNPERM) direntry->d_inode->i_mode = (mode | S_IFDIR); - + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { direntry->d_inode->i_uid = -- cgit v1.2.3-70-g09d2 From 4ca691a892e8ab4f79583de1394f17a7dcfa2b57 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 22 May 2008 09:33:34 -0400 Subject: silently ignore ownership changes unless unix extensions are enabled or we're faking uid changes CIFS currently allows you to change the ownership of a file, but unless unix extensions are enabled this change is not passed off to the server. Have CIFS silently ignore ownership changes that can't be persistently stored on the server unless the "setuids" option is explicitly specified. We could return an error here (-EOPNOTSUPP or something), but this is how most disk-based windows filesystems on behave on Linux (e.g. VFAT, NTFS, etc). With cifsacl support and proper Windows to Unix idmapping support, we may be able to do this more properly in the future. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/inode.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index ae6b725b3665..fe752fdb26c3 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1546,13 +1546,26 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) } else goto cifs_setattr_exit; } - if (attrs->ia_valid & ATTR_UID) { - cFYI(1, ("UID changed to %d", attrs->ia_uid)); - uid = attrs->ia_uid; - } - if (attrs->ia_valid & ATTR_GID) { - cFYI(1, ("GID changed to %d", attrs->ia_gid)); - gid = attrs->ia_gid; + + /* + * Without unix extensions we can't send ownership changes to the + * server, so silently ignore them. This is consistent with how + * local DOS/Windows filesystems behave (VFAT, NTFS, etc). With + * CIFSACL support + proper Windows to Unix idmapping, we may be + * able to support this in the future. + */ + if (!pTcon->unix_ext && + !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { + attrs->ia_valid &= ~(ATTR_UID | ATTR_GID); + } else { + if (attrs->ia_valid & ATTR_UID) { + cFYI(1, ("UID changed to %d", attrs->ia_uid)); + uid = attrs->ia_uid; + } + if (attrs->ia_valid & ATTR_GID) { + cFYI(1, ("GID changed to %d", attrs->ia_gid)); + gid = attrs->ia_gid; + } } time_buf.Attributes = 0; -- cgit v1.2.3-70-g09d2 From 27adb44c4f671d15932eb0702a09d27244a8a7c1 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 23 May 2008 19:43:29 +0000 Subject: [CIFS] warn if both dynperm and cifsacl mount options specified Signed-off-by: Steve French --- fs/cifs/connect.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 023434f72c15..d49e274f8eba 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2120,6 +2120,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; } + if ((volume_info.cifs_acl) && (volume_info.dynperm)) + cERROR(1, ("mount option dynperm ignored if cifsacl " + "mount option supported")); + tcon = find_unc(sin_server.sin_addr.s_addr, volume_info.UNC, volume_info.username); -- cgit v1.2.3-70-g09d2 From b7206153f61bb63ee2cffa63905b57ec01d20e6e Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 23 May 2008 20:35:07 +0000 Subject: [CIFS] Correct incorrect obscure open flag Also add defines for pipe subcommand codes Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 65d58b4e6a61..0f327c224da3 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -79,6 +79,19 @@ #define TRANS2_GET_DFS_REFERRAL 0x10 #define TRANS2_REPORT_DFS_INCOSISTENCY 0x11 +/* SMB Transact (Named Pipe) subcommand codes */ +#define TRANS_SET_NMPIPE_STATE 0x0001 +#define TRANS_RAW_READ_NMPIPE 0x0011 +#define TRANS_QUERY_NMPIPE_STATE 0x0021 +#define TRANS_QUERY_NMPIPE_INFO 0x0022 +#define TRANS_PEEK_NMPIPE 0x0023 +#define TRANS_TRANSACT_NMPIPE 0x0026 +#define TRANS_RAW_WRITE_NMPIPE 0x0031 +#define TRANS_READ_NMPIPE 0x0036 +#define TRANS_WRITE_NMPIPE 0x0037 +#define TRANS_WAIT_NMPIPE 0x0053 +#define TRANS_CALL_NMPIPE 0x0054 + /* NT Transact subcommand codes */ #define NT_TRANSACT_CREATE 0x01 #define NT_TRANSACT_IOCTL 0x02 @@ -328,12 +341,13 @@ #define CREATE_COMPLETE_IF_OPLK 0x00000100 /* should be zero */ #define CREATE_NO_EA_KNOWLEDGE 0x00000200 #define CREATE_EIGHT_DOT_THREE 0x00000400 /* doc says this is obsolete - open for recovery flag - should - be zero */ + "open for recovery" flag - should + be zero in any case */ +#define CREATE_OPEN_FOR_RECOVERY 0x00000400 #define CREATE_RANDOM_ACCESS 0x00000800 #define CREATE_DELETE_ON_CLOSE 0x00001000 #define CREATE_OPEN_BY_ID 0x00002000 -#define CREATE_OPEN_BACKUP_INTN 0x00004000 +#define CREATE_OPEN_BACKUP_INTENT 0x00004000 #define CREATE_NO_COMPRESSION 0x00008000 #define CREATE_RESERVE_OPFILTER 0x00100000 /* should be zero */ #define OPEN_REPARSE_POINT 0x00200000 @@ -722,7 +736,6 @@ typedef struct smb_com_tconx_rsp_ext { #define SMB_CSC_CACHE_AUTO_REINT 0x0004 #define SMB_CSC_CACHE_VDO 0x0008 #define SMB_CSC_NO_CACHING 0x000C - #define SMB_UNIQUE_FILE_NAME 0x0010 #define SMB_EXTENDED_SIGNATURES 0x0020 @@ -806,7 +819,7 @@ typedef struct smb_com_findclose_req { #define ICOUNT_MASK 0x00FF #define PIPE_READ_MODE 0x0100 #define NAMED_PIPE_TYPE 0x0400 -#define PIPE_END_POINT 0x0800 +#define PIPE_END_POINT 0x4000 #define BLOCKING_NAMED_PIPE 0x8000 typedef struct smb_com_open_req { /* also handles create */ -- cgit v1.2.3-70-g09d2 From 03fb0bce01490c9bdedad861962c76f987531014 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 23 May 2008 13:04:19 -0700 Subject: fuse: fix bdi naming conflict Fuse allocates a separate bdi for each filesystem, and registers them in sysfs with "MAJOR:MINOR" of sb->s_dev (st_dev). This works fine for anon devices normally used by fuse, but can conflict with an already registered BDI for "fuseblk" filesystems, where sb->s_dev represents a real block device. In particularl this happens if a non-partitioned device is being mounted. Fix by registering with a different name for "fuseblk" filesystems. Thanks to Ioan Ionita for the bug report. Signed-off-by: Miklos Szeredi Reported-by: Ioan Ionita Tested-by: Ioan Ionita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-class-bdi | 4 ++++ fs/fuse/inode.c | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 5ac1e01bbd48..5f500977b42f 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -14,6 +14,10 @@ MAJOR:MINOR non-block filesystems which provide their own BDI, such as NFS and FUSE. +MAJOR:MINOR-fuseblk + + Value of st_dev on fuseblk filesystems. + default The default backing dev, used for non-block device backed diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index fb77e0962132..43e99513334a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -488,7 +488,12 @@ static struct fuse_conn *new_conn(struct super_block *sb) err = bdi_init(&fc->bdi); if (err) goto error_kfree; - err = bdi_register_dev(&fc->bdi, fc->dev); + if (sb->s_bdev) { + err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", + MAJOR(fc->dev), MINOR(fc->dev)); + } else { + err = bdi_register_dev(&fc->bdi, fc->dev); + } if (err) goto error_bdi_destroy; /* -- cgit v1.2.3-70-g09d2 From 71fd5179e8d1d4d503b517e0c5374f7c49540bfc Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 23 May 2008 13:04:20 -0700 Subject: ecryptfs: fix missed mutex_unlock Cc: Michael Halcrow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index cd62d75b2cc0..e2832bc7869a 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1906,9 +1906,9 @@ int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm, goto out; } } - mutex_unlock(&key_tfm_list_mutex); (*tfm) = key_tfm->key_tfm; (*tfm_mutex) = &key_tfm->key_tfm_mutex; out: + mutex_unlock(&key_tfm_list_mutex); return rc; } -- cgit v1.2.3-70-g09d2 From 80bfc25f42db6d4715c7688ae2352c5a8038fe7e Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Fri, 23 May 2008 13:04:22 -0700 Subject: ntfs: le*_add_cpu conversion replace all: little_endian_variable = cpu_to_leX(leX_to_cpu(little_endian_variable) + expression_in_cpu_byteorder); with: leX_add_cpu(&little_endian_variable, expression_in_cpu_byteorder); generated with semantic patch Signed-off-by: Marcin Slusarz Acked-by: Anton Altaparmakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ntfs/upcase.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/upcase.c b/fs/ntfs/upcase.c index 9101807dc81a..e2f72ca98037 100644 --- a/fs/ntfs/upcase.c +++ b/fs/ntfs/upcase.c @@ -77,11 +77,10 @@ ntfschar *generate_default_upcase(void) uc[i] = cpu_to_le16(i); for (r = 0; uc_run_table[r][0]; r++) for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++) - uc[i] = cpu_to_le16(le16_to_cpu(uc[i]) + - uc_run_table[r][2]); + le16_add_cpu(&uc[i], uc_run_table[r][2]); for (r = 0; uc_dup_table[r][0]; r++) for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2) - uc[i + 1] = cpu_to_le16(le16_to_cpu(uc[i + 1]) - 1); + le16_add_cpu(&uc[i + 1], -1); for (r = 0; uc_word_table[r][0]; r++) uc[uc_word_table[r][0]] = cpu_to_le16(uc_word_table[r][1]); return uc; -- cgit v1.2.3-70-g09d2 From 80119ef5c8153e0a6cc5edf00c083dc98a9bd348 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 23 May 2008 13:04:31 -0700 Subject: mm: fix atomic_t overflow in vm The atomic_t type is 32bit but a 64bit system can have more than 2^32 pages of virtual address space available. Without this we overflow on ludicrously large mappings Signed-off-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 2 +- include/linux/mman.h | 4 ++-- mm/mmap.c | 4 ++-- mm/nommu.c | 4 ++-- mm/swap.c | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 74a323d2b850..32dc14cd8900 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -139,7 +139,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, #define K(x) ((x) << (PAGE_SHIFT - 10)) si_meminfo(&i); si_swapinfo(&i); - committed = atomic_read(&vm_committed_space); + committed = atomic_long_read(&vm_committed_space); allowed = ((totalram_pages - hugetlb_total_pages()) * sysctl_overcommit_ratio / 100) + total_swap_pages; diff --git a/include/linux/mman.h b/include/linux/mman.h index 87920a0852a3..dab8892e6ff1 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -17,14 +17,14 @@ extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; -extern atomic_t vm_committed_space; +extern atomic_long_t vm_committed_space; #ifdef CONFIG_SMP extern void vm_acct_memory(long pages); #else static inline void vm_acct_memory(long pages) { - atomic_add(pages, &vm_committed_space); + atomic_long_add(pages, &vm_committed_space); } #endif diff --git a/mm/mmap.c b/mm/mmap.c index fac66337da2a..669499e7c2f5 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -80,7 +80,7 @@ EXPORT_SYMBOL(vm_get_page_prot); int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; -atomic_t vm_committed_space = ATOMIC_INIT(0); +atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); /* * Check that a process has enough memory to allocate a new virtual @@ -177,7 +177,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) * cast `allowed' as a signed long because vm_committed_space * sometimes has a negative value */ - if (atomic_read(&vm_committed_space) < (long)allowed) + if (atomic_long_read(&vm_committed_space) < (long)allowed) return 0; error: vm_unacct_memory(pages); diff --git a/mm/nommu.c b/mm/nommu.c index ef8c62cec697..dca93fcb8b7a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -39,7 +39,7 @@ struct page *mem_map; unsigned long max_mapnr; unsigned long num_physpages; unsigned long askedalloc, realalloc; -atomic_t vm_committed_space = ATOMIC_INIT(0); +atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; @@ -1410,7 +1410,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) * cast `allowed' as a signed long because vm_committed_space * sometimes has a negative value */ - if (atomic_read(&vm_committed_space) < (long)allowed) + if (atomic_long_read(&vm_committed_space) < (long)allowed) return 0; error: vm_unacct_memory(pages); diff --git a/mm/swap.c b/mm/swap.c index 91e194445a5e..45c9f25a8a3b 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -503,7 +503,7 @@ void vm_acct_memory(long pages) local = &__get_cpu_var(committed_space); *local += pages; if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) { - atomic_add(*local, &vm_committed_space); + atomic_long_add(*local, &vm_committed_space); *local = 0; } preempt_enable(); @@ -520,7 +520,7 @@ static int cpu_swap_callback(struct notifier_block *nfb, committed = &per_cpu(committed_space, (long)hcpu); if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - atomic_add(*committed, &vm_committed_space); + atomic_long_add(*committed, &vm_committed_space); *committed = 0; drain_cpu_pagevecs((long)hcpu); } -- cgit v1.2.3-70-g09d2 From c4185a0e019387f5ad6e99009804965531fa1fab Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Fri, 23 May 2008 13:04:47 -0700 Subject: proc: proc_get_inode() should get module only once Any file under /proc/net opened more than once leaked the refcounter on the module it belongs to. The problem is that module_get is called for each file opening while module_put is called only when /proc inode is destroyed. So, lets put module counter if we are dealing with already initialised inode. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=10737 Signed-off-by: Denis V. Lunev Cc: David Miller Cc: Patrick McHardy Acked-by: Pavel Emelyanov Acked-by: Robert Olsson Acked-by: Eric W. Biederman Reported-by: Roland Kletzing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 6f4e8dc97da1..b08d10017911 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -425,7 +425,8 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, } } unlock_new_inode(inode); - } + } else + module_put(de->owner); return inode; out_ino: -- cgit v1.2.3-70-g09d2 From 5132861a7a44498ebb18357473f8b8d4cdc70e9f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 22 May 2008 09:33:34 -0400 Subject: disable most mode changes on non-unix/non-cifsacl mounts CIFS currently allows you to change the mode of an inode on a share that doesn't have unix extensions enabled, and isn't using cifsacl. The inode in this case *only* has its mode changed in memory on the client. This is problematic since it can change any time the inode is purged from the cache. This patch makes cifs_setattr silently ignore most mode changes when unix extensions and cifsacl support are not enabled, and when the share is not mounted with the "dynperm" option. The exceptions are: When a mode change would remove all write access to an inode we turn on the ATTR_READONLY bit on the server and remove all write bits from the inode's mode in memory. When a mode change would add a write bit to an inode that previously had them all turned off, it turns off the ATTR_READONLY bit on the server, and resets the mode back to what it would normally be (generally, the file_mode or dir_mode of the share). Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/inode.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index fe752fdb26c3..722be543ceec 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1575,7 +1575,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) attrs->ia_valid &= ~ATTR_MODE; if (attrs->ia_valid & ATTR_MODE) { - cFYI(1, ("Mode changed to 0x%x", attrs->ia_mode)); + cFYI(1, ("Mode changed to 0%o", attrs->ia_mode)); mode = attrs->ia_mode; } @@ -1590,18 +1590,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) #ifdef CONFIG_CIFS_EXPERIMENTAL if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) rc = mode_to_acl(inode, full_path, mode); - else if ((mode & S_IWUGO) == 0) { -#else - if ((mode & S_IWUGO) == 0) { + else #endif - /* not writeable */ - if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) { - set_dosattr = true; - time_buf.Attributes = - cpu_to_le32(cifsInode->cifsAttrs | - ATTR_READONLY); - } - } else if (cifsInode->cifsAttrs & ATTR_READONLY) { + if (((mode & S_IWUGO) == 0) && + (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { + set_dosattr = true; + time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs | + ATTR_READONLY); + /* fix up mode if we're not using dynperm */ + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) + attrs->ia_mode = inode->i_mode & ~S_IWUGO; + } else if ((mode & S_IWUGO) && + (cifsInode->cifsAttrs & ATTR_READONLY)) { /* If file is readonly on server, we would not be able to write to it - so if any write bit is enabled for user or group or other we @@ -1612,6 +1612,20 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) /* Windows ignores set to zero */ if (time_buf.Attributes == 0) time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); + + /* reset local inode permissions to normal */ + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { + attrs->ia_mode &= ~(S_IALLUGO); + if (S_ISDIR(inode->i_mode)) + attrs->ia_mode |= + cifs_sb->mnt_dir_mode; + else + attrs->ia_mode |= + cifs_sb->mnt_file_mode; + } + } else if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { + /* ignore mode change - ATTR_READONLY hasn't changed */ + attrs->ia_valid &= ~ATTR_MODE; } } -- cgit v1.2.3-70-g09d2 From 03cddb80ed2dacaf03c370d38bcc75f8303a03b8 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Jun 2008 20:59:29 -0400 Subject: ext4: Fix use of uninitialized data with debug enabled. Fix use of uninitialized data with debug enabled. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 873ad9b3418c..c9900aade150 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2745,8 +2745,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, sbi = EXT4_SB(sb); es = sbi->s_es; - ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group, - gdp->bg_free_blocks_count); err = -EIO; bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group); @@ -2762,6 +2760,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (!gdp) goto out_err; + ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group, + gdp->bg_free_blocks_count); + err = ext4_journal_get_write_access(handle, gdp_bh); if (err) goto out_err; @@ -3094,8 +3095,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa) { - unsigned len = ac->ac_o_ex.fe_len; - + unsigned int len = ac->ac_o_ex.fe_len; ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart, &ac->ac_b_ex.fe_group, &ac->ac_b_ex.fe_start); -- cgit v1.2.3-70-g09d2 From 0bf7e8379ce7e0159a2a6bd3d937f2f6ada79799 Mon Sep 17 00:00:00 2001 From: "Jose R. Santos" Date: Tue, 3 Jun 2008 14:07:29 -0400 Subject: ext4: Fix uninit block group initialization with FLEX_BG With FLEX_BG block bitmaps, inode bitmaps and inode tables _MAY_ be allocated outside the group. So, when initializing an uninitialized block bitmap, we need to check the location of this blocks before setting the corresponding bits in the block bitmap of the newly initialized group. Also return the right number of free blocks when counting the available free blocks in uninit group. Tested-by: Aneesh Kumar K.V Signed-off-by: Jose R. Santos Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 30494c5da843..9cc80b9cc8d8 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -43,6 +43,46 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, } +static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block, + ext4_group_t block_group) +{ + ext4_group_t actual_group; + ext4_get_group_no_and_offset(sb, block, &actual_group, 0); + if (actual_group == block_group) + return 1; + return 0; +} + +static int ext4_group_used_meta_blocks(struct super_block *sb, + ext4_group_t block_group) +{ + ext4_fsblk_t tmp; + struct ext4_sb_info *sbi = EXT4_SB(sb); + /* block bitmap, inode bitmap, and inode table blocks */ + int used_blocks = sbi->s_itb_per_group + 2; + + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { + struct ext4_group_desc *gdp; + struct buffer_head *bh; + + gdp = ext4_get_group_desc(sb, block_group, &bh); + if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), + block_group)) + used_blocks--; + + if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), + block_group)) + used_blocks--; + + tmp = ext4_inode_table(sb, gdp); + for (; tmp < ext4_inode_table(sb, gdp) + + sbi->s_itb_per_group; tmp++) { + if (!ext4_block_in_group(sb, tmp, block_group)) + used_blocks -= 1; + } + } + return used_blocks; +} /* Initializes an uninitialized block bitmap if given, and returns the * number of blocks free in the group. */ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, @@ -105,20 +145,34 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, free_blocks = group_blocks - bit_max; if (bh) { - ext4_fsblk_t start; + ext4_fsblk_t start, tmp; + int flex_bg = 0; for (bit = 0; bit < bit_max; bit++) ext4_set_bit(bit, bh->b_data); start = ext4_group_first_block_no(sb, block_group); - /* Set bits for block and inode bitmaps, and inode table */ - ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); - ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data); - for (bit = (ext4_inode_table(sb, gdp) - start), - bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++) - ext4_set_bit(bit, bh->b_data); + if (EXT4_HAS_INCOMPAT_FEATURE(sb, + EXT4_FEATURE_INCOMPAT_FLEX_BG)) + flex_bg = 1; + /* Set bits for block and inode bitmaps, and inode table */ + tmp = ext4_block_bitmap(sb, gdp); + if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + ext4_set_bit(tmp - start, bh->b_data); + + tmp = ext4_inode_bitmap(sb, gdp); + if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + ext4_set_bit(tmp - start, bh->b_data); + + tmp = ext4_inode_table(sb, gdp); + for (; tmp < ext4_inode_table(sb, gdp) + + sbi->s_itb_per_group; tmp++) { + if (!flex_bg || + ext4_block_in_group(sb, tmp, block_group)) + ext4_set_bit(tmp - start, bh->b_data); + } /* * Also if the number of blocks within the group is * less than the blocksize * 8 ( which is the size @@ -126,8 +180,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, */ mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); } - - return free_blocks - sbi->s_itb_per_group - 2; + return free_blocks - ext4_group_used_meta_blocks(sb, block_group); } -- cgit v1.2.3-70-g09d2 From 944600930a37aa725ba6f93c3244e2d77a1e3581 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 6 Jun 2008 18:05:52 -0400 Subject: ext4: fix online resize bug There is a bug when we are trying to verify that the reserve inode's double indirect blocks point back to the primary gdt blocks. The fix is obvious, we need to mod the gdb count by the addr's per block. This was verified using the same testcase as with the ext3 equivalent of this patch. Signed-off-by: Josef Bacik Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9f086a6a472b..9ecb92f68543 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -563,7 +563,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, } blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count; - data = (__le32 *)dind->b_data + EXT4_SB(sb)->s_gdb_count; + data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count % + EXT4_ADDR_PER_BLOCK(sb)); end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb); /* Get each reserved primary GDT block and verify it holds backups */ -- cgit v1.2.3-70-g09d2 From 8ea76900be3b4522396e2021260d2818a27b3a5b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 26 May 2008 10:28:09 -0400 Subject: jbd2: Fix memory leak when verifying checksums in the journal Cc: Andreas Dilger Cc: Girish Shilamkar Signed-off-by: "Theodore Ts'o" --- fs/jbd2/recovery.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 5d0405a9e7ca..7199db52b2fd 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -344,6 +344,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh, *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, obh->b_size); } + put_bh(obh); } return 0; } -- cgit v1.2.3-70-g09d2 From 624080eded68738daee041ad64672a9d2614754f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 6 Jun 2008 17:50:40 -0400 Subject: jbd2: If a journal checksum error is detected, propagate the error to ext4 If a journal checksum error is detected, the ext4 filesystem will call ext4_error(), and the mount will either continue, become a read-only mount, or cause a kernel panic based on the superblock flags indicating the user's preference of what to do in case of filesystem corruption being detected. Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 23 +++++++++++++++++++++++ fs/jbd2/recovery.c | 11 ++++------- include/linux/jbd2.h | 3 +++ 3 files changed, 30 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 09d9359c8055..d01a32e8b50a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2189,6 +2189,29 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { if (ext4_load_journal(sb, es, journal_devnum)) goto failed_mount3; + if (!(sb->s_flags & MS_RDONLY) && + EXT4_SB(sb)->s_journal->j_failed_commit) { + printk(KERN_CRIT "EXT4-fs error (device %s): " + "ext4_fill_super: Journal transaction " + "%u is corrupt\n", sb->s_id, + EXT4_SB(sb)->s_journal->j_failed_commit); + if (test_opt (sb, ERRORS_RO)) { + printk (KERN_CRIT + "Mounting filesystem read-only\n"); + sb->s_flags |= MS_RDONLY; + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + } + if (test_opt(sb, ERRORS_PANIC)) { + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + ext4_commit_super(sb, es, 1); + printk(KERN_CRIT + "EXT4-fs (device %s): mount failed\n", + sb->s_id); + goto failed_mount4; + } + } } else if (journal_inum) { if (ext4_create_journal(sb, es, journal_inum)) goto failed_mount3; diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 7199db52b2fd..058f50f65b76 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -611,9 +611,8 @@ static int do_one_pass(journal_t *journal, chksum_err = chksum_seen = 0; if (info->end_transaction) { - printk(KERN_ERR "JBD: Transaction %u " - "found to be corrupt.\n", - next_commit_ID - 1); + journal->j_failed_commit = + info->end_transaction; brelse(bh); break; } @@ -644,10 +643,8 @@ static int do_one_pass(journal_t *journal, if (!JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ - printk(KERN_ERR - "JBD: Transaction %u " - "found to be corrupt.\n", - next_commit_ID); + journal->j_failed_commit = + next_commit_ID; brelse(bh); break; } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 05e2b307161a..d147f0f90360 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -919,6 +919,9 @@ struct journal_s struct proc_dir_entry *j_proc_entry; struct transaction_stats_s j_stats; + /* Failed journal commit ID */ + unsigned int j_failed_commit; + /* * An opaque pointer to fs-private information. ext3 puts its * superblock pointer here -- cgit v1.2.3-70-g09d2 From cd0b6a39a1d68b61b1073662f40f747c8b728f98 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 26 May 2008 10:28:28 -0400 Subject: ext4: Display the journal_async_commit mount option in /proc/mounts Cc: Andreas Dilger Cc: Girish Shilamkar Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d01a32e8b50a..ba92c606ad9a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -731,6 +731,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) } if (test_opt(sb, BARRIER)) seq_puts(seq, ",barrier=1"); + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) + seq_puts(seq, ",journal_async_commit"); if (test_opt(sb, NOBH)) seq_puts(seq, ",nobh"); if (!test_opt(sb, EXTENTS)) -- cgit v1.2.3-70-g09d2 From 034772b068be62a79470d6c1b81b01fbe27793ac Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 3 Jun 2008 22:31:11 -0400 Subject: jbd2: Fix barrier fallback code to re-lock the buffer head If the device doesn't support write barriers, the write is retried without ordered mode. But the buffer head needs to be re-locked or submit_bh will fail with on BUG(!buffer_locked(bh)). Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 4d99685fdce4..a2ed72f7ceee 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -168,6 +168,7 @@ static int journal_submit_commit_record(journal_t *journal, spin_unlock(&journal->j_state_lock); /* And try again, without the barrier */ + lock_buffer(bh); set_buffer_uptodate(bh); set_buffer_dirty(bh); ret = submit_bh(WRITE, bh); -- cgit v1.2.3-70-g09d2 From 571640cad3fda6475da45d91cf86076f1f86bd9b Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 26 May 2008 12:29:46 -0400 Subject: ext4: enable barriers by default I can't think of any valid reason for ext4 to not use barriers when they are available; I believe this is necessary for filesystem integrity in the face of a volatile write cache on storage. An administrator who trusts that the cache is sufficiently battery- backed (and power supplies are sufficiently redundant, etc...) can always turn it back off again. SuSE has carried such a patch for ext3 for quite some time now. Also document the mount option while we're at it. Signed-off-by: Eric Sandeen Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- Documentation/filesystems/ext4.txt | 12 ++++++++++-- fs/ext4/super.c | 11 +++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 560f88dc7090..0c5086db8352 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -139,8 +139,16 @@ commit=nrsec (*) Ext4 can be told to sync all its data and metadata Setting it to very large values will improve performance. -barrier=1 This enables/disables barriers. barrier=0 disables - it, barrier=1 enables it. +barrier=<0|1(*)> This enables/disables the use of write barriers in + the jbd code. barrier=0 disables, barrier=1 enables. + This also requires an IO stack which can support + barriers, and if jbd gets an error on a barrier + write, it will disable again with a warning. + Write barriers enforce proper on-disk ordering + of journal commits, making volatile disk write caches + safe to use, at some performance penalty. If + your disks are battery-backed in one way or another, + disabling barriers may safely improve performance. orlov (*) This enables the new Orlov block allocator. It is enabled by default. diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ba92c606ad9a..cb96f127c366 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -671,6 +671,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) unsigned long def_mount_opts; struct super_block *sb = vfs->mnt_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); + journal_t *journal = sbi->s_journal; struct ext4_super_block *es = sbi->s_es; def_mount_opts = le32_to_cpu(es->s_default_mount_opts); @@ -729,8 +730,13 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_printf(seq, ",commit=%u", (unsigned) (sbi->s_commit_interval / HZ)); } - if (test_opt(sb, BARRIER)) - seq_puts(seq, ",barrier=1"); + /* + * We're changing the default of barrier mount option, so + * let's always display its mount state so it's clear what its + * status is. + */ + seq_puts(seq, ",barrier="); + seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) seq_puts(seq, ",journal_async_commit"); if (test_opt(sb, NOBH)) @@ -1909,6 +1915,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_resgid = le16_to_cpu(es->s_def_resgid); set_opt(sbi->s_mount_opt, RESERVATION); + set_opt(sbi->s_mount_opt, BARRIER); /* * turn on extents feature by default in ext4 filesystem -- cgit v1.2.3-70-g09d2 From cbaffba12ce08beb3e80bfda148ee0fa14aac188 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 26 May 2008 20:55:42 +0400 Subject: posix timers: discard SI_TIMER signals on exec Based on Roland's patch. This approach was suggested by Austin Clements from the very beginning, and then by Linus. As Austin pointed out, the execing task can be killed by SI_TIMER signal because exec flushes the signal handlers, but doesn't discard the pending signals generated by posix timers. Perhaps not a bug, but people find this surprising. See http://bugzilla.kernel.org/show_bug.cgi?id=10460 Signed-off-by: Oleg Nesterov Cc: Austin Clements Cc: Roland McGrath Signed-off-by: Linus Torvalds --- fs/exec.c | 1 + include/linux/sched.h | 2 ++ kernel/signal.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 3c2ba7ce11d4..9448f1b50b4a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -860,6 +860,7 @@ static int de_thread(struct task_struct *tsk) no_thread_group: exit_itimers(sig); + flush_itimer_signals(); if (leader) release_task(leader); diff --git a/include/linux/sched.h b/include/linux/sched.h index 5395a6176f4b..3e05e5474749 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1848,7 +1848,9 @@ extern void exit_thread(void); extern void exit_files(struct task_struct *); extern void __cleanup_signal(struct signal_struct *); extern void __cleanup_sighand(struct sighand_struct *); + extern void exit_itimers(struct signal_struct *); +extern void flush_itimer_signals(void); extern NORET_TYPE void do_group_exit(int); diff --git a/kernel/signal.c b/kernel/signal.c index 2955f6c4f36e..6c0958e52ea7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -231,6 +231,40 @@ void flush_signals(struct task_struct *t) spin_unlock_irqrestore(&t->sighand->siglock, flags); } +static void __flush_itimer_signals(struct sigpending *pending) +{ + sigset_t signal, retain; + struct sigqueue *q, *n; + + signal = pending->signal; + sigemptyset(&retain); + + list_for_each_entry_safe(q, n, &pending->list, list) { + int sig = q->info.si_signo; + + if (likely(q->info.si_code != SI_TIMER)) { + sigaddset(&retain, sig); + } else { + sigdelset(&signal, sig); + list_del_init(&q->list); + __sigqueue_free(q); + } + } + + sigorsets(&pending->signal, &signal, &retain); +} + +void flush_itimer_signals(void) +{ + struct task_struct *tsk = current; + unsigned long flags; + + spin_lock_irqsave(&tsk->sighand->siglock, flags); + __flush_itimer_signals(&tsk->pending); + __flush_itimer_signals(&tsk->signal->shared_pending); + spin_unlock_irqrestore(&tsk->sighand->siglock, flags); +} + void ignore_signals(struct task_struct *t) { int i; -- cgit v1.2.3-70-g09d2 From a82c53a0e3f57f02782330372b7adad67b417645 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 9 May 2008 13:28:36 +0200 Subject: splice: fix sendfile() issue with relay Splice isn't always incrementing the ppos correctly, which broke relay splice. Signed-off-by: Tom Zanussi Tested-by: Dan Williams Signed-off-by: Jens Axboe --- fs/splice.c | 12 ++++++++---- kernel/relay.c | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index 78150038b584..a048ad2130c3 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -983,7 +983,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, while (len) { size_t read_len; - loff_t pos = sd->pos; + loff_t pos = sd->pos, prev_pos = pos; ret = do_splice_to(in, &pos, pipe, len, flags); if (unlikely(ret <= 0)) @@ -998,15 +998,19 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, * could get stuck data in the internal pipe: */ ret = actor(pipe, sd); - if (unlikely(ret <= 0)) + if (unlikely(ret <= 0)) { + sd->pos = prev_pos; goto out_release; + } bytes += ret; len -= ret; sd->pos = pos; - if (ret < read_len) + if (ret < read_len) { + sd->pos = prev_pos + ret; goto out_release; + } } done: @@ -1072,7 +1076,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, ret = splice_direct_to_actor(in, &sd, direct_splice_actor); if (ret > 0) - *ppos += ret; + *ppos = sd.pos; return ret; } diff --git a/kernel/relay.c b/kernel/relay.c index bc24dcdc570f..7de644cdec43 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1191,7 +1191,7 @@ static ssize_t relay_file_splice_read(struct file *in, ret = 0; spliced = 0; - while (len) { + while (len && !spliced) { ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); if (ret < 0) break; -- cgit v1.2.3-70-g09d2 From ca39d651d17df49b6d11f851d56c0ce0ce01ea1a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 20 May 2008 21:27:41 +0200 Subject: splice: handle try_to_release_page() failure splice currently assumes that try_to_release_page() always suceeds, but it can return failure. If it does, we cannot steal the page. Acked-by: Mingming Cao --- fs/splice.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index a048ad2130c3..aa5f6f60b305 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -58,8 +58,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, */ wait_on_page_writeback(page); - if (PagePrivate(page)) - try_to_release_page(page, GFP_KERNEL); + if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) + goto out_unlock; /* * If we succeeded in removing the mapping, set LRU flag @@ -75,6 +75,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, * Raced with truncate or failed to remove page from current * address space, unlock and return failure. */ +out_unlock: unlock_page(page); return 1; } -- cgit v1.2.3-70-g09d2 From a12630b186d56a77d17c9b34c82b88dda4337ed7 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 9 May 2008 18:49:29 -0700 Subject: ocfs2: Rename 'user_stack' plugin structure to 'ocfs2_user_plugin' The static structure describing the userspace cluster plugin for ocfs2 was named 'user_stack', which is a real pain when people are grep(1)ing the tree for the program stack object 'user_stack'. Change the name to something distinct and namespaced. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stack_user.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index b503772cd0ec..6b97d11f6bf8 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -61,7 +61,7 @@ * negotiated by the client. The client negotiates based on the maximum * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major * number from the "SETV" message must match - * user_stack.sp_proto->lp_max_version.pv_major, and the minor number + * ocfs2_user_plugin.sp_proto->lp_max_version.pv_major, and the minor number * must be less than or equal to ...->lp_max_version.pv_minor. * * Once this information has been set, mounts will be allowed. From this @@ -153,7 +153,7 @@ union ocfs2_control_message { struct ocfs2_control_message_down u_down; }; -static struct ocfs2_stack_plugin user_stack; +static struct ocfs2_stack_plugin ocfs2_user_plugin; static atomic_t ocfs2_control_opened; static int ocfs2_control_this_node = -1; @@ -399,7 +399,7 @@ static int ocfs2_control_do_setversion_msg(struct file *file, char *ptr = NULL; struct ocfs2_control_private *p = file->private_data; struct ocfs2_protocol_version *max = - &user_stack.sp_proto->lp_max_version; + &ocfs2_user_plugin.sp_proto->lp_max_version; if (ocfs2_control_get_handshake_state(file) != OCFS2_CONTROL_HANDSHAKE_PROTOCOL) @@ -680,7 +680,7 @@ static void fsdlm_lock_ast_wrapper(void *astarg) struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg); int status = lksb->sb_status; - BUG_ON(user_stack.sp_proto == NULL); + BUG_ON(ocfs2_user_plugin.sp_proto == NULL); /* * For now we're punting on the issue of other non-standard errors @@ -693,16 +693,16 @@ static void fsdlm_lock_ast_wrapper(void *astarg) */ if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL) - user_stack.sp_proto->lp_unlock_ast(astarg, 0); + ocfs2_user_plugin.sp_proto->lp_unlock_ast(astarg, 0); else - user_stack.sp_proto->lp_lock_ast(astarg); + ocfs2_user_plugin.sp_proto->lp_lock_ast(astarg); } static void fsdlm_blocking_ast_wrapper(void *astarg, int level) { - BUG_ON(user_stack.sp_proto == NULL); + BUG_ON(ocfs2_user_plugin.sp_proto == NULL); - user_stack.sp_proto->lp_blocking_ast(astarg, level); + ocfs2_user_plugin.sp_proto->lp_blocking_ast(astarg, level); } static int user_dlm_lock(struct ocfs2_cluster_connection *conn, @@ -838,7 +838,7 @@ static int user_cluster_this_node(unsigned int *this_node) return 0; } -static struct ocfs2_stack_operations user_stack_ops = { +static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { .connect = user_cluster_connect, .disconnect = user_cluster_disconnect, .this_node = user_cluster_this_node, @@ -849,20 +849,20 @@ static struct ocfs2_stack_operations user_stack_ops = { .dump_lksb = user_dlm_dump_lksb, }; -static struct ocfs2_stack_plugin user_stack = { +static struct ocfs2_stack_plugin ocfs2_user_plugin = { .sp_name = "user", - .sp_ops = &user_stack_ops, + .sp_ops = &ocfs2_user_plugin_ops, .sp_owner = THIS_MODULE, }; -static int __init user_stack_init(void) +static int __init ocfs2_user_plugin_init(void) { int rc; rc = ocfs2_control_init(); if (!rc) { - rc = ocfs2_stack_glue_register(&user_stack); + rc = ocfs2_stack_glue_register(&ocfs2_user_plugin); if (rc) ocfs2_control_exit(); } @@ -870,14 +870,14 @@ static int __init user_stack_init(void) return rc; } -static void __exit user_stack_exit(void) +static void __exit ocfs2_user_plugin_exit(void) { - ocfs2_stack_glue_unregister(&user_stack); + ocfs2_stack_glue_unregister(&ocfs2_user_plugin); ocfs2_control_exit(); } MODULE_AUTHOR("Oracle"); MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks"); MODULE_LICENSE("GPL"); -module_init(user_stack_init); -module_exit(user_stack_exit); +module_init(ocfs2_user_plugin_init); +module_exit(ocfs2_user_plugin_exit); -- cgit v1.2.3-70-g09d2 From 271d772d02507c7541d5e6b4938ed2380e59a39a Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 12 May 2008 18:31:35 -0700 Subject: [PATCH 1/3] ocfs2/net: Silence build warnings This patch silences the build warnings concerning o2net_debugfs_init() and friends when building without CONFIG_DEBUG_FS enabled. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/tcp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index a705d5d19036..fd6179eb26d4 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h @@ -128,23 +128,23 @@ void o2net_debug_del_nst(struct o2net_send_tracking *nst); void o2net_debug_add_sc(struct o2net_sock_container *sc); void o2net_debug_del_sc(struct o2net_sock_container *sc); #else -static int o2net_debugfs_init(void) +static inline int o2net_debugfs_init(void) { return 0; } -static void o2net_debugfs_exit(void) +static inline void o2net_debugfs_exit(void) { } -static void o2net_debug_add_nst(struct o2net_send_tracking *nst) +static inline void o2net_debug_add_nst(struct o2net_send_tracking *nst) { } -static void o2net_debug_del_nst(struct o2net_send_tracking *nst) +static inline void o2net_debug_del_nst(struct o2net_send_tracking *nst) { } -static void o2net_debug_add_sc(struct o2net_sock_container *sc) +static inline void o2net_debug_add_sc(struct o2net_sock_container *sc) { } -static void o2net_debug_del_sc(struct o2net_sock_container *sc) +static inline void o2net_debug_del_sc(struct o2net_sock_container *sc) { } #endif /* CONFIG_DEBUG_FS */ -- cgit v1.2.3-70-g09d2 From 959040c37a8cae8117907d4aed87f1b01ff1ea19 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 12 May 2008 18:31:36 -0700 Subject: [PATCH 2/3] ocfs2/dlm: Silence build warnings This patch silences the build warnings concerning dlm_debug_init() and friends when building without CONFIG_DEBUG_FS enabled. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmdebug.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h index d34a62a3a625..8c686d22f9c7 100644 --- a/fs/ocfs2/dlm/dlmdebug.h +++ b/fs/ocfs2/dlm/dlmdebug.h @@ -60,25 +60,25 @@ void dlm_destroy_debugfs_root(void); #else -static int dlm_debug_init(struct dlm_ctxt *dlm) +static inline int dlm_debug_init(struct dlm_ctxt *dlm) { return 0; } -static void dlm_debug_shutdown(struct dlm_ctxt *dlm) +static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm) { } -static int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) +static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) { return 0; } -static void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) +static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) { } -static int dlm_create_debugfs_root(void) +static inline int dlm_create_debugfs_root(void) { return 0; } -static void dlm_destroy_debugfs_root(void) +static inline void dlm_destroy_debugfs_root(void) { } -- cgit v1.2.3-70-g09d2 From 0f475b2abed6cbccee1da20a0bef2895eb2a0edd Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 12 May 2008 18:31:37 -0700 Subject: [PATCH 3/3] ocfs2/net: Silence build warnings This patch silences the build warnings concerning o2net_init_nst() and friends when building without CONFIG_DEBUG_FS enabled. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/tcp.c | 28 +++++++++------------------- fs/ocfs2/cluster/tcp_internal.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 1e44ad14881a..a27d61581bd6 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -142,53 +142,43 @@ static void o2net_idle_timer(unsigned long data); static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); -static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, - u32 msgkey, struct task_struct *task, u8 node) -{ #ifdef CONFIG_DEBUG_FS +void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, + u32 msgkey, struct task_struct *task, u8 node) +{ INIT_LIST_HEAD(&nst->st_net_debug_item); nst->st_task = task; nst->st_msg_type = msgtype; nst->st_msg_key = msgkey; nst->st_node = node; -#endif } -static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) +void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) { -#ifdef CONFIG_DEBUG_FS do_gettimeofday(&nst->st_sock_time); -#endif } -static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) +void o2net_set_nst_send_time(struct o2net_send_tracking *nst) { -#ifdef CONFIG_DEBUG_FS do_gettimeofday(&nst->st_send_time); -#endif } -static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) +void o2net_set_nst_status_time(struct o2net_send_tracking *nst) { -#ifdef CONFIG_DEBUG_FS do_gettimeofday(&nst->st_status_time); -#endif } -static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, +void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, struct o2net_sock_container *sc) { -#ifdef CONFIG_DEBUG_FS nst->st_sc = sc; -#endif } -static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) +void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) { -#ifdef CONFIG_DEBUG_FS nst->st_id = msg_id; -#endif } +#endif /* CONFIG_DEBUG_FS */ static inline int o2net_reconnect_delay(void) { diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 8d58cfe410b1..18307ff81b77 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -224,10 +224,42 @@ struct o2net_send_tracking { struct timeval st_send_time; struct timeval st_status_time; }; + +void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, + u32 msgkey, struct task_struct *task, u8 node); +void o2net_set_nst_sock_time(struct o2net_send_tracking *nst); +void o2net_set_nst_send_time(struct o2net_send_tracking *nst); +void o2net_set_nst_status_time(struct o2net_send_tracking *nst); +void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, + struct o2net_sock_container *sc); +void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id); + #else struct o2net_send_tracking { u32 dummy; }; + +static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, + u32 msgkey, struct task_struct *task, u8 node) +{ +} +static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) +{ +} +static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) +{ +} +static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) +{ +} +static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, + struct o2net_sock_container *sc) +{ +} +static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, + u32 msg_id) +{ +} #endif /* CONFIG_DEBUG_FS */ #endif /* O2CLUSTER_TCP_INTERNAL_H */ -- cgit v1.2.3-70-g09d2 From ca05a99a54db1db5bca72eccb5866d2a86f8517f Mon Sep 17 00:00:00 2001 From: "Andrew G. Morgan" Date: Tue, 27 May 2008 22:05:17 -0700 Subject: capabilities: remain source compatible with 32-bit raw legacy capability support. Source code out there hard-codes a notion of what the _LINUX_CAPABILITY_VERSION #define means in terms of the semantics of the raw capability system calls capget() and capset(). Its unfortunate, but true. Since the confusing header file has been in a released kernel, there is software that is erroneously using 64-bit capabilities with the semantics of 32-bit compatibilities. These recently compiled programs may suffer corruption of their memory when sys_getcap() overwrites more memory than they are coded to expect, and the raising of added capabilities when using sys_capset(). As such, this patch does a number of things to clean up the situation for all. It 1. forces the _LINUX_CAPABILITY_VERSION define to always retain its legacy value. 2. adopts a new #define strategy for the kernel's internal implementation of the preferred magic. 3. deprecates v2 capability magic in favor of a new (v3) magic number. The functionality of v3 is entirely equivalent to v2, the only difference being that the v2 magic causes the kernel to log a "deprecated" warning so the admin can find applications that may be using v2 inappropriately. [User space code continues to be encouraged to use the libcap API which protects the application from details like this. libcap-2.10 is the first to support v3 capabilities.] Fixes issue reported in https://bugzilla.redhat.com/show_bug.cgi?id=447518. Thanks to Bojan Smojver for the report. [akpm@linux-foundation.org: s/depreciate/deprecate/g] [akpm@linux-foundation.org: be robust about put_user size] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Andrew G. Morgan Cc: Serge E. Hallyn Cc: Bojan Smojver Cc: stable@kernel.org Signed-off-by: Andrew Morton Signed-off-by: Chris Wright --- fs/proc/array.c | 2 +- include/linux/capability.h | 29 ++++++++---- kernel/capability.c | 111 +++++++++++++++++++++++++++++---------------- 3 files changed, 95 insertions(+), 47 deletions(-) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index 9e3b8c33c24b..797d775e0354 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -288,7 +288,7 @@ static void render_cap_t(struct seq_file *m, const char *header, seq_printf(m, "%s", header); CAP_FOR_EACH_U32(__capi) { seq_printf(m, "%08x", - a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]); + a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); } seq_printf(m, "\n"); } diff --git a/include/linux/capability.h b/include/linux/capability.h index f4ea0dd9a618..fa830f8de032 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -31,11 +31,11 @@ struct task_struct; #define _LINUX_CAPABILITY_VERSION_1 0x19980330 #define _LINUX_CAPABILITY_U32S_1 1 -#define _LINUX_CAPABILITY_VERSION_2 0x20071026 +#define _LINUX_CAPABILITY_VERSION_2 0x20071026 /* deprecated - use v3 */ #define _LINUX_CAPABILITY_U32S_2 2 -#define _LINUX_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_2 -#define _LINUX_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_2 +#define _LINUX_CAPABILITY_VERSION_3 0x20080522 +#define _LINUX_CAPABILITY_U32S_3 2 typedef struct __user_cap_header_struct { __u32 version; @@ -77,10 +77,23 @@ struct vfs_cap_data { } data[VFS_CAP_U32]; }; -#ifdef __KERNEL__ +#ifndef __KERNEL__ + +/* + * Backwardly compatible definition for source code - trapped in a + * 32-bit world. If you find you need this, please consider using + * libcap to untrap yourself... + */ +#define _LINUX_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_1 +#define _LINUX_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_1 + +#else + +#define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 +#define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 typedef struct kernel_cap_struct { - __u32 cap[_LINUX_CAPABILITY_U32S]; + __u32 cap[_KERNEL_CAPABILITY_U32S]; } kernel_cap_t; #define _USER_CAP_HEADER_SIZE (sizeof(struct __user_cap_header_struct)) @@ -351,7 +364,7 @@ typedef struct kernel_cap_struct { */ #define CAP_FOR_EACH_U32(__capi) \ - for (__capi = 0; __capi < _LINUX_CAPABILITY_U32S; ++__capi) + for (__capi = 0; __capi < _KERNEL_CAPABILITY_U32S; ++__capi) # define CAP_FS_MASK_B0 (CAP_TO_MASK(CAP_CHOWN) \ | CAP_TO_MASK(CAP_DAC_OVERRIDE) \ @@ -361,7 +374,7 @@ typedef struct kernel_cap_struct { # define CAP_FS_MASK_B1 (CAP_TO_MASK(CAP_MAC_OVERRIDE)) -#if _LINUX_CAPABILITY_U32S != 2 +#if _KERNEL_CAPABILITY_U32S != 2 # error Fix up hand-coded capability macro initializers #else /* HAND-CODED capability initializers */ @@ -372,7 +385,7 @@ typedef struct kernel_cap_struct { # define CAP_NFSD_SET ((kernel_cap_t){{ CAP_FS_MASK_B0|CAP_TO_MASK(CAP_SYS_RESOURCE), \ CAP_FS_MASK_B1 } }) -#endif /* _LINUX_CAPABILITY_U32S != 2 */ +#endif /* _KERNEL_CAPABILITY_U32S != 2 */ #define CAP_INIT_INH_SET CAP_EMPTY_SET diff --git a/kernel/capability.c b/kernel/capability.c index 39e8193b41ea..cfbe44299488 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -52,6 +52,69 @@ static void warn_legacy_capability_use(void) } } +/* + * Version 2 capabilities worked fine, but the linux/capability.h file + * that accompanied their introduction encouraged their use without + * the necessary user-space source code changes. As such, we have + * created a version 3 with equivalent functionality to version 2, but + * with a header change to protect legacy source code from using + * version 2 when it wanted to use version 1. If your system has code + * that trips the following warning, it is using version 2 specific + * capabilities and may be doing so insecurely. + * + * The remedy is to either upgrade your version of libcap (to 2.10+, + * if the application is linked against it), or recompile your + * application with modern kernel headers and this warning will go + * away. + */ + +static void warn_deprecated_v2(void) +{ + static int warned; + + if (!warned) { + char name[sizeof(current->comm)]; + + printk(KERN_INFO "warning: `%s' uses deprecated v2" + " capabilities in a way that may be insecure.\n", + get_task_comm(name, current)); + warned = 1; + } +} + +/* + * Version check. Return the number of u32s in each capability flag + * array, or a negative value on error. + */ +static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy) +{ + __u32 version; + + if (get_user(version, &header->version)) + return -EFAULT; + + switch (version) { + case _LINUX_CAPABILITY_VERSION_1: + warn_legacy_capability_use(); + *tocopy = _LINUX_CAPABILITY_U32S_1; + break; + case _LINUX_CAPABILITY_VERSION_2: + warn_deprecated_v2(); + /* + * fall through - v3 is otherwise equivalent to v2. + */ + case _LINUX_CAPABILITY_VERSION_3: + *tocopy = _LINUX_CAPABILITY_U32S_3; + break; + default: + if (put_user((u32)_KERNEL_CAPABILITY_VERSION, &header->version)) + return -EFAULT; + return -EINVAL; + } + + return 0; +} + /* * For sys_getproccap() and sys_setproccap(), any of the three * capability set pointers may be NULL -- indicating that that set is @@ -71,27 +134,13 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) { int ret = 0; pid_t pid; - __u32 version; struct task_struct *target; unsigned tocopy; kernel_cap_t pE, pI, pP; - if (get_user(version, &header->version)) - return -EFAULT; - - switch (version) { - case _LINUX_CAPABILITY_VERSION_1: - warn_legacy_capability_use(); - tocopy = _LINUX_CAPABILITY_U32S_1; - break; - case _LINUX_CAPABILITY_VERSION_2: - tocopy = _LINUX_CAPABILITY_U32S_2; - break; - default: - if (put_user(_LINUX_CAPABILITY_VERSION, &header->version)) - return -EFAULT; - return -EINVAL; - } + ret = cap_validate_magic(header, &tocopy); + if (ret != 0) + return ret; if (get_user(pid, &header->pid)) return -EFAULT; @@ -118,7 +167,7 @@ out: spin_unlock(&task_capability_lock); if (!ret) { - struct __user_cap_data_struct kdata[_LINUX_CAPABILITY_U32S]; + struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; unsigned i; for (i = 0; i < tocopy; i++) { @@ -128,7 +177,7 @@ out: } /* - * Note, in the case, tocopy < _LINUX_CAPABILITY_U32S, + * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S, * we silently drop the upper capabilities here. This * has the effect of making older libcap * implementations implicitly drop upper capability @@ -240,30 +289,16 @@ static inline int cap_set_all(kernel_cap_t *effective, */ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) { - struct __user_cap_data_struct kdata[_LINUX_CAPABILITY_U32S]; + struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; unsigned i, tocopy; kernel_cap_t inheritable, permitted, effective; - __u32 version; struct task_struct *target; int ret; pid_t pid; - if (get_user(version, &header->version)) - return -EFAULT; - - switch (version) { - case _LINUX_CAPABILITY_VERSION_1: - warn_legacy_capability_use(); - tocopy = _LINUX_CAPABILITY_U32S_1; - break; - case _LINUX_CAPABILITY_VERSION_2: - tocopy = _LINUX_CAPABILITY_U32S_2; - break; - default: - if (put_user(_LINUX_CAPABILITY_VERSION, &header->version)) - return -EFAULT; - return -EINVAL; - } + ret = cap_validate_magic(header, &tocopy); + if (ret != 0) + return ret; if (get_user(pid, &header->pid)) return -EFAULT; @@ -281,7 +316,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) permitted.cap[i] = kdata[i].permitted; inheritable.cap[i] = kdata[i].inheritable; } - while (i < _LINUX_CAPABILITY_U32S) { + while (i < _KERNEL_CAPABILITY_U32S) { effective.cap[i] = 0; permitted.cap[i] = 0; inheritable.cap[i] = 0; -- cgit v1.2.3-70-g09d2 From 1d92cfd54a51ff1b9593019fdde56793b66ba6a9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Jun 2008 10:59:02 +0100 Subject: cifs endianness fixes __le16 fields used as host-endian. Signed-off-by: Al Viro Acked-by: Steve French Signed-off-by: Linus Torvalds --- fs/cifs/cifssmb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 9b8b4cfdf993..fb655b4593c6 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3927,9 +3927,9 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, } ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals); - if (ref->VersionNumber != 3) { + if (ref->VersionNumber != cpu_to_le16(3)) { cERROR(1, ("Referrals of V%d version are not supported," - "should be V3", ref->VersionNumber)); + "should be V3", le16_to_cpu(ref->VersionNumber))); rc = -EINVAL; goto parse_DFS_referrals_exit; } @@ -3977,7 +3977,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, if (rc) goto parse_DFS_referrals_exit; - ref += ref->Size; + ref += le16_to_cpu(ref->Size); } parse_DFS_referrals_exit: -- cgit v1.2.3-70-g09d2 From ddb2c43594f22843e9f3153da151deaba1a834c5 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 4 Jun 2008 09:16:33 -0700 Subject: asn1: additional sanity checking during BER decoding - Don't trust a length which is greater than the working buffer. An invalid length could cause overflow when calculating buffer size for decoding oid. - An oid length of zero is invalid and allows for an off-by-one error when decoding oid because the first subid actually encodes first 2 subids. - A primitive encoding may not have an indefinite length. Thanks to Wei Wang from McAfee for report. Cc: Steven French Cc: stable@kernel.org Acked-by: Patrick McHardy Signed-off-by: Chris Wright Signed-off-by: Linus Torvalds --- fs/cifs/asn1.c | 14 ++++++++++++++ net/ipv4/netfilter/nf_nat_snmp_basic.c | 14 ++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'fs') diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index cb52cbbe45ff..f58e41d3ba48 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -186,6 +186,11 @@ asn1_length_decode(struct asn1_ctx *ctx, unsigned int *def, unsigned int *len) } } } + + /* don't trust len bigger than ctx buffer */ + if (*len > ctx->end - ctx->pointer) + return 0; + return 1; } @@ -203,6 +208,10 @@ asn1_header_decode(struct asn1_ctx *ctx, if (!asn1_length_decode(ctx, &def, &len)) return 0; + /* primitive shall be definite, indefinite shall be constructed */ + if (*con == ASN1_PRI && !def) + return 0; + if (def) *eoc = ctx->pointer + len; else @@ -389,6 +398,11 @@ asn1_oid_decode(struct asn1_ctx *ctx, unsigned long *optr; size = eoc - ctx->pointer + 1; + + /* first subid actually encodes first two subids */ + if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) + return 0; + *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); if (*oid == NULL) return 0; diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 5daefad3d193..7750c97fde7b 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -232,6 +232,11 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx, } } } + + /* don't trust len bigger than ctx buffer */ + if (*len > ctx->end - ctx->pointer) + return 0; + return 1; } @@ -250,6 +255,10 @@ static unsigned char asn1_header_decode(struct asn1_ctx *ctx, if (!asn1_length_decode(ctx, &def, &len)) return 0; + /* primitive shall be definite, indefinite shall be constructed */ + if (*con == ASN1_PRI && !def) + return 0; + if (def) *eoc = ctx->pointer + len; else @@ -434,6 +443,11 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, unsigned long *optr; size = eoc - ctx->pointer + 1; + + /* first subid actually encodes first two subids */ + if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) + return 0; + *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); if (*oid == NULL) { if (net_ratelimit()) -- cgit v1.2.3-70-g09d2 From b8c141e8fd80fa64d80c6a74492053f25a28e0ea Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 5 Jun 2008 22:45:55 -0700 Subject: frv: don't offer BINFMT_FLAT Fix the following compile error: CC fs/binfmt_flat.o In file included from /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:36: /home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/flat.h:14:22: error: asm/flat.h: No such file or directory /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c: In function 'create_flat_tables': /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:124: error: implicit declaration of function 'flat_stack_align' /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:125: error: implicit declaration of function 'flat_argvp_envp_on_stack' /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c: In function 'calc_reloc': /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:347: error: implicit declaration of function 'flat_reloc_valid' /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c: In function 'load_flat_file': /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:479: error: implicit declaration of function 'flat_old_ram_flag' /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:755: error: implicit declaration of function 'flat_set_persistent' /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:757: error: implicit declaration of function 'flat_get_relocate_addr' /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:765: error: implicit declaration of function 'flat_get_addr_from_rp' /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:781: error: implicit declaration of function 'flat_put_addr_at_rp' Reported-by: Adrian Bunk Signed-off-by: Adrian Bunk Tested-by: David Howells Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig.binfmt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 55e8ee1900a5..3263084eef9e 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -42,7 +42,7 @@ config BINFMT_ELF_FDPIC config BINFMT_FLAT bool "Kernel support for flat binaries" - depends on !MMU + depends on !MMU && (!FRV || BROKEN) help Support uClinux FLAT format binaries. -- cgit v1.2.3-70-g09d2 From d3e49afbb66109613c3474f2273f5830ac2dcb09 Mon Sep 17 00:00:00 2001 From: Michael Halcrow Date: Thu, 5 Jun 2008 22:46:02 -0700 Subject: eCryptfs: remove unnecessary page decrypt call The page decrypt calls in ecryptfs_write() are both pointless and buggy. Pointless because ecryptfs_get_locked_page() has already brought the page up to date, and buggy because prior mmap writes will just be blown away by the decrypt call. This patch also removes the declaration of a now-nonexistent function ecryptfs_write_zeros(). Thanks to Eric Sandeen and David Kleikamp for helping to track this down. Eric said: fsx w/ mmap dies quickly ( < 100 ops) without this, and survives nicely (to millions of ops+) with it in place. Signed-off-by: Michael Halcrow Cc: Eric Sandeen Cc: Dave Kleikamp Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/ecryptfs_kernel.h | 2 -- fs/ecryptfs/read_write.c | 22 ---------------------- 2 files changed, 24 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 951ee33a022d..c15c25745e05 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -660,8 +660,6 @@ int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm, int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, struct ecryptfs_auth_tok **auth_tok, char *sig); -int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, - int num_zeros); int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, loff_t offset, size_t size); int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index ebf55150be56..75c2ea9fee35 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -157,20 +157,6 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, ecryptfs_page_idx, rc); goto out; } - if (start_offset_in_page) { - /* Read in the page from the lower - * into the eCryptfs inode page cache, - * decrypting */ - rc = ecryptfs_decrypt_page(ecryptfs_page); - if (rc) { - printk(KERN_ERR "%s: Error decrypting " - "page; rc = [%d]\n", - __func__, rc); - ClearPageUptodate(ecryptfs_page); - page_cache_release(ecryptfs_page); - goto out; - } - } ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); /* @@ -349,14 +335,6 @@ int ecryptfs_read(char *data, loff_t offset, size_t size, ecryptfs_page_idx, rc); goto out; } - rc = ecryptfs_decrypt_page(ecryptfs_page); - if (rc) { - printk(KERN_ERR "%s: Error decrypting " - "page; rc = [%d]\n", __func__, rc); - ClearPageUptodate(ecryptfs_page); - page_cache_release(ecryptfs_page); - goto out; - } ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); memcpy((data + data_offset), ((char *)ecryptfs_page_virt + start_offset_in_page), -- cgit v1.2.3-70-g09d2 From 44d1b980c72db0faf35adb082fb2208351803028 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 5 Jun 2008 22:46:18 -0700 Subject: Fix various old email addresses for dwmw2 Although if people have questions about ARCnet, perhaps it's _better_ for them to be mailing dwmw2@cam.ac.uk about it... Signed-off-by: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/networking/arcnet.txt | 2 +- arch/frv/kernel/cmode.S | 2 +- arch/frv/kernel/sleep.S | 2 +- arch/frv/mb93090-mb00/pci-dma-nommu.c | 2 +- drivers/mtd/redboot.c | 2 +- fs/afs/callback.c | 2 +- fs/afs/inode.c | 2 +- fs/afs/super.c | 2 +- include/linux/mtd/nand.h | 2 +- include/linux/tty.h | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/Documentation/networking/arcnet.txt b/Documentation/networking/arcnet.txt index 770fc41a78e8..796012540386 100644 --- a/Documentation/networking/arcnet.txt +++ b/Documentation/networking/arcnet.txt @@ -46,7 +46,7 @@ These are the ARCnet drivers for Linux. This new release (2.91) has been put together by David Woodhouse -, in an attempt to tidy up the driver after adding support +, in an attempt to tidy up the driver after adding support for yet another chipset. Now the generic support has been separated from the individual chipset drivers, and the source files aren't quite so packed with #ifdefs! I've changed this file a bit, but kept it in the first person from diff --git a/arch/frv/kernel/cmode.S b/arch/frv/kernel/cmode.S index 81ba28ad2207..53deeb5d7e87 100644 --- a/arch/frv/kernel/cmode.S +++ b/arch/frv/kernel/cmode.S @@ -1,7 +1,7 @@ /* cmode.S: clock mode management * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Woodhouse (dwmw2@redhat.com) + * Written by David Woodhouse (dwmw2@infradead.org) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/arch/frv/kernel/sleep.S b/arch/frv/kernel/sleep.S index c9b2d51ab9ad..f67bf73cd2cc 100644 --- a/arch/frv/kernel/sleep.S +++ b/arch/frv/kernel/sleep.S @@ -1,7 +1,7 @@ /* sleep.S: power saving mode entry * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Woodhouse (dwmw2@redhat.com) + * Written by David Woodhouse (dwmw2@infradead.org) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c index 4985466b1a7c..64ee58d748be 100644 --- a/arch/frv/mb93090-mb00/pci-dma-nommu.c +++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c @@ -1,7 +1,7 @@ /* pci-dma-nommu.c: Dynamic DMA mapping support for the FRV * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Woodhouse (dwmw2@redhat.com) + * Written by David Woodhouse (dwmw2@infradead.org) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/drivers/mtd/redboot.c b/drivers/mtd/redboot.c index 47474903263c..c5030f94f04e 100644 --- a/drivers/mtd/redboot.c +++ b/drivers/mtd/redboot.c @@ -295,5 +295,5 @@ module_init(redboot_parser_init); module_exit(redboot_parser_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Red Hat, Inc. - David Woodhouse "); +MODULE_AUTHOR("David Woodhouse "); MODULE_DESCRIPTION("Parsing code for RedBoot Flash Image System (FIS) tables"); diff --git a/fs/afs/callback.c b/fs/afs/callback.c index a78d5b236bb1..587ef5123cd8 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -8,7 +8,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * Authors: David Woodhouse + * Authors: David Woodhouse * David Howells * */ diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 08db82e1343a..bb47217f6a18 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -8,7 +8,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * Authors: David Woodhouse + * Authors: David Woodhouse * David Howells * */ diff --git a/fs/afs/super.c b/fs/afs/super.c index 4b572b801d8d..7e3faeef6818 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -10,7 +10,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * Authors: David Howells - * David Woodhouse + * David Woodhouse * */ diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c42bc7f533a5..53ea3dc8b0e8 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1,7 +1,7 @@ /* * linux/include/linux/mtd/nand.h * - * Copyright (c) 2000 David Woodhouse + * Copyright (c) 2000 David Woodhouse * Steven J. Hill * Thomas Gleixner * diff --git a/include/linux/tty.h b/include/linux/tty.h index 7f7121f9c968..324a3b231d40 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -36,7 +36,7 @@ #define N_6PACK 7 #define N_MASC 8 /* Reserved for Mobitex module */ #define N_R3964 9 /* Reserved for Simatic R3964 module */ -#define N_PROFIBUS_FDL 10 /* Reserved for Profibus */ +#define N_PROFIBUS_FDL 10 /* Reserved for Profibus */ #define N_IRDA 11 /* Linux IrDa - http://irda.sourceforge.net/ */ #define N_SMSBLOCK 12 /* SMS block mode - for talking to GSM data */ /* cards about SMS messages */ -- cgit v1.2.3-70-g09d2 From 93b071139a956e51c98cdefd50a47981a4eb852e Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 5 Jun 2008 22:46:21 -0700 Subject: introduce memory_read_from_buffer() This patch introduces memory_read_from_buffer(). The only difference between memory_read_from_buffer() and simple_read_from_buffer() is which address space the function copies to. simple_read_from_buffer copies to user space memory. memory_read_from_buffer copies to normal memory. Signed-off-by: Akinobu Mita Cc: Al Viro Cc: Doug Warzecha Cc: Zhang Rui Cc: Matt Domsch Cc: Abhay Salunke Cc: Greg Kroah-Hartman Cc: Markus Rechberger Cc: Kay Sievers Cc: Bob Moore Cc: Thomas Renninger Cc: Len Brown Cc: Benjamin Herrenschmidt Cc: "Antonino A. Daplas" Cc: Krzysztof Helt Cc: Geert Uytterhoeven Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Peter Oberparleiter Cc: Michael Holzheu Cc: Brian King Cc: James E.J. Bottomley Cc: Andrew Vasquez Cc: Seokmann Ju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/libfs.c | 18 ++++++++++++++++++ include/linux/fs.h | 5 ++++- 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/libfs.c b/fs/libfs.c index b004dfadd891..892d41cb3382 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -528,6 +528,23 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, return count; } +ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, + const void *from, size_t available) +{ + loff_t pos = *ppos; + + if (pos < 0) + return -EINVAL; + if (pos >= available) + return 0; + if (count > available - pos) + count = available - pos; + memcpy(to, from + pos, count); + *ppos = pos + count; + + return count; +} + /* * Transaction based IO. * The file expects a single write which triggers the transaction, and then @@ -800,6 +817,7 @@ EXPORT_SYMBOL(simple_statfs); EXPORT_SYMBOL(simple_sync_file); EXPORT_SYMBOL(simple_unlink); EXPORT_SYMBOL(simple_read_from_buffer); +EXPORT_SYMBOL(memory_read_from_buffer); EXPORT_SYMBOL(simple_transaction_get); EXPORT_SYMBOL(simple_transaction_read); EXPORT_SYMBOL(simple_transaction_release); diff --git a/include/linux/fs.h b/include/linux/fs.h index f413085f748e..d490779f18d9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2000,7 +2000,10 @@ extern int simple_fill_super(struct super_block *, int, struct tree_descr *); extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); extern void simple_release_fs(struct vfsmount **mount, int *count); -extern ssize_t simple_read_from_buffer(void __user *, size_t, loff_t *, const void *, size_t); +extern ssize_t simple_read_from_buffer(void __user *to, size_t count, + loff_t *ppos, const void *from, size_t available); +extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, + const void *from, size_t available); #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, -- cgit v1.2.3-70-g09d2 From 7db9cfd380205f6b50afdc3bc3619f876a5eaf0d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 5 Jun 2008 22:46:27 -0700 Subject: devscgroup: check for device permissions at mount time Currently even if a task sits in an all-denied cgroup it can still mount any block device in any mode it wants. Put a proper check in do_open for block device to prevent this. Signed-off-by: Pavel Emelyanov Acked-by: Serge Hallyn Tested-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/block_dev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 7d822fae7765..470c10ceb0fb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -928,9 +929,14 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) { struct module *owner = NULL; struct gendisk *disk; - int ret = -ENXIO; + int ret; int part; + ret = devcgroup_inode_permission(bdev->bd_inode, file->f_mode); + if (ret != 0) + return ret; + + ret = -ENXIO; file->f_mapping = bdev->bd_inode->i_mapping; lock_kernel(); disk = get_gendisk(bdev->bd_dev, &part); -- cgit v1.2.3-70-g09d2 From aae8679b0ebcaa92f99c1c3cb0cd651594a43915 Mon Sep 17 00:00:00 2001 From: Thomas Tuttle Date: Thu, 5 Jun 2008 22:46:31 -0700 Subject: pagemap: fix bug in add_to_pagemap, require aligned-length reads of /proc/pid/pagemap Fix a bug in add_to_pagemap. Previously, since pm->out was a char *, put_user was only copying 1 byte of every PFN, resulting in the top 7 bytes of each PFN not being copied. By requiring that reads be a multiple of 8 bytes, I can make pm->out and pm->end u64*s instead of char*s, which makes put_user work properly, and also simplifies the logic in add_to_pagemap a bit. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Thomas Tuttle Cc: Matt Mackall Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 88717c0f941b..17403629e330 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -496,7 +496,7 @@ const struct file_operations proc_clear_refs_operations = { }; struct pagemapread { - char __user *out, *end; + u64 __user *out, *end; }; #define PM_ENTRY_BYTES sizeof(u64) @@ -519,21 +519,11 @@ struct pagemapread { static int add_to_pagemap(unsigned long addr, u64 pfn, struct pagemapread *pm) { - /* - * Make sure there's room in the buffer for an - * entire entry. Otherwise, only copy part of - * the pfn. - */ - if (pm->out + PM_ENTRY_BYTES >= pm->end) { - if (copy_to_user(pm->out, &pfn, pm->end - pm->out)) - return -EFAULT; - pm->out = pm->end; - return PM_END_OF_BUFFER; - } - if (put_user(pfn, pm->out)) return -EFAULT; - pm->out += PM_ENTRY_BYTES; + pm->out++; + if (pm->out >= pm->end) + return PM_END_OF_BUFFER; return 0; } @@ -634,7 +624,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, ret = -EINVAL; /* file position must be aligned */ - if (*ppos % PM_ENTRY_BYTES) + if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) goto out_task; ret = 0; @@ -664,8 +654,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, goto out_pages; } - pm.out = buf; - pm.end = buf + count; + pm.out = (u64 *)buf; + pm.end = (u64 *)(buf + count); if (!ptrace_may_attach(task)) { ret = -EIO; @@ -690,9 +680,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (ret == PM_END_OF_BUFFER) ret = 0; /* don't need mmap_sem for these, but this looks cleaner */ - *ppos += pm.out - buf; + *ppos += (char *)pm.out - buf; if (!ret) - ret = pm.out - buf; + ret = (char *)pm.out - buf; } out_pages: -- cgit v1.2.3-70-g09d2 From d100d148aa48df3b6ad526a48624f906695efe60 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Jun 2008 22:46:46 -0700 Subject: nommu: fix ksize() abuse The nommu binfmt code uses ksize() for pointers returned from do_mmap() which is wrong. This converts the call-sites to use the nommu specific kobjsize() function which works as expected. Cc: Christoph Lameter Cc: Matt Mackall Acked-by: Paul Mundt Acked-by: David Howells Signed-off-by: Pekka Enberg Acked-by: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf_fdpic.c | 2 +- fs/binfmt_flat.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index ddd35d873391..d051a32e6270 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -390,7 +390,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, } /* expand the stack mapping to use up the entire allocation granule */ - fullsize = ksize((char *) current->mm->start_brk); + fullsize = kobjsize((char *) current->mm->start_brk); if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size, fullsize, 0, 0))) stack_size = fullsize; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 3b40d45a3a16..2cb1acda3a82 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -548,7 +548,7 @@ static int load_flat_file(struct linux_binprm * bprm, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); /* Remap to use all availabe slack region space */ if (realdatastart && (realdatastart < (unsigned long)-4096)) { - reallen = ksize((void *)realdatastart); + reallen = kobjsize((void *)realdatastart); if (reallen > len) { realdatastart = do_mremap(realdatastart, len, reallen, MREMAP_FIXED, realdatastart); @@ -600,7 +600,7 @@ static int load_flat_file(struct linux_binprm * bprm, PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); /* Remap to use all availabe slack region space */ if (textpos && (textpos < (unsigned long) -4096)) { - reallen = ksize((void *)textpos); + reallen = kobjsize((void *)textpos); if (reallen > len) { textpos = do_mremap(textpos, len, reallen, MREMAP_FIXED, textpos); @@ -683,7 +683,7 @@ static int load_flat_file(struct linux_binprm * bprm, */ current->mm->start_brk = datapos + data_len + bss_len; current->mm->brk = (current->mm->start_brk + 3) & ~3; - current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len; + current->mm->context.end_brk = memp + kobjsize((void *) memp) - stack_len; } if (flags & FLAT_FLAG_KTRACE) @@ -790,7 +790,7 @@ static int load_flat_file(struct linux_binprm * bprm, /* zero the BSS, BRK and stack areas */ memset((void*)(datapos + data_len), 0, bss_len + - (memp + ksize((void *) memp) - stack_len - /* end brk */ + (memp + kobjsize((void *) memp) - stack_len - /* end brk */ libinfo->lib_list[id].start_brk) + /* start brk */ stack_len); -- cgit v1.2.3-70-g09d2 From 9bb91784de6618c955994b2d5be332fb68c87ef1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 5 Jun 2008 22:46:47 -0700 Subject: ext3: fix online resize bug There is a bug when we are trying to verify that the reserve inode's double indirect blocks point back to the primary gdt blocks. The fix is obvious, we need to mod the gdb count by the addr's per block. You can verify this with the following test case dd if=/dev/zero of=disk1 seek=1024 count=1 bs=100M losetup /dev/loop1 disk1 pvcreate /dev/loop1 vgcreate loopvg1 /dev/loop1 lvcreate -l 100%VG loopvg1 -n looplv1 mkfs.ext3 -J size=64 -b 1024 /dev/loopvg1/looplv1 mount /dev/loopvg1/looplv1 /mnt/loop dd if=/dev/zero of=disk2 seek=1024 count=1 bs=50M losetup /dev/loop2 disk2 pvcreate /dev/loop2 vgextend loopvg1 /dev/loop2 lvextend -l 100%VG /dev/loopvg1/looplv1 resize2fs /dev/loopvg1/looplv1 without this patch the resize2fs fails, with it the resize2fs succeeds. Signed-off-by: Josef Bacik Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 28cfd0b40527..77278e947e94 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -580,7 +580,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, } blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count; - data = (__le32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count; + data = (__le32 *)dind->b_data + (EXT3_SB(sb)->s_gdb_count % + EXT3_ADDR_PER_BLOCK(sb)); end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb); /* Get each reserved primary GDT block and verify it holds backups */ -- cgit v1.2.3-70-g09d2 From aed5417593ad125283f35513573282139a8664b5 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 5 Jun 2008 22:46:53 -0700 Subject: proc: calculate the correct /proc/ link count This patch: commit e9720acd728a46cb40daa52c99a979f7c4ff195c Author: Pavel Emelyanov Date: Fri Mar 7 11:08:40 2008 -0800 [NET]: Make /proc/net a symlink on /proc/self/net (v3) introduced a /proc/self/net directory without bumping the corresponding link count for /proc/self. This patch replaces the static link count initializations with a call that counts the number of directory entries in the given pid_entry table whenever it is instantiated, and thus relieves the burden of manually keeping the two in sync. [akpm@linux-foundation.org: cleanup] Acked-by: Eric W. Biederman Cc: Pavel Emelyanov Signed-off-by: Vegard Nossum Cc: "David S. Miller" Cc: Alexey Dobriyan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index c447e0743a3c..3b455371e7ff 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -127,6 +127,25 @@ struct pid_entry { NULL, &proc_single_file_operations, \ { .proc_show = &proc_##OTYPE } ) +/* + * Count the number of hardlinks for the pid_entry table, excluding the . + * and .. links. + */ +static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, + unsigned int n) +{ + unsigned int i; + unsigned int count; + + count = 0; + for (i = 0; i < n; ++i) { + if (S_ISDIR(entries[i].mode)) + ++count; + } + + return count; +} + int maps_protect; EXPORT_SYMBOL(maps_protect); @@ -2585,10 +2604,9 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; - inode->i_nlink = 5; -#ifdef CONFIG_SECURITY - inode->i_nlink += 1; -#endif + + inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, + ARRAY_SIZE(tgid_base_stuff)); dentry->d_op = &pid_dentry_operations; @@ -2816,10 +2834,9 @@ static struct dentry *proc_task_instantiate(struct inode *dir, inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; inode->i_flags|=S_IMMUTABLE; - inode->i_nlink = 4; -#ifdef CONFIG_SECURITY - inode->i_nlink += 1; -#endif + + inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, + ARRAY_SIZE(tid_base_stuff)); dentry->d_op = &pid_dentry_operations; -- cgit v1.2.3-70-g09d2 From bbcdac0c20aa20d1daad41d9c138102b70e5aae4 Mon Sep 17 00:00:00 2001 From: Thomas Tuttle Date: Thu, 5 Jun 2008 22:46:58 -0700 Subject: pagemap: return map count, not reference count, in /proc/kpagecount Since pagemap is all about examining pages mapped into processes' memory spaces, it makes sense for kpagecount to return the map counts, not the reference counts. Signed-off-by: Thomas Tuttle Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 32dc14cd8900..5a16090a6d6e 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -726,7 +726,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, if (!ppage) pcount = 0; else - pcount = atomic_read(&ppage->_count); + pcount = page_mapcount(ppage); if (put_user(pcount, out++)) { ret = -EFAULT; -- cgit v1.2.3-70-g09d2 From 4710d1ac4c491dd8a28f57946214c0b5fe73cc87 Mon Sep 17 00:00:00 2001 From: Thomas Tuttle Date: Thu, 5 Jun 2008 22:46:58 -0700 Subject: pagemap: return EINVAL, not EIO, for unaligned reads of kpagecount or kpageflags If the user tries to read from a position that is not a multiple of 8, or read a number of bytes that is not a multiple of 8, they have passed an invalid argument to read, for the purpose of reading these files. It's not an IO error because we didn't encounter any trouble finding the data they asked for. Signed-off-by: Thomas Tuttle Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 5a16090a6d6e..7e277f2ad466 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -716,7 +716,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, pfn = src / KPMSIZE; count = min_t(size_t, count, (max_pfn * KPMSIZE) - src); if (src & KPMMASK || count & KPMMASK) - return -EIO; + return -EINVAL; while (count > 0) { ppage = NULL; @@ -782,7 +782,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, pfn = src / KPMSIZE; count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); if (src & KPMMASK || count & KPMMASK) - return -EIO; + return -EINVAL; while (count > 0) { ppage = NULL; -- cgit v1.2.3-70-g09d2 From aab2545fdd6641b76af0ae96456c4ca9d1e50dad Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Jun 2008 11:31:39 -0700 Subject: uml: activate_mm: remove the dead PF_BORROWED_MM check use_mm() was changed to use switch_mm() instead of activate_mm(), since then nobody calls (and nobody should call) activate_mm() with PF_BORROWED_MM bit set. As Jeff Dike pointed out, we can also remove the "old != new" check, it is always true. Signed-off-by: Oleg Nesterov Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 4 ---- include/asm-um/mmu_context.h | 12 +++--------- 2 files changed, 3 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index b5253e77eb2f..0fb3117ddd93 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -591,10 +591,6 @@ static void use_mm(struct mm_struct *mm) atomic_inc(&mm->mm_count); tsk->mm = mm; tsk->active_mm = mm; - /* - * Note that on UML this *requires* PF_BORROWED_MM to be set, otherwise - * it won't work. Update it accordingly if you change it here - */ switch_mm(active_mm, mm, tsk); task_unlock(tsk); diff --git a/include/asm-um/mmu_context.h b/include/asm-um/mmu_context.h index 6686fc524ca1..54f42e8b0105 100644 --- a/include/asm-um/mmu_context.h +++ b/include/asm-um/mmu_context.h @@ -22,16 +22,10 @@ extern void force_flush_all(void); static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) { /* - * This is called by fs/exec.c and fs/aio.c. In the first case, for an - * exec, we don't need to do anything as we're called from userspace - * and thus going to use a new host PID. In the second, we're called - * from a kernel thread, and thus need to go doing the mmap's on the - * host. Since they're very expensive, we want to avoid that as far as - * possible. + * This is called by fs/exec.c and sys_unshare() + * when the new ->mm is used for the first time. */ - if (old != new && (current->flags & PF_BORROWED_MM)) - __switch_mm(&new->context.id); - + __switch_mm(&new->context.id); arch_dup_mmap(old, new); } -- cgit v1.2.3-70-g09d2 From dbdbb87636e882042cbe53d5d4eac94206f8db83 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 10 Jun 2008 21:21:56 +0000 Subject: [CIFS] Fix hang in mount when negprot causes server to kill tcp session Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/CHANGES | 5 +++++ fs/cifs/connect.c | 1 + 2 files changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 28e3d5c5fcac..1f3465201fdf 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -2,6 +2,11 @@ Version 1.53 ------------ DFS support added (Microsoft Distributed File System client support needed for referrals which enable a hierarchical name space among servers). +Disable temporary caching of mode bits to servers which do not support +storing of mode (e.g. Windows servers, when client mounts without cifsacl +mount option) and add new "dynperm" mount option to enable temporary caching +of mode (enable old behavior). Fix hang on mount caused when server crashes +tcp session during negotiate protocol. Version 1.52 ------------ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d49e274f8eba..e8fa46c7cff2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -653,6 +653,7 @@ multi_t2_fnd: spin_lock(&GlobalMid_Lock); server->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); + wake_up_all(&server->response_q); /* don't exit until kthread_stop is called */ set_current_state(TASK_UNINTERRUPTIBLE); -- cgit v1.2.3-70-g09d2 From 79ee9a8b2d328243488fee8b55bfacc822049a2a Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 10 Jun 2008 21:37:02 +0000 Subject: [CIFS] cifs: fix oops on mount when CONFIG_CIFS_DFS_UPCALL is enabled simple "mount -t cifs //xxx /mnt" oopsed on strlen of options http://kerneloops.org/guilty.php?guilty=cifs_get_sb&version=2.6.25-release&start=16711 \ 68&end=1703935&class=oops Signed-off-by: Marcin Slusarz Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5df93fd6303f..86b4d5f405ae 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -97,9 +97,6 @@ cifs_read_super(struct super_block *sb, void *data, { struct inode *inode; struct cifs_sb_info *cifs_sb; -#ifdef CONFIG_CIFS_DFS_UPCALL - int len; -#endif int rc = 0; /* BB should we make this contingent on mount parm? */ @@ -117,15 +114,17 @@ cifs_read_super(struct super_block *sb, void *data, * complex operation (mount), and in case of fail * just exit instead of doing mount and attempting * undo it if this copy fails?*/ - len = strlen(data); - cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); - if (cifs_sb->mountdata == NULL) { - kfree(sb->s_fs_info); - sb->s_fs_info = NULL; - return -ENOMEM; + if (data) { + int len = strlen(data); + cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); + if (cifs_sb->mountdata == NULL) { + kfree(sb->s_fs_info); + sb->s_fs_info = NULL; + return -ENOMEM; + } + strncpy(cifs_sb->mountdata, data, len + 1); + cifs_sb->mountdata[len] = '\0'; } - strncpy(cifs_sb->mountdata, data, len + 1); - cifs_sb->mountdata[len] = '\0'; #endif rc = cifs_mount(sb, cifs_sb, data, devname); -- cgit v1.2.3-70-g09d2 From 2d518f84e5ecd1d71df0e6ac5176d212f68c27ce Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 12 Jun 2008 15:21:28 -0700 Subject: fat: relax the permission check of fat_setattr() New chmod() allows only acceptable permission, and if not acceptable, it returns -EPERM. Old one allows even if it can't store permission to on disk inode. But it seems too strict for users. E.g. https://bugzilla.redhat.com/show_bug.cgi?id=449080: With new one, rsync couldn't create the temporary file. So, this patch allows like old one, but now it doesn't change the permission if it can't store, and it returns 0. Also, this patch fixes missing check. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/file.c | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/fat/file.c b/fs/fat/file.c index 27cc1164ec36..771326b8047e 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -257,26 +257,34 @@ int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) } EXPORT_SYMBOL_GPL(fat_getattr); -static int fat_check_mode(const struct msdos_sb_info *sbi, struct inode *inode, - mode_t mode) +static int fat_sanitize_mode(const struct msdos_sb_info *sbi, + struct inode *inode, umode_t *mode_ptr) { - mode_t mask, req = mode & ~S_IFMT; + mode_t mask, perm; - if (S_ISREG(mode)) + /* + * Note, the basic check is already done by a caller of + * (attr->ia_mode & ~MSDOS_VALID_MODE) + */ + + if (S_ISREG(inode->i_mode)) mask = sbi->options.fs_fmask; else mask = sbi->options.fs_dmask; + perm = *mode_ptr & ~(S_IFMT | mask); + /* * Of the r and x bits, all (subject to umask) must be present. Of the * w bits, either all (subject to umask) or none must be present. */ - req &= ~mask; - if ((req & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) + if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) return -EPERM; - if ((req & S_IWUGO) && ((req & S_IWUGO) != (S_IWUGO & ~mask))) + if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) return -EPERM; + *mode_ptr &= S_IFMT | perm; + return 0; } @@ -299,7 +307,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); struct inode *inode = dentry->d_inode; - int mask, error = 0; + int error = 0; unsigned int ia_valid; lock_kernel(); @@ -332,12 +340,13 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) error = 0; goto out; } + if (((attr->ia_valid & ATTR_UID) && (attr->ia_uid != sbi->options.fs_uid)) || ((attr->ia_valid & ATTR_GID) && (attr->ia_gid != sbi->options.fs_gid)) || ((attr->ia_valid & ATTR_MODE) && - fat_check_mode(sbi, inode, attr->ia_mode) < 0)) + (attr->ia_mode & ~MSDOS_VALID_MODE))) error = -EPERM; if (error) { @@ -346,15 +355,16 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) goto out; } - error = inode_setattr(inode, attr); - if (error) - goto out; + /* + * We don't return -EPERM here. Yes, strange, but this is too + * old behavior. + */ + if (attr->ia_valid & ATTR_MODE) { + if (fat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0) + attr->ia_valid &= ~ATTR_MODE; + } - if (S_ISDIR(inode->i_mode)) - mask = sbi->options.fs_dmask; - else - mask = sbi->options.fs_fmask; - inode->i_mode &= S_IFMT | (S_IRWXUGO & ~mask); + error = inode_setattr(inode, attr); out: unlock_kernel(); return error; -- cgit v1.2.3-70-g09d2 From 2165009bdf63f79716a36ad545df14c3cdf958b7 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 12 Jun 2008 15:21:47 -0700 Subject: pagemap: pass mm into pagewalkers We need this at least for huge page detection for now, because powerpc needs the vm_area_struct to be able to determine whether a virtual address is referring to a huge page (its pmd_huge() doesn't work). It might also come in handy for some of the other users. Signed-off-by: Dave Hansen Acked-by: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 44 +++++++++++++++++++++++++------------------- include/linux/mm.h | 17 +++++++++-------- mm/pagewalk.c | 42 ++++++++++++++++++++++-------------------- 3 files changed, 56 insertions(+), 47 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 17403629e330..f0df3109343d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -315,9 +315,9 @@ struct mem_size_stats { }; static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - void *private) + struct mm_walk *walk) { - struct mem_size_stats *mss = private; + struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = mss->vma; pte_t *pte, ptent; spinlock_t *ptl; @@ -365,19 +365,21 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, return 0; } -static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range }; - static int show_smap(struct seq_file *m, void *v) { struct vm_area_struct *vma = v; struct mem_size_stats mss; int ret; + struct mm_walk smaps_walk = { + .pmd_entry = smaps_pte_range, + .mm = vma->vm_mm, + .private = &mss, + }; memset(&mss, 0, sizeof mss); mss.vma = vma; if (vma->vm_mm && !is_vm_hugetlb_page(vma)) - walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end, - &smaps_walk, &mss); + walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); ret = show_map(m, v); if (ret) @@ -426,9 +428,9 @@ const struct file_operations proc_smaps_operations = { }; static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, void *private) + unsigned long end, struct mm_walk *walk) { - struct vm_area_struct *vma = private; + struct vm_area_struct *vma = walk->private; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; @@ -452,8 +454,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, return 0; } -static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range }; - static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -476,11 +476,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, return -ESRCH; mm = get_task_mm(task); if (mm) { + static struct mm_walk clear_refs_walk; + memset(&clear_refs_walk, 0, sizeof(clear_refs_walk)); + clear_refs_walk.pmd_entry = clear_refs_pte_range; + clear_refs_walk.mm = mm; down_read(&mm->mmap_sem); - for (vma = mm->mmap; vma; vma = vma->vm_next) + for (vma = mm->mmap; vma; vma = vma->vm_next) { + clear_refs_walk.private = vma; if (!is_vm_hugetlb_page(vma)) - walk_page_range(mm, vma->vm_start, vma->vm_end, - &clear_refs_walk, vma); + walk_page_range(vma->vm_start, vma->vm_end, + &clear_refs_walk); + } flush_tlb_mm(mm); up_read(&mm->mmap_sem); mmput(mm); @@ -528,9 +534,9 @@ static int add_to_pagemap(unsigned long addr, u64 pfn, } static int pagemap_pte_hole(unsigned long start, unsigned long end, - void *private) + struct mm_walk *walk) { - struct pagemapread *pm = private; + struct pagemapread *pm = walk->private; unsigned long addr; int err = 0; for (addr = start; addr < end; addr += PAGE_SIZE) { @@ -548,9 +554,9 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte) } static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - void *private) + struct mm_walk *walk) { - struct pagemapread *pm = private; + struct pagemapread *pm = walk->private; pte_t *pte; int err = 0; @@ -675,8 +681,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, * user buffer is tracked in "pm", and the walk * will stop when we hit the end of the buffer. */ - ret = walk_page_range(mm, start_vaddr, end_vaddr, - &pagemap_walk, &pm); + ret = walk_page_range(start_vaddr, end_vaddr, + &pagemap_walk); if (ret == PM_END_OF_BUFFER) ret = 0; /* don't need mmap_sem for these, but this looks cleaner */ diff --git a/include/linux/mm.h b/include/linux/mm.h index c31a9cd2a30e..586a943cab01 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -760,16 +760,17 @@ unsigned long unmap_vmas(struct mmu_gather **tlb, * (see walk_page_range for more details) */ struct mm_walk { - int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, void *); - int (*pud_entry)(pud_t *, unsigned long, unsigned long, void *); - int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, void *); - int (*pte_entry)(pte_t *, unsigned long, unsigned long, void *); - int (*pte_hole)(unsigned long, unsigned long, void *); + int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, struct mm_walk *); + int (*pud_entry)(pud_t *, unsigned long, unsigned long, struct mm_walk *); + int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *); + int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *); + int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *); + struct mm_struct *mm; + void *private; }; -int walk_page_range(const struct mm_struct *, unsigned long addr, - unsigned long end, const struct mm_walk *walk, - void *private); +int walk_page_range(unsigned long addr, unsigned long end, + struct mm_walk *walk); void free_pgd_range(struct mmu_gather **tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma, diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 0afd2387e507..d5878bed7841 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -3,14 +3,14 @@ #include static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - const struct mm_walk *walk, void *private) + struct mm_walk *walk) { pte_t *pte; int err = 0; pte = pte_offset_map(pmd, addr); for (;;) { - err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, private); + err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk); if (err) break; addr += PAGE_SIZE; @@ -24,7 +24,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, } static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, - const struct mm_walk *walk, void *private) + struct mm_walk *walk) { pmd_t *pmd; unsigned long next; @@ -35,15 +35,15 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, next = pmd_addr_end(addr, end); if (pmd_none_or_clear_bad(pmd)) { if (walk->pte_hole) - err = walk->pte_hole(addr, next, private); + err = walk->pte_hole(addr, next, walk); if (err) break; continue; } if (walk->pmd_entry) - err = walk->pmd_entry(pmd, addr, next, private); + err = walk->pmd_entry(pmd, addr, next, walk); if (!err && walk->pte_entry) - err = walk_pte_range(pmd, addr, next, walk, private); + err = walk_pte_range(pmd, addr, next, walk); if (err) break; } while (pmd++, addr = next, addr != end); @@ -52,7 +52,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, } static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, - const struct mm_walk *walk, void *private) + struct mm_walk *walk) { pud_t *pud; unsigned long next; @@ -63,15 +63,15 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) { if (walk->pte_hole) - err = walk->pte_hole(addr, next, private); + err = walk->pte_hole(addr, next, walk); if (err) break; continue; } if (walk->pud_entry) - err = walk->pud_entry(pud, addr, next, private); + err = walk->pud_entry(pud, addr, next, walk); if (!err && (walk->pmd_entry || walk->pte_entry)) - err = walk_pmd_range(pud, addr, next, walk, private); + err = walk_pmd_range(pud, addr, next, walk); if (err) break; } while (pud++, addr = next, addr != end); @@ -85,15 +85,15 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, * @addr: starting address * @end: ending address * @walk: set of callbacks to invoke for each level of the tree - * @private: private data passed to the callback function * * Recursively walk the page table for the memory area in a VMA, * calling supplied callbacks. Callbacks are called in-order (first * PGD, first PUD, first PMD, first PTE, second PTE... second PMD, * etc.). If lower-level callbacks are omitted, walking depth is reduced. * - * Each callback receives an entry pointer, the start and end of the - * associated range, and a caller-supplied private data pointer. + * Each callback receives an entry pointer and the start and end of the + * associated range, and a copy of the original mm_walk for access to + * the ->private or ->mm fields. * * No locks are taken, but the bottom level iterator will map PTE * directories from highmem if necessary. @@ -101,9 +101,8 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, * If any callback returns a non-zero value, the walk is aborted and * the return value is propagated back to the caller. Otherwise 0 is returned. */ -int walk_page_range(const struct mm_struct *mm, - unsigned long addr, unsigned long end, - const struct mm_walk *walk, void *private) +int walk_page_range(unsigned long addr, unsigned long end, + struct mm_walk *walk) { pgd_t *pgd; unsigned long next; @@ -112,21 +111,24 @@ int walk_page_range(const struct mm_struct *mm, if (addr >= end) return err; - pgd = pgd_offset(mm, addr); + if (!walk->mm) + return -EINVAL; + + pgd = pgd_offset(walk->mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) { if (walk->pte_hole) - err = walk->pte_hole(addr, next, private); + err = walk->pte_hole(addr, next, walk); if (err) break; continue; } if (walk->pgd_entry) - err = walk->pgd_entry(pgd, addr, next, private); + err = walk->pgd_entry(pgd, addr, next, walk); if (!err && (walk->pud_entry || walk->pmd_entry || walk->pte_entry)) - err = walk_pud_range(pgd, addr, next, walk, private); + err = walk_pud_range(pgd, addr, next, walk); if (err) break; } while (pgd++, addr = next, addr != end); -- cgit v1.2.3-70-g09d2 From bcf8039ed45f56013c4afea5520bca7d909e5e61 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 12 Jun 2008 15:21:48 -0700 Subject: pagemap: fix large pages in pagemap We were walking right into huge page areas in the pagemap walker, and calling the pmds pmd_bad() and clearing them. That leaked huge pages. Bad. This patch at least works around that for now. It ignores huge pages in the pagemap walker for the time being, and won't leak those pages. Signed-off-by: Dave Hansen Acked-by: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f0df3109343d..ab8ccc9d14ff 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -553,24 +553,45 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte) return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); } +static unsigned long pte_to_pagemap_entry(pte_t pte) +{ + unsigned long pme = 0; + if (is_swap_pte(pte)) + pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) + | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; + else if (pte_present(pte)) + pme = PM_PFRAME(pte_pfn(pte)) + | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; + return pme; +} + static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { + struct vm_area_struct *vma; struct pagemapread *pm = walk->private; pte_t *pte; int err = 0; + /* find the first VMA at or above 'addr' */ + vma = find_vma(walk->mm, addr); for (; addr != end; addr += PAGE_SIZE) { u64 pfn = PM_NOT_PRESENT; - pte = pte_offset_map(pmd, addr); - if (is_swap_pte(*pte)) - pfn = PM_PFRAME(swap_pte_to_pagemap_entry(*pte)) - | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; - else if (pte_present(*pte)) - pfn = PM_PFRAME(pte_pfn(*pte)) - | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; - /* unmap so we're not in atomic when we copy to userspace */ - pte_unmap(pte); + + /* check to see if we've left 'vma' behind + * and need a new, higher one */ + if (vma && (addr >= vma->vm_end)) + vma = find_vma(walk->mm, addr); + + /* check that 'vma' actually covers this address, + * and that it isn't a huge page vma */ + if (vma && (vma->vm_start <= addr) && + !is_vm_hugetlb_page(vma)) { + pte = pte_offset_map(pmd, addr); + pfn = pte_to_pagemap_entry(*pte); + /* unmap before userspace copy */ + pte_unmap(pte); + } err = add_to_pagemap(addr, pfn, pm); if (err) return err; -- cgit v1.2.3-70-g09d2 From e4f3ec063421bdbcb93330e72aa3eeedb6a0d85a Mon Sep 17 00:00:00 2001 From: Paul Collins Date: Sat, 14 Jun 2008 14:14:59 +1200 Subject: udf: restore UDFFS_DEBUG to being undefined by default Commit 706047a79725b585cf272fdefc234b31b6545c72, "udf: Fix compilation warnings when UDF debug is on" inadvertently (I assume) enabled debugging messages by default for UDF. This patch disables them again. Signed-off-by: Paul Collins Signed-off-by: Jan Kara --- fs/udf/udfdecl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 8fa9c2d70911..8ec865de5f13 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -16,7 +16,7 @@ #define UDF_PREALLOCATE #define UDF_DEFAULT_PREALLOC_BLOCKS 8 -#define UDFFS_DEBUG +#undef UDFFS_DEBUG #ifdef UDFFS_DEBUG #define udf_debug(f, a...) \ -- cgit v1.2.3-70-g09d2 From 702773b16e83fcddc41e0019b8214d3c3cecedbe Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 16 Jun 2008 12:11:54 +0100 Subject: Include in fs/exec.c only for Alpha. We only need it for the /sbin/loader hack for OSF/1 executables, and we don't want to include it otherwise. While we're at it, remove the redundant '&& CONFIG_ARCH_SUPPORTS_AOUT' in the ifdef around that code. It's already dependent on __alpha__, and CONFIG_ARCH_SUPPORTS_AOUT is hard-coded to 'y' there. Signed-off-by: David Woodhouse Acked-by: Peter Korsgaard Signed-off-by: Linus Torvalds --- fs/exec.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 9448f1b50b4a..da94a6f05df3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -61,6 +60,11 @@ #include #endif +#ifdef __alpha__ +/* for /sbin/loader handling in search_binary_handler() */ +#include +#endif + int core_uses_pid; char core_pattern[CORENAME_MAX_SIZE] = "core"; int suid_dumpable = 0; @@ -1155,7 +1159,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) { int try,retval; struct linux_binfmt *fmt; -#if defined(__alpha__) && defined(CONFIG_ARCH_SUPPORTS_AOUT) +#ifdef __alpha__ /* handle /sbin/loader.. */ { struct exec * eh = (struct exec *) bprm->buf; -- cgit v1.2.3-70-g09d2 From a9e0f5293d4999f93b469af4e70382db800a8204 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 16 Jun 2008 12:18:13 +0100 Subject: Remove last traces of a.out support from ELF loader. In commit d20894a23708c2af75966534f8e4dedb46d48db2 ("Remove a.out interpreter support in ELF loader"), Andi removed support for a.out interpreters from the ELF loader, which was only ever needed for the transition from a.out to ELF. This removes the last traces of that support, in particular the inclusion of . Signed-off-by: David Woodhouse Acked-by: Peter Korsgaard Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0fa95b198e6e..d48ff5f370f4 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -548,7 +547,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) struct { struct elfhdr elf_ex; struct elfhdr interp_elf_ex; - struct exec interp_ex; } *loc; loc = kmalloc(sizeof(*loc), GFP_KERNEL); @@ -680,7 +678,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) } /* Get the exec headers */ - loc->interp_ex = *((struct exec *)bprm->buf); loc->interp_elf_ex = *((struct elfhdr *)bprm->buf); break; } -- cgit v1.2.3-70-g09d2 From 3878f110f71a0971ff7acc15dd6db711b6ef37c6 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 30 May 2008 15:30:49 -0700 Subject: ocfs2: Move the hb_ctl_path sysctl into the stack glue. ocfs2 needs to call out to the hb_ctl program at unmount for all cluster stacks. The first step is to move the hb_ctl_path sysctl out of the o2cb code and into the generic stack glue. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/nodemanager.c | 74 +----------------------------------- fs/ocfs2/cluster/nodemanager.h | 4 -- fs/ocfs2/stack_o2cb.c | 2 +- fs/ocfs2/stackglue.c | 85 ++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/stackglue.h | 2 + 5 files changed, 89 insertions(+), 78 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index cf9401e8cd0b..cfdb08b484ed 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -21,7 +21,6 @@ #include #include -#include #include #include "tcp.h" @@ -36,65 +35,6 @@ * cluster references throughout where nodes are looked up */ struct o2nm_cluster *o2nm_single_cluster = NULL; -#define OCFS2_MAX_HB_CTL_PATH 256 -static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; - -static ctl_table ocfs2_nm_table[] = { - { - .ctl_name = 1, - .procname = "hb_ctl_path", - .data = ocfs2_hb_ctl_path, - .maxlen = OCFS2_MAX_HB_CTL_PATH, - .mode = 0644, - .proc_handler = &proc_dostring, - .strategy = &sysctl_string, - }, - { .ctl_name = 0 } -}; - -static ctl_table ocfs2_mod_table[] = { - { - .ctl_name = FS_OCFS2_NM, - .procname = "nm", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ocfs2_nm_table - }, - { .ctl_name = 0} -}; - -static ctl_table ocfs2_kern_table[] = { - { - .ctl_name = FS_OCFS2, - .procname = "ocfs2", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ocfs2_mod_table - }, - { .ctl_name = 0} -}; - -static ctl_table ocfs2_root_table[] = { - { - .ctl_name = CTL_FS, - .procname = "fs", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ocfs2_kern_table - }, - { .ctl_name = 0 } -}; - -static struct ctl_table_header *ocfs2_table_header = NULL; - -const char *o2nm_get_hb_ctl_path(void) -{ - return ocfs2_hb_ctl_path; -} -EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path); struct o2nm_node *o2nm_get_node_by_num(u8 node_num) { @@ -941,9 +881,6 @@ void o2nm_undepend_this_node(void) static void __exit exit_o2nm(void) { - if (ocfs2_table_header) - unregister_sysctl_table(ocfs2_table_header); - /* XXX sync with hb callbacks and shut down hb? */ o2net_unregister_hb_callbacks(); configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); @@ -964,16 +901,9 @@ static int __init init_o2nm(void) if (ret) goto out; - ocfs2_table_header = register_sysctl_table(ocfs2_root_table); - if (!ocfs2_table_header) { - printk(KERN_ERR "nodemanager: unable to register sysctl\n"); - ret = -ENOMEM; /* or something. */ - goto out_o2net; - } - ret = o2net_register_hb_callbacks(); if (ret) - goto out_sysctl; + goto out_o2net; config_group_init(&o2nm_cluster_group.cs_subsys.su_group); mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex); @@ -990,8 +920,6 @@ static int __init init_o2nm(void) configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); out_callbacks: o2net_unregister_hb_callbacks(); -out_sysctl: - unregister_sysctl_table(ocfs2_table_header); out_o2net: o2net_exit(); out: diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h index 7c860361b8dd..c992ea0da4ad 100644 --- a/fs/ocfs2/cluster/nodemanager.h +++ b/fs/ocfs2/cluster/nodemanager.h @@ -33,10 +33,6 @@ #include #include -#define FS_OCFS2_NM 1 - -const char *o2nm_get_hb_ctl_path(void); - struct o2nm_node { spinlock_t nd_lock; struct config_item nd_item; diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index bbd1667aa7d3..fb26a7c69c47 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -338,7 +338,7 @@ static void o2hb_stop(const char *group) int ret; char *argv[5], *envp[3]; - argv[0] = (char *)o2nm_get_hb_ctl_path(); + argv[0] = (char *)ocfs2_get_hb_ctl_path(); argv[1] = "-K"; argv[2] = "-u"; argv[3] = (char *)group; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 119f60cea9cc..fb9b8e0db260 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "ocfs2_fs.h" @@ -548,10 +549,92 @@ error: return ret; } +/* + * Sysctl bits + * + * The sysctl lives at /proc/sys/fs/ocfs2/nm/hb_ctl_path. The 'nm' doesn't + * make as much sense in a multiple cluster stack world, but it's safer + * and easier to preserve the name. + */ + +#define FS_OCFS2_NM 1 + +#define OCFS2_MAX_HB_CTL_PATH 256 +static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; + +static ctl_table ocfs2_nm_table[] = { + { + .ctl_name = 1, + .procname = "hb_ctl_path", + .data = ocfs2_hb_ctl_path, + .maxlen = OCFS2_MAX_HB_CTL_PATH, + .mode = 0644, + .proc_handler = &proc_dostring, + .strategy = &sysctl_string, + }, + { .ctl_name = 0 } +}; + +static ctl_table ocfs2_mod_table[] = { + { + .ctl_name = FS_OCFS2_NM, + .procname = "nm", + .data = NULL, + .maxlen = 0, + .mode = 0555, + .child = ocfs2_nm_table + }, + { .ctl_name = 0} +}; + +static ctl_table ocfs2_kern_table[] = { + { + .ctl_name = FS_OCFS2, + .procname = "ocfs2", + .data = NULL, + .maxlen = 0, + .mode = 0555, + .child = ocfs2_mod_table + }, + { .ctl_name = 0} +}; + +static ctl_table ocfs2_root_table[] = { + { + .ctl_name = CTL_FS, + .procname = "fs", + .data = NULL, + .maxlen = 0, + .mode = 0555, + .child = ocfs2_kern_table + }, + { .ctl_name = 0 } +}; + +static struct ctl_table_header *ocfs2_table_header = NULL; + +const char *ocfs2_get_hb_ctl_path(void) +{ + return ocfs2_hb_ctl_path; +} +EXPORT_SYMBOL_GPL(ocfs2_get_hb_ctl_path); + + +/* + * Initialization + */ + static int __init ocfs2_stack_glue_init(void) { strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); + ocfs2_table_header = register_sysctl_table(ocfs2_root_table); + if (!ocfs2_table_header) { + printk(KERN_ERR + "ocfs2 stack glue: unable to register sysctl\n"); + return -ENOMEM; /* or something. */ + } + return ocfs2_sysfs_init(); } @@ -559,6 +642,8 @@ static void __exit ocfs2_stack_glue_exit(void) { lproto = NULL; ocfs2_sysfs_exit(); + if (ocfs2_table_header) + unregister_sysctl_table(ocfs2_table_header); } MODULE_AUTHOR("Oracle"); diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index 005e4f170e0f..c9fb01ab6347 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -258,4 +258,6 @@ void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) /* Used by stack plugins */ int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); +const char *ocfs2_get_hb_ctl_path(void); + #endif /* STACKGLUE_H */ -- cgit v1.2.3-70-g09d2 From 9f9a99f4eccc64650e932090cff0ebd07b81e334 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 30 May 2008 15:43:58 -0700 Subject: ocfs2: Move the call of ocfs2_hb_ctl into the stack glue. Take o2hb_stop() out of the o2cb code and make it part of the generic stack glue as ocfs2_leave_group(). This also allows us to remove the ocfs2_get_hb_ctl_path() function - everything to do with hb_ctl is now part of stackglue.c. o2cb no longer needs a ->hangup() function. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stack_o2cb.c | 38 -------------------------------------- fs/ocfs2/stackglue.c | 49 ++++++++++++++++++++++++++++++++++++++++--------- fs/ocfs2/stackglue.h | 1 - 3 files changed, 40 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index fb26a7c69c47..765ade5ee84a 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -333,43 +333,6 @@ static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn, return 0; } -static void o2hb_stop(const char *group) -{ - int ret; - char *argv[5], *envp[3]; - - argv[0] = (char *)ocfs2_get_hb_ctl_path(); - argv[1] = "-K"; - argv[2] = "-u"; - argv[3] = (char *)group; - argv[4] = NULL; - - mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]); - - /* minimal command environment taken from cpu_run_sbin_hotplug */ - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - envp[2] = NULL; - - ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); - if (ret < 0) - mlog_errno(ret); -} - -/* - * Hangup is a hack for tools compatibility. Older ocfs2-tools software - * expects the filesystem to call "ocfs2_hb_ctl" during unmount. This - * happens regardless of whether the DLM got started, so we can't do it - * in ocfs2_cluster_disconnect(). We bring the o2hb_stop() function into - * the glue and provide a "hangup" API for super.c to call. - * - * Other stacks will eventually provide a NULL ->hangup() pointer. - */ -static void o2cb_cluster_hangup(const char *group, int grouplen) -{ - o2hb_stop(group); -} - static int o2cb_cluster_this_node(unsigned int *node) { int node_num; @@ -388,7 +351,6 @@ static int o2cb_cluster_this_node(unsigned int *node) static struct ocfs2_stack_operations o2cb_stack_ops = { .connect = o2cb_cluster_connect, .disconnect = o2cb_cluster_disconnect, - .hangup = o2cb_cluster_hangup, .this_node = o2cb_cluster_this_node, .dlm_lock = o2cb_dlm_lock, .dlm_unlock = o2cb_dlm_unlock, diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index fb9b8e0db260..5f78ff4c76c7 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -34,11 +34,13 @@ #define OCFS2_STACK_PLUGIN_O2CB "o2cb" #define OCFS2_STACK_PLUGIN_USER "user" +#define OCFS2_MAX_HB_CTL_PATH 256 static struct ocfs2_locking_protocol *lproto; static DEFINE_SPINLOCK(ocfs2_stack_lock); static LIST_HEAD(ocfs2_stack_list); static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; +static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; /* * The stack currently in use. If not null, active_stack->sp_count > 0, @@ -363,6 +365,42 @@ int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, } EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect); +/* + * Leave the group for this filesystem. This is executed by a userspace + * program (stored in ocfs2_hb_ctl_path). + */ +static void ocfs2_leave_group(const char *group) +{ + int ret; + char *argv[5], *envp[3]; + + argv[0] = ocfs2_hb_ctl_path; + argv[1] = "-K"; + argv[2] = "-u"; + argv[3] = (char *)group; + argv[4] = NULL; + + /* minimal command environment taken from cpu_run_sbin_hotplug */ + envp[0] = "HOME=/"; + envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; + envp[2] = NULL; + + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + if (ret < 0) { + printk(KERN_ERR + "ocfs2: Error %d running user helper " + "\"%s %s %s %s\"\n", + ret, argv[0], argv[1], argv[2], argv[3]); + } +} + +/* + * Hangup is a required post-umount. ocfs2-tools software expects the + * filesystem to call "ocfs2_hb_ctl" during unmount. This happens + * regardless of whether the DLM got started, so we can't do it + * in ocfs2_cluster_disconnect(). The ocfs2_leave_group() function does + * the actual work. + */ void ocfs2_cluster_hangup(const char *group, int grouplen) { BUG_ON(group == NULL); @@ -371,6 +409,8 @@ void ocfs2_cluster_hangup(const char *group, int grouplen) if (active_stack->sp_ops->hangup) active_stack->sp_ops->hangup(group, grouplen); + ocfs2_leave_group(group); + /* cluster_disconnect() was called with hangup_pending==1 */ ocfs2_stack_driver_put(); } @@ -559,9 +599,6 @@ error: #define FS_OCFS2_NM 1 -#define OCFS2_MAX_HB_CTL_PATH 256 -static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; - static ctl_table ocfs2_nm_table[] = { { .ctl_name = 1, @@ -613,12 +650,6 @@ static ctl_table ocfs2_root_table[] = { static struct ctl_table_header *ocfs2_table_header = NULL; -const char *ocfs2_get_hb_ctl_path(void) -{ - return ocfs2_hb_ctl_path; -} -EXPORT_SYMBOL_GPL(ocfs2_get_hb_ctl_path); - /* * Initialization diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index c9fb01ab6347..fe3fd2a12821 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -258,6 +258,5 @@ void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) /* Used by stack plugins */ int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); -const char *ocfs2_get_hb_ctl_path(void); #endif /* STACKGLUE_H */ -- cgit v1.2.3-70-g09d2 From 2c39450b39880e162b3eb339672314101f58ee1a Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 30 May 2008 15:58:26 -0700 Subject: ocfs2: Remove ->hangup() from stack glue operations. The ->hangup() call was only used to execute ocfs2_hb_ctl. Now that the generic stack glue code handles this, the underlying stack drivers don't need to know about it. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stack_o2cb.c | 3 +-- fs/ocfs2/stack_user.c | 3 +-- fs/ocfs2/stackglue.c | 5 +---- fs/ocfs2/stackglue.h | 18 +++--------------- 4 files changed, 6 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 765ade5ee84a..fcd120f1493a 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -317,8 +317,7 @@ out: return rc; } -static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn, - int hangup_pending) +static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn) { struct dlm_ctxt *dlm = conn->cc_lockspace; struct o2dlm_private *priv = conn->cc_private; diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 6b97d11f6bf8..c021280dd462 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -816,8 +816,7 @@ out: return rc; } -static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn, - int hangup_pending) +static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn) { dlm_release_lockspace(conn->cc_lockspace, 2); conn->cc_lockspace = NULL; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 5f78ff4c76c7..10e149ae5e3a 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -352,7 +352,7 @@ int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, BUG_ON(conn == NULL); - ret = active_stack->sp_ops->disconnect(conn, hangup_pending); + ret = active_stack->sp_ops->disconnect(conn); /* XXX Should we free it anyway? */ if (!ret) { @@ -406,9 +406,6 @@ void ocfs2_cluster_hangup(const char *group, int grouplen) BUG_ON(group == NULL); BUG_ON(group[grouplen] != '\0'); - if (active_stack->sp_ops->hangup) - active_stack->sp_ops->hangup(group, grouplen); - ocfs2_leave_group(group); /* cluster_disconnect() was called with hangup_pending==1 */ diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index fe3fd2a12821..db56281dd1be 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -134,22 +134,10 @@ struct ocfs2_stack_operations { * be freed. Thus, a stack must not return from ->disconnect() * until it will no longer reference the conn pointer. * - * If hangup_pending is zero, ocfs2_cluster_disconnect() will also - * be dropping the reference on the module. + * Once this call returns, the stack glue will be dropping this + * connection's reference on the module. */ - int (*disconnect)(struct ocfs2_cluster_connection *conn, - int hangup_pending); - - /* - * ocfs2_cluster_hangup() exists for compatibility with older - * ocfs2 tools. Only the classic stack really needs it. As such - * ->hangup() is not required of all stacks. See the comment by - * ocfs2_cluster_hangup() for more details. - * - * Note that ocfs2_cluster_hangup() can only be called if - * hangup_pending was passed to ocfs2_cluster_disconnect(). - */ - void (*hangup)(const char *group, int grouplen); + int (*disconnect)(struct ocfs2_cluster_connection *conn); /* * ->this_node() returns the cluster's unique identifier for the -- cgit v1.2.3-70-g09d2 From f948d56435fc1f7506f08866302ecd6e60b533dd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 17 Jun 2008 18:05:40 +0200 Subject: fuse: fix thinko in max I/O size calucation Use max not min to enforce a lower limit on the max I/O size. This bug was introduced by "fuse: fix max i/o size calculation" (commit e5d9a0df07484d6d191756878c974e4307fb24ce). Thanks to Brian Wang for noticing. Reported-by: Brian Wang Signed-off-by: Miklos Szeredi Acked-by: Szabolcs Szakacsits Signed-off-by: Linus Torvalds --- fs/fuse/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 43e99513334a..3141690558c8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -591,7 +591,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); fc->minor = arg->minor; fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; - fc->max_write = min_t(unsigned, 4096, fc->max_write); + fc->max_write = max_t(unsigned, 4096, fc->max_write); fc->conn_init = 1; } fuse_put_request(fc, req); @@ -667,7 +667,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->flags = d.flags; fc->user_id = d.user_id; fc->group_id = d.group_id; - fc->max_read = min_t(unsigned, 4096, d.max_read); + fc->max_read = max_t(unsigned, 4096, d.max_read); /* Used by get_root_inode() */ sb->s_fs_info = fc; -- cgit v1.2.3-70-g09d2 From 2856922c158605514ec5974a03097eaec91f4c0d Mon Sep 17 00:00:00 2001 From: Frederic Bohe Date: Fri, 20 Jun 2008 11:48:48 -0400 Subject: Ext4: Fix online resize block group descriptor corruption This is the patch for the group descriptor table corruption during online resize pointed out by Theodore Tso. The problem was caused by the fact that the ext4 group descriptor can be either 32 or 64 bytes long. Only the 64 bytes structure was taken into account. Signed-off-by: Frederic Bohe Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9ecb92f68543..9ff7b1c04239 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -855,7 +855,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) */ /* Update group descriptor block for new group */ - gdp = (struct ext4_group_desc *)primary->b_data + gdb_off; + gdp = (struct ext4_group_desc *)((char *)primary->b_data + + gdb_off * EXT4_DESC_SIZE(sb)); ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ -- cgit v1.2.3-70-g09d2 From 55d8538498f62ec72b5ba67aa386c7726f630475 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 Jun 2008 12:23:15 -0700 Subject: Fix performance regression on lmbench select benchmark Christian Borntraeger reported that reinstating cond_resched() with CONFIG_PREEMPT caused a performance regression on lmbench: For example select file 500: 23 microseconds 32 microseconds and that's really because we totally unnecessarily do the cond_resched() in the innermost loop of select(), which is just silly. This moves it out from the innermost loop (which only ever loops ove the bits in a single "unsigned long" anyway), which makes the performance regression go away. Reported-and-tested-by: Christian Borntraeger Signed-off-by: Linus Torvalds --- fs/select.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/select.c b/fs/select.c index 8dda969614a9..da0e88201c3a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -249,7 +249,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) retval++; } } - cond_resched(); } if (res_in) *rinp = res_in; @@ -257,6 +256,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) *routp = res_out; if (res_ex) *rexp = res_ex; + cond_resched(); } wait = NULL; if (retval || !*timeout || signal_pending(current)) -- cgit v1.2.3-70-g09d2 From fe6e9c1f25ac01f848bd084ee0ee62a5a0966ff3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 23 Jun 2008 08:30:55 -0400 Subject: [PATCH] fix cgroup-inflicted breakage in block_dev.c devcgroup_inode_permission() expects MAY_FOO, not FMODE_FOO; kindly keep your misdesign consistent if you positively have to inflict it on the kernel. Signed-off-by: Al Viro --- fs/block_dev.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 470c10ceb0fb..10d8a0aa871a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -931,8 +931,16 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) struct gendisk *disk; int ret; int part; + int perm = 0; - ret = devcgroup_inode_permission(bdev->bd_inode, file->f_mode); + if (file->f_mode & FMODE_READ) + perm |= MAY_READ; + if (file->f_mode & FMODE_WRITE) + perm |= MAY_WRITE; + /* + * hooks: /n/, see "layering violations". + */ + ret = devcgroup_inode_permission(bdev->bd_inode, perm); if (ret != 0) return ret; -- cgit v1.2.3-70-g09d2 From 12fd0d3088d27867be68655bcab2b074f2835f60 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 9 Jun 2008 21:16:07 -0700 Subject: [patch for 2.6.26 2/4] vfs: utimensat(): be consistent with utime() for immutable and append-only files This patch fixes utimensat() to make its behavior consistent with that of utime()/utimes() when dealing with files marked immutable and append-only. The current utimensat() implementation also returns EPERM if 'times' is non-NULL and the tv_nsec fields are both UTIME_NOW. For consistency, the (times != NULL && times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) case should be treated like the traditional utimes() case where 'times' is NULL. That is, the call should succeed for a file marked append-only and should give the error EACCES if the file is marked as immutable. The simple way to do this is to set 'times' to NULL if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW). This is also the natural approach, since POSIX.1 semantics consider the times == {{x, UTIME_NOW}, {y, UTIME_NOW}} to be exactly equivalent to the case for times == NULL. (Thanks to Miklos for pointing this out.) Patch 3 in this series relies on the simplification provided by this patch. Acked-by: Miklos Szeredi Cc: Al Viro Cc: Ulrich Drepper Signed-off-by: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/utimes.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/utimes.c b/fs/utimes.c index af059d5cb485..14d3edbb3d7c 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -102,6 +102,10 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags if (error) goto dput_and_out; + if (times && times[0].tv_nsec == UTIME_NOW && + times[1].tv_nsec == UTIME_NOW) + times = NULL; + /* Don't worry, the checks are done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (times) { -- cgit v1.2.3-70-g09d2 From 94c70b9ba7e9c1036284e779e2fef5be89021533 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 9 Jun 2008 21:16:05 -0700 Subject: [patch for 2.6.26 1/4] vfs: utimensat(): ignore tv_sec if tv_nsec == UTIME_OMIT or UTIME_NOW The POSIX.1 draft spec for utimensat() says that if a times[n].tv_nsec field is UTIME_OMIT or UTIME_NOW, then the value in the corresponding tv_sec field is ignored. See the last sentence of this para, from the spec: If the tv_nsec field of a timespec structure has the special value UTIME_NOW, the file's relevant timestamp shall be set to the greatest value supported by the file system that is not greater than the current time. If the tv_nsec field has the special value UTIME_OMIT, the file's relevant timestamp shall not be changed. In either case, the tv_sec field shall be ignored. However the current Linux implementation requires the tv_sec value to be zero (or the EINVAL error results). This requirement should be removed. Acked-by: Miklos Szeredi Cc: Al Viro Cc: Ulrich Drepper Signed-off-by: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/utimes.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs') diff --git a/fs/utimes.c b/fs/utimes.c index 14d3edbb3d7c..d466bc587e6e 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -173,14 +173,6 @@ asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __ if (utimes) { if (copy_from_user(&tstimes, utimes, sizeof(tstimes))) return -EFAULT; - if ((tstimes[0].tv_nsec == UTIME_OMIT || - tstimes[0].tv_nsec == UTIME_NOW) && - tstimes[0].tv_sec != 0) - return -EINVAL; - if ((tstimes[1].tv_nsec == UTIME_OMIT || - tstimes[1].tv_nsec == UTIME_NOW) && - tstimes[1].tv_sec != 0) - return -EINVAL; /* Nothing to do, we must not even check the path. */ if (tstimes[0].tv_nsec == UTIME_OMIT && -- cgit v1.2.3-70-g09d2 From 4cca92264e61a90b43fc4e076cd25b7f4e16dc61 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 9 Jun 2008 21:16:08 -0700 Subject: [patch for 2.6.26 3/4] vfs: utimensat(): fix error checking for {UTIME_NOW,UTIME_OMIT} case The POSIX.1 draft spec for utimensat() says: Only a process with the effective user ID equal to the user ID of the file or with appropriate privileges may use futimens() or utimensat() with a non-null times argument that does not have both tv_nsec fields set to UTIME_NOW and does not have both tv_nsec fields set to UTIME_OMIT. If this condition is violated, then the error EPERM should result. However, the current implementation does not generate EPERM if one tv_nsec field is UTIME_NOW while the other is UTIME_OMIT. It should give this error for that case. This patch: a) Repairs that problem. b) Removes the now unneeded nsec_special() helper function. c) Adds some comments to explain the checks that are being performed. Thanks to Miklos, who provided comments on the previous iteration of this patch. As a result, this version is a little simpler and and its logic is better structured. Miklos suggested an alternative idea, migrating the is_owner_or_cap() checks into fs/attr.c:inode_change_ok() via the use of an ATTR_OWNER_CHECK flag. Maybe we could do that later, but for now I've gone with this version, which is IMO simpler, and can be more easily read as being correct. Acked-by: Miklos Szeredi Cc: Al Viro Cc: Ulrich Drepper Signed-off-by: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/utimes.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/utimes.c b/fs/utimes.c index d466bc587e6e..118d1c3241be 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -40,14 +40,9 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times) #endif -static bool nsec_special(long nsec) -{ - return nsec == UTIME_OMIT || nsec == UTIME_NOW; -} - static bool nsec_valid(long nsec) { - if (nsec_special(nsec)) + if (nsec == UTIME_OMIT || nsec == UTIME_NOW) return true; return nsec >= 0 && nsec <= 999999999; @@ -106,7 +101,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags times[1].tv_nsec == UTIME_NOW) times = NULL; - /* Don't worry, the checks are done in inode_change_ok() */ + /* In most cases, the checks are done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (times) { error = -EPERM; @@ -128,15 +123,26 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; newattrs.ia_valid |= ATTR_MTIME_SET; } - } - /* - * If times is NULL or both times are either UTIME_OMIT or - * UTIME_NOW, then need to check permissions, because - * inode_change_ok() won't do it. - */ - if (!times || (nsec_special(times[0].tv_nsec) && - nsec_special(times[1].tv_nsec))) { + /* + * For the UTIME_OMIT/UTIME_NOW and UTIME_NOW/UTIME_OMIT + * cases, we need to make an extra check that is not done by + * inode_change_ok(). + */ + if (((times[0].tv_nsec == UTIME_NOW && + times[1].tv_nsec == UTIME_OMIT) + || + (times[0].tv_nsec == UTIME_OMIT && + times[1].tv_nsec == UTIME_NOW)) + && !is_owner_or_cap(inode)) + goto mnt_drop_write_and_out; + } else { + + /* + * If times is NULL (or both times are UTIME_NOW), + * then we need to check permissions, because + * inode_change_ok() won't do it. + */ error = -EACCES; if (IS_IMMUTABLE(inode)) goto mnt_drop_write_and_out; -- cgit v1.2.3-70-g09d2 From c70f84417429f41519be0197a1092a53c2201f47 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 9 Jun 2008 21:16:09 -0700 Subject: [patch for 2.6.26 4/4] vfs: utimensat(): fix write access check for futimens() The POSIX.1 draft spec for futimens()/utimensat() says: Only a process with the effective user ID equal to the user ID of the file, *or with write access to the file*, or with appropriate privileges may use futimens() or utimensat() with a null pointer as the times argument or with both tv_nsec fields set to the special value UTIME_NOW. The important piece here is "with write access to the file", and this matters for futimens(), which deals with an argument that is a file descriptor referring to the file whose timestamps are being updated, The standard is saying that the "writability" check is based on the file permissions, not the access mode with which the file is opened. (This behavior is consistent with the semantics of FreeBSD's futimes().) However, Linux is currently doing the latter -- futimens(fd, times) is a library function implemented as utimensat(fd, NULL, times, 0) and within the utimensat() implementation we have the code: f = fget(dfd); // dfd is 'fd' ... if (f) { if (!(f->f_mode & FMODE_WRITE)) goto mnt_drop_write_and_out; The check should instead be based on the file permissions. Thanks to Miklos for pointing out how to do this check. Miklos also pointed out a simplification that could be made to my first version of this patch, since the checks for the pathname and file descriptor cases can now be conflated. Acked-by: Miklos Szeredi Cc: Al Viro Cc: Ulrich Drepper Signed-off-by: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/utimes.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/utimes.c b/fs/utimes.c index 118d1c3241be..b6b664e7145e 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -148,14 +148,9 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags goto mnt_drop_write_and_out; if (!is_owner_or_cap(inode)) { - if (f) { - if (!(f->f_mode & FMODE_WRITE)) - goto mnt_drop_write_and_out; - } else { - error = vfs_permission(&nd, MAY_WRITE); - if (error) - goto mnt_drop_write_and_out; - } + error = permission(inode, MAY_WRITE, NULL); + if (error) + goto mnt_drop_write_and_out; } } mutex_lock(&inode->i_mutex); -- cgit v1.2.3-70-g09d2 From c8e7f449b225ee6c87454ac069f0a041035c5140 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Mon, 9 Jun 2008 16:40:35 -0700 Subject: [patch 1/4] vfs: path_{get,put}() cleanups Here are some more places where path_{get,put}() can be used instead of dput()/mntput() pair. Signed-off-by: Jan Blunck Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/namei.c | 11 +++++------ fs/pipe.c | 10 ++++------ 2 files changed, 9 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index c7e43536c49a..ee1544696e83 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -581,15 +581,13 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd int result; /* make sure the stuff we saved doesn't go away */ - dget(save.dentry); - mntget(save.mnt); + path_get(&save); result = __link_path_walk(name, nd); if (result == -ESTALE) { /* nd->path had been dropped */ nd->path = save; - dget(nd->path.dentry); - mntget(nd->path.mnt); + path_get(&nd->path); nd->flags |= LOOKUP_REVAL; result = __link_path_walk(name, nd); } @@ -1216,8 +1214,9 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, nd->flags = flags; nd->depth = 0; - nd->path.mnt = mntget(mnt); - nd->path.dentry = dget(dentry); + nd->path.dentry = dentry; + nd->path.mnt = mnt; + path_get(&nd->path); retval = path_walk(name, nd); if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && diff --git a/fs/pipe.c b/fs/pipe.c index ec228bc9f882..700f4e0d9572 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1003,8 +1003,7 @@ struct file *create_write_pipe(void) void free_write_pipe(struct file *f) { free_pipe_info(f->f_dentry->d_inode); - dput(f->f_path.dentry); - mntput(f->f_path.mnt); + path_put(&f->f_path); put_filp(f); } @@ -1015,8 +1014,8 @@ struct file *create_read_pipe(struct file *wrf) return ERR_PTR(-ENFILE); /* Grab pipe from the writer */ - f->f_path.mnt = mntget(wrf->f_path.mnt); - f->f_path.dentry = dget(wrf->f_path.dentry); + f->f_path = wrf->f_path; + path_get(&wrf->f_path); f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; f->f_pos = 0; @@ -1068,8 +1067,7 @@ int do_pipe(int *fd) err_fdr: put_unused_fd(fdr); err_read_pipe: - dput(fr->f_dentry); - mntput(fr->f_vfsmnt); + path_put(&fr->f_path); put_filp(fr); err_write_pipe: free_write_pipe(fw); -- cgit v1.2.3-70-g09d2 From 20d4fdc1a788e4ca0aaf2422772ba668e7e10839 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 9 Jun 2008 16:40:36 -0700 Subject: [patch 2/4] fs: make struct file arg to d_path const Signed-off-by: Jan Engelhardt Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/dcache.c | 2 +- include/linux/dcache.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 3ee588d5f585..c4c9072d810c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1847,7 +1847,7 @@ Elong: * * "buflen" should be positive. Caller holds the dcache_lock. */ -char *d_path(struct path *path, char *buf, int buflen) +char *d_path(const struct path *path, char *buf, int buflen) { char *res; struct path root; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 2a6639407c80..d982eb89c77d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -300,7 +300,7 @@ extern int d_validate(struct dentry *, struct dentry *); extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); extern char *__d_path(const struct path *path, struct path *root, char *, int); -extern char *d_path(struct path *, char *, int); +extern char *d_path(const struct path *, char *, int); extern char *dentry_path(struct dentry *, char *, int); /* Allocation counts.. */ -- cgit v1.2.3-70-g09d2 From 694a1764d657e0f7a9b139bc7269c8d5f5a2534b Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 9 Jun 2008 16:40:37 -0700 Subject: [patch 3/4] vfs: fix ERR_PTR abuse in generic_readlink generic_readlink calls ERR_PTR for negative and positive values (vfs_readlink returns length of "link"), but it should not (not an errno) and does not need to. Signed-off-by: Marcin Slusarz Cc: Al Viro Cc: Christoph Hellwig Acked-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/namei.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index ee1544696e83..01e67dddcc3d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2856,16 +2856,17 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct nameidata nd; void *cookie; + int res; nd.depth = 0; cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); - if (!IS_ERR(cookie)) { - int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); - if (dentry->d_inode->i_op->put_link) - dentry->d_inode->i_op->put_link(dentry, &nd, cookie); - cookie = ERR_PTR(res); - } - return PTR_ERR(cookie); + if (IS_ERR(cookie)) + return PTR_ERR(cookie); + + res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); + if (dentry->d_inode->i_op->put_link) + dentry->d_inode->i_op->put_link(dentry, &nd, cookie); + return res; } int vfs_follow_link(struct nameidata *nd, const char *link) -- cgit v1.2.3-70-g09d2 From f9f48ec72bfc9489a30bc6ddbfcf27d86a8bc651 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 9 Jun 2008 16:40:38 -0700 Subject: [patch 4/4] flock: remove unused fields from file_lock_operations fl_insert and fl_remove are not used right now in the kernel. Remove them. Signed-off-by: Denis V. Lunev Cc: Matthew Wilcox Cc: Alexander Viro Cc: "J. Bruce Fields" Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/locks.c | 6 ------ include/linux/fs.h | 2 -- 2 files changed, 8 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 11dbf08651b7..dce8c747371c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -561,9 +561,6 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) /* insert into file's list */ fl->fl_next = *pos; *pos = fl; - - if (fl->fl_ops && fl->fl_ops->fl_insert) - fl->fl_ops->fl_insert(fl); } /* @@ -586,9 +583,6 @@ static void locks_delete_lock(struct file_lock **thisfl_p) fl->fl_fasync = NULL; } - if (fl->fl_ops && fl->fl_ops->fl_remove) - fl->fl_ops->fl_remove(fl); - if (fl->fl_nspid) { put_pid(fl->fl_nspid); fl->fl_nspid = NULL; diff --git a/include/linux/fs.h b/include/linux/fs.h index d490779f18d9..7c1080826832 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -894,8 +894,6 @@ static inline int file_check_writeable(struct file *filp) typedef struct files_struct *fl_owner_t; struct file_lock_operations { - void (*fl_insert)(struct file_lock *); /* lock insertion callback */ - void (*fl_remove)(struct file_lock *); /* lock removal callback */ void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_release_private)(struct file_lock *); }; -- cgit v1.2.3-70-g09d2 From be285c712bbbe5db43e503782fbef2bfeaa345f9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 16 Jun 2008 13:28:07 +0200 Subject: [patch 3/3] vfs: make d_path() consistent across mount operations The path that __d_path() computes can become slightly inconsistent when it races with mount operations: it grabs the vfsmount_lock when traversing mount points but immediately drops it again, only to re-grab it when it reaches the next mount point. The result is that the filename computed is not always consisent, and the file may never have had that name. (This is unlikely, but still possible.) Fix this by grabbing the vfsmount_lock for the whole duration of __d_path(). Signed-off-by: Andreas Gruenbacher Signed-off-by: John Johansen Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/dcache.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index c4c9072d810c..2b479de10a0a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1782,6 +1782,7 @@ char *__d_path(const struct path *path, struct path *root, char * end = buffer+buflen; char * retval; + spin_lock(&vfsmount_lock); prepend(&end, &buflen, "\0", 1); if (!IS_ROOT(dentry) && d_unhashed(dentry) && (prepend(&end, &buflen, " (deleted)", 10) != 0)) @@ -1800,14 +1801,11 @@ char *__d_path(const struct path *path, struct path *root, break; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { /* Global root? */ - spin_lock(&vfsmount_lock); if (vfsmnt->mnt_parent == vfsmnt) { - spin_unlock(&vfsmount_lock); goto global_root; } dentry = vfsmnt->mnt_mountpoint; vfsmnt = vfsmnt->mnt_parent; - spin_unlock(&vfsmount_lock); continue; } parent = dentry->d_parent; @@ -1820,6 +1818,8 @@ char *__d_path(const struct path *path, struct path *root, dentry = parent; } +out: + spin_unlock(&vfsmount_lock); return retval; global_root: @@ -1829,9 +1829,11 @@ global_root: goto Elong; root->mnt = vfsmnt; root->dentry = dentry; - return retval; + goto out; + Elong: - return ERR_PTR(-ENAMETOOLONG); + retval = ERR_PTR(-ENAMETOOLONG); + goto out; } /** -- cgit v1.2.3-70-g09d2 From 31f3e0b3a18c6d48196c40a82a3b8c01f4ff6b23 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 23 Jun 2008 18:11:52 +0200 Subject: [patch 1/3] vfs: dcache sparse fixes Fix the following sparse warnings: fs/dcache.c:2183:19: warning: symbol 'filp_cachep' was not declared. Should it be static? fs/dcache.c:115:3: warning: context imbalance in 'dentry_iput' - unexpected unlock fs/dcache.c:188:2: warning: context imbalance in 'dput' - different lock contexts for basic block fs/dcache.c:400:2: warning: context imbalance in 'prune_one_dentry' - different lock contexts for basic block fs/dcache.c:431:22: warning: context imbalance in 'prune_dcache' - different lock contexts for basic block fs/dcache.c:563:2: warning: context imbalance in 'shrink_dcache_sb' - different lock contexts for basic block fs/dcache.c:1385:6: warning: context imbalance in 'd_delete' - wrong count at exit fs/dcache.c:1636:2: warning: context imbalance in '__d_unalias' - unexpected unlock fs/dcache.c:1735:2: warning: context imbalance in 'd_materialise_unique' - different lock contexts for basic block Signed-off-by: Miklos Szeredi Reviewed-by: Matthew Wilcox Acked-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/dcache.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 2b479de10a0a..e4b2b9436b32 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -106,9 +107,10 @@ static void dentry_lru_remove(struct dentry *dentry) /* * Release the dentry's inode, using the filesystem * d_iput() operation if defined. - * Called with dcache_lock and per dentry lock held, drops both. */ static void dentry_iput(struct dentry * dentry) + __releases(dentry->d_lock) + __releases(dcache_lock) { struct inode *inode = dentry->d_inode; if (inode) { @@ -132,12 +134,13 @@ static void dentry_iput(struct dentry * dentry) * d_kill - kill dentry and return parent * @dentry: dentry to kill * - * Called with dcache_lock and d_lock, releases both. The dentry must - * already be unhashed and removed from the LRU. + * The dentry must already be unhashed and removed from the LRU. * * If this is the root of the dentry tree, return NULL. */ static struct dentry *d_kill(struct dentry *dentry) + __releases(dentry->d_lock) + __releases(dcache_lock) { struct dentry *parent; @@ -383,11 +386,11 @@ restart: * Try to prune ancestors as well. This is necessary to prevent * quadratic behavior of shrink_dcache_parent(), but is also expected * to be beneficial in reducing dentry cache fragmentation. - * - * Called with dcache_lock, drops it and then regains. - * Called with dentry->d_lock held, drops it. */ static void prune_one_dentry(struct dentry * dentry) + __releases(dentry->d_lock) + __releases(dcache_lock) + __acquires(dcache_lock) { __d_drop(dentry); dentry = d_kill(dentry); @@ -1604,10 +1607,9 @@ static int d_isparent(struct dentry *p1, struct dentry *p2) * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... - * - * On return, dcache_lock will have been unlocked. */ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) + __releases(dcache_lock) { struct mutex *m1 = NULL, *m2 = NULL; struct dentry *ret; @@ -1743,7 +1745,6 @@ out_nolock: shouldnt_be_hashed: spin_unlock(&dcache_lock); BUG(); - goto shouldnt_be_hashed; } static int prepend(char **buffer, int *buflen, const char *str, @@ -1758,7 +1759,7 @@ static int prepend(char **buffer, int *buflen, const char *str, } /** - * d_path - return the path of a dentry + * __d_path - return the path of a dentry * @path: the dentry/vfsmount to report * @root: root vfsmnt/dentry (may be modified by this function) * @buffer: buffer to return value in @@ -1847,7 +1848,7 @@ Elong: * * Returns the buffer or an error code if the path was too long. * - * "buflen" should be positive. Caller holds the dcache_lock. + * "buflen" should be positive. */ char *d_path(const struct path *path, char *buf, int buflen) { -- cgit v1.2.3-70-g09d2 From cdd16d0265c9234228fd37fbbad844d7e894b278 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 23 Jun 2008 18:11:53 +0200 Subject: [patch 2/3] vfs: dcache cleanups Comment from Al Viro: add prepend_name() wrapper. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/dcache.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index e4b2b9436b32..6068c25b393c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1747,8 +1747,7 @@ shouldnt_be_hashed: BUG(); } -static int prepend(char **buffer, int *buflen, const char *str, - int namelen) +static int prepend(char **buffer, int *buflen, const char *str, int namelen) { *buflen -= namelen; if (*buflen < 0) @@ -1758,6 +1757,11 @@ static int prepend(char **buffer, int *buflen, const char *str, return 0; } +static int prepend_name(char **buffer, int *buflen, struct qstr *name) +{ + return prepend(buffer, buflen, name->name, name->len); +} + /** * __d_path - return the path of a dentry * @path: the dentry/vfsmount to report @@ -1780,8 +1784,8 @@ char *__d_path(const struct path *path, struct path *root, { struct dentry *dentry = path->dentry; struct vfsmount *vfsmnt = path->mnt; - char * end = buffer+buflen; - char * retval; + char *end = buffer + buflen; + char *retval; spin_lock(&vfsmount_lock); prepend(&end, &buflen, "\0", 1); @@ -1811,8 +1815,7 @@ char *__d_path(const struct path *path, struct path *root, } parent = dentry->d_parent; prefetch(parent); - if ((prepend(&end, &buflen, dentry->d_name.name, - dentry->d_name.len) != 0) || + if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || (prepend(&end, &buflen, "/", 1) != 0)) goto Elong; retval = end; @@ -1825,8 +1828,7 @@ out: global_root: retval += 1; /* hit the slash */ - if (prepend(&retval, &buflen, dentry->d_name.name, - dentry->d_name.len) != 0) + if (prepend_name(&retval, &buflen, &dentry->d_name) != 0) goto Elong; root->mnt = vfsmnt; root->dentry = dentry; @@ -1918,16 +1920,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) retval = end-1; *retval = '/'; - for (;;) { - struct dentry *parent; - if (IS_ROOT(dentry)) - break; + while (!IS_ROOT(dentry)) { + struct dentry *parent = dentry->d_parent; - parent = dentry->d_parent; prefetch(parent); - - if ((prepend(&end, &buflen, dentry->d_name.name, - dentry->d_name.len) != 0) || + if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || (prepend(&end, &buflen, "/", 1) != 0)) goto Elong; @@ -1978,7 +1975,7 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size) error = -ENOENT; /* Has the current directory has been unlinked? */ spin_lock(&dcache_lock); - if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) { + if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) { unsigned long len; struct path tmp = root; char * cwd; -- cgit v1.2.3-70-g09d2 From 33852a1f2bb014e4047a844556c0d76a2f790c37 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jun 2008 14:20:11 -0400 Subject: NFS: Reduce the NFS mount code stack usage. This appears to fix the Oops reported in http://bugzilla.kernel.org/show_bug.cgi?id=10826 Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 68 ++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2a4a024a4e7b..dac663dc5611 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1216,8 +1216,6 @@ static int nfs_validate_mount_data(void *options, { struct nfs_mount_data *data = (struct nfs_mount_data *)options; - memset(args, 0, sizeof(*args)); - if (data == NULL) goto out_no_data; @@ -1585,24 +1583,29 @@ static int nfs_get_sb(struct file_system_type *fs_type, { struct nfs_server *server = NULL; struct super_block *s; - struct nfs_fh mntfh; - struct nfs_parsed_mount_data data; + struct nfs_parsed_mount_data *data; + struct nfs_fh *mntfh; struct dentry *mntroot; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; struct nfs_sb_mountdata sb_mntdata = { .mntflags = flags, }; - int error; + int error = -ENOMEM; - security_init_mnt_opts(&data.lsm_opts); + data = kzalloc(sizeof(*data), GFP_KERNEL); + mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); + if (data == NULL || mntfh == NULL) + goto out_free_fh; + + security_init_mnt_opts(&data->lsm_opts); /* Validate the mount data */ - error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name); + error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); if (error < 0) goto out; /* Get a volume representation */ - server = nfs_create_server(&data, &mntfh); + server = nfs_create_server(data, mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); goto out; @@ -1630,16 +1633,16 @@ static int nfs_get_sb(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ - nfs_fill_super(s, &data); + nfs_fill_super(s, data); } - mntroot = nfs_get_root(s, &mntfh); + mntroot = nfs_get_root(s, mntfh); if (IS_ERR(mntroot)) { error = PTR_ERR(mntroot); goto error_splat_super; } - error = security_sb_set_mnt_opts(s, &data.lsm_opts); + error = security_sb_set_mnt_opts(s, &data->lsm_opts); if (error) goto error_splat_root; @@ -1649,9 +1652,12 @@ static int nfs_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(data.nfs_server.hostname); - kfree(data.mount_server.hostname); - security_free_mnt_opts(&data.lsm_opts); + kfree(data->nfs_server.hostname); + kfree(data->mount_server.hostname); + security_free_mnt_opts(&data->lsm_opts); +out_free_fh: + kfree(mntfh); + kfree(data); return error; out_err_nosb: @@ -1800,8 +1806,6 @@ static int nfs4_validate_mount_data(void *options, struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; char *c; - memset(args, 0, sizeof(*args)); - if (data == NULL) goto out_no_data; @@ -1959,26 +1963,31 @@ out_no_client_address: static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - struct nfs_parsed_mount_data data; + struct nfs_parsed_mount_data *data; struct super_block *s; struct nfs_server *server; - struct nfs_fh mntfh; + struct nfs_fh *mntfh; struct dentry *mntroot; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; struct nfs_sb_mountdata sb_mntdata = { .mntflags = flags, }; - int error; + int error = -ENOMEM; - security_init_mnt_opts(&data.lsm_opts); + data = kzalloc(sizeof(*data), GFP_KERNEL); + mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); + if (data == NULL || mntfh == NULL) + goto out_free_fh; + + security_init_mnt_opts(&data->lsm_opts); /* Validate the mount data */ - error = nfs4_validate_mount_data(raw_data, &data, dev_name); + error = nfs4_validate_mount_data(raw_data, data, dev_name); if (error < 0) goto out; /* Get a volume representation */ - server = nfs4_create_server(&data, &mntfh); + server = nfs4_create_server(data, mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); goto out; @@ -2009,13 +2018,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type, nfs4_fill_super(s); } - mntroot = nfs4_get_root(s, &mntfh); + mntroot = nfs4_get_root(s, mntfh); if (IS_ERR(mntroot)) { error = PTR_ERR(mntroot); goto error_splat_super; } - error = security_sb_set_mnt_opts(s, &data.lsm_opts); + error = security_sb_set_mnt_opts(s, &data->lsm_opts); if (error) goto error_splat_root; @@ -2025,10 +2034,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(data.client_address); - kfree(data.nfs_server.export_path); - kfree(data.nfs_server.hostname); - security_free_mnt_opts(&data.lsm_opts); + kfree(data->client_address); + kfree(data->nfs_server.export_path); + kfree(data->nfs_server.hostname); + security_free_mnt_opts(&data->lsm_opts); +out_free_fh: + kfree(mntfh); + kfree(data); return error; out_free: -- cgit v1.2.3-70-g09d2 From b7e2445737ff69cef892b6fd9cd71cae2c9e9515 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jun 2008 15:21:11 -0400 Subject: NFS: Fix filehandle size comparisons in the mount code Fix a sign issue in xdr_decode_fhstatus3() Fix incorrect comparison in nfs_validate_mount_data() Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 5 +++-- fs/nfs/super.c | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 49c7cd0502cc..779d2eb649c5 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -130,10 +130,11 @@ static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) { struct nfs_fh *fh = res->fh; + unsigned size; if ((res->status = ntohl(*p++)) == 0) { - int size = ntohl(*p++); - if (size <= NFS3_FHSIZE) { + size = ntohl(*p++); + if (size <= NFS3_FHSIZE && size != 0) { fh->size = size; memcpy(fh->data, p, size); } else diff --git a/fs/nfs/super.c b/fs/nfs/super.c index dac663dc5611..614efeed5437 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1249,13 +1249,13 @@ static int nfs_validate_mount_data(void *options, case 5: memset(data->context, 0, sizeof(data->context)); case 6: - if (data->flags & NFS_MOUNT_VER3) + if (data->flags & NFS_MOUNT_VER3) { + if (data->root.size > NFS3_FHSIZE || data->root.size == 0) + goto out_invalid_fh; mntfh->size = data->root.size; - else + } else mntfh->size = NFS2_FHSIZE; - if (mntfh->size > sizeof(mntfh->data)) - goto out_invalid_fh; memcpy(mntfh->data, data->root.data, mntfh->size); if (mntfh->size < sizeof(mntfh->data)) -- cgit v1.2.3-70-g09d2 From 03fa9e84e5dc10aeacb0e4eb2f708cd9fc36a5b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Jun 2008 16:02:35 -0400 Subject: NFS: nfs_updatepage(): don't mark page as dirty if an error occurred Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6d8ace3e3259..f333848fd3be 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -739,12 +739,13 @@ int nfs_updatepage(struct file *file, struct page *page, } status = nfs_writepage_setup(ctx, page, offset, count); - __set_page_dirty_nobuffers(page); + if (status < 0) + nfs_set_pageerror(page); + else + __set_page_dirty_nobuffers(page); dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", status, (long long)i_size_read(inode)); - if (status < 0) - nfs_set_pageerror(page); return status; } -- cgit v1.2.3-70-g09d2 From e8183c2452041326c95258ecc7865b6fcd91c730 Mon Sep 17 00:00:00 2001 From: Tomas Janousek Date: Mon, 23 Jun 2008 15:12:35 +0200 Subject: udf: Fix regression in UDF anchor block detection In some cases it could happen that some block passed test in udf_check_anchor_block() even though udf_read_tagged() refused to read it later (e.g. because checksum was not correct). This patch makes udf_check_anchor_block() use udf_read_tagged() so that the checking is stricter. This fixes the regression (certain disks unmountable) caused by commit 423cf6dc04eb79d441bfda2b127bc4b57134b41d. Signed-off-by: Tomas Janousek Signed-off-by: Jan Kara --- fs/udf/super.c | 57 +++++++++++++++++++++++---------------------------------- 1 file changed, 23 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index 7a5f69be6ac2..44cc702f96cc 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -682,38 +682,26 @@ static int udf_vrs(struct super_block *sb, int silent) /* * Check whether there is an anchor block in the given block */ -static int udf_check_anchor_block(struct super_block *sb, sector_t block, - bool varconv) +static int udf_check_anchor_block(struct super_block *sb, sector_t block) { - struct buffer_head *bh = NULL; - tag *t; + struct buffer_head *bh; uint16_t ident; - uint32_t location; - if (varconv) { - if (udf_fixed_to_variable(block) >= - sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) - return 0; - bh = sb_bread(sb, udf_fixed_to_variable(block)); - } - else - bh = sb_bread(sb, block); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) && + udf_fixed_to_variable(block) >= + sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) + return 0; + bh = udf_read_tagged(sb, block, block, &ident); if (!bh) return 0; - - t = (tag *)bh->b_data; - ident = le16_to_cpu(t->tagIdent); - location = le32_to_cpu(t->tagLocation); brelse(bh); - if (ident != TAG_IDENT_AVDP) - return 0; - return location == block; + + return ident == TAG_IDENT_AVDP; } /* Search for an anchor volume descriptor pointer */ -static sector_t udf_scan_anchors(struct super_block *sb, bool varconv, - sector_t lastblock) +static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock) { sector_t last[6]; int i; @@ -739,7 +727,7 @@ static sector_t udf_scan_anchors(struct super_block *sb, bool varconv, sb->s_blocksize_bits) continue; - if (udf_check_anchor_block(sb, last[i], varconv)) { + if (udf_check_anchor_block(sb, last[i])) { sbi->s_anchor[0] = last[i]; sbi->s_anchor[1] = last[i] - 256; return last[i]; @@ -748,17 +736,17 @@ static sector_t udf_scan_anchors(struct super_block *sb, bool varconv, if (last[i] < 256) continue; - if (udf_check_anchor_block(sb, last[i] - 256, varconv)) { + if (udf_check_anchor_block(sb, last[i] - 256)) { sbi->s_anchor[1] = last[i] - 256; return last[i]; } } - if (udf_check_anchor_block(sb, sbi->s_session + 256, varconv)) { + if (udf_check_anchor_block(sb, sbi->s_session + 256)) { sbi->s_anchor[0] = sbi->s_session + 256; return last[0]; } - if (udf_check_anchor_block(sb, sbi->s_session + 512, varconv)) { + if (udf_check_anchor_block(sb, sbi->s_session + 512)) { sbi->s_anchor[0] = sbi->s_session + 512; return last[0]; } @@ -780,23 +768,24 @@ static void udf_find_anchor(struct super_block *sb) int i; struct udf_sb_info *sbi = UDF_SB(sb); - lastblock = udf_scan_anchors(sb, 0, sbi->s_last_block); + lastblock = udf_scan_anchors(sb, sbi->s_last_block); if (lastblock) goto check_anchor; /* No anchor found? Try VARCONV conversion of block numbers */ + UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); /* Firstly, we try to not convert number of the last block */ - lastblock = udf_scan_anchors(sb, 1, + lastblock = udf_scan_anchors(sb, udf_variable_to_fixed(sbi->s_last_block)); - if (lastblock) { - UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); + if (lastblock) goto check_anchor; - } /* Secondly, we try with converted number of the last block */ - lastblock = udf_scan_anchors(sb, 1, sbi->s_last_block); - if (lastblock) - UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); + lastblock = udf_scan_anchors(sb, sbi->s_last_block); + if (!lastblock) { + /* VARCONV didn't help. Clear it. */ + UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV); + } check_anchor: /* -- cgit v1.2.3-70-g09d2 From 17c15da00c0e7289375ad57e8fea0c7892b74aa0 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 18 Jun 2008 11:30:40 -0500 Subject: [GFS2] BUG: unable to handle kernel paging request at ffff81002690e000 This patch fixes bugzilla bug bz448866: gfs2: BUG: unable to handle kernel paging request at ffff81002690e000. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 6387523a3153..3401628d742b 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -195,7 +195,7 @@ ulong_aligned: depending on architecture. I've experimented with several ways of writing this section such as using an else before the goto but this one seems to be the fastest. */ - while ((unsigned char *)plong < end - 1) { + while ((unsigned char *)plong < end - sizeof(unsigned long)) { prefetch(plong + 1); if (((*plong) & LBITMASK) != lskipval) break; -- cgit v1.2.3-70-g09d2 From 5af4e7a0bea715f2dd7190859a43eb2258b1f388 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 24 Jun 2008 12:53:38 -0500 Subject: [GFS2] fix gfs2 block allocation (cleaned up) This patch fixes bz 450641. This patch changes the computation for zero_metapath_length(), which it renames to metapath_branch_start(). When you are extending the metadata tree, The indirect blocks that point to the new data block must either diverge from the existing tree either at the inode, or at the first indirect block. They can diverge at the first indirect block because the inode has room for 483 pointers while the indirect blocks have room for 509 pointers, so when the tree is grown, there is some free space in the first indirect block. What metapath_branch_start() now computes is the height where the first indirect block for the new data block is located. It can either be 1 (if the indirect block diverges from the inode) or 2 (if it diverges from the first indirect block). Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index c19184f2e70e..bec76b1c2bb0 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -246,15 +246,11 @@ static void find_metapath(const struct gfs2_sbd *sdp, u64 block, } -static inline unsigned int zero_metapath_length(const struct metapath *mp, - unsigned height) +static inline unsigned int metapath_branch_start(const struct metapath *mp) { - unsigned int i; - for (i = 0; i < height - 1; i++) { - if (mp->mp_list[i] != 0) - return i; - } - return height; + if (mp->mp_list[0] == 0) + return 2; + return 1; } /** @@ -436,7 +432,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh = mp->mp_bh[0]; u64 bn, dblock = 0; - unsigned n, i, blks, alloced = 0, iblks = 0, zmpl = 0; + unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; unsigned dblks = 0; unsigned ptrs_per_blk; const unsigned end_of_metadata = height - 1; @@ -471,9 +467,8 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, /* Building up tree height */ state = ALLOC_GROW_HEIGHT; iblks = height - ip->i_height; - zmpl = zero_metapath_length(mp, height); - iblks -= zmpl; - iblks += height; + branch_start = metapath_branch_start(mp); + iblks += (height - branch_start); } } @@ -509,13 +504,13 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, sizeof(struct gfs2_meta_header)); *ptr = zero_bn; state = ALLOC_GROW_DEPTH; - for(i = zmpl; i < height; i++) { + for(i = branch_start; i < height; i++) { if (mp->mp_bh[i] == NULL) break; brelse(mp->mp_bh[i]); mp->mp_bh[i] = NULL; } - i = zmpl; + i = branch_start; } if (n == 0) break; -- cgit v1.2.3-70-g09d2 From 18ce3751ccd488c78d3827e9f6bf54e6322676fb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 1 Jul 2008 09:07:34 +0200 Subject: Properly notify block layer of sync writes fsync_buffers_list() and sync_dirty_buffer() both issue async writes and then immediately wait on them. Conceptually, that makes them sync writes and we should treat them as such so that the IO schedulers can handle them appropriately. This patch fixes a write starvation issue that Lin Ming reported, where xx is stuck for more than 2 minutes because of a large number of synchronous IO in the system: INFO: task kjournald:20558 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kjournald D ffff810010820978 6712 20558 2 ffff81022ddb1d10 0000000000000046 ffff81022e7baa10 ffffffff803ba6f2 ffff81022ecd0000 ffff8101e6dc9160 ffff81022ecd0348 000000008048b6cb 0000000000000086 ffff81022c4e8d30 0000000000000000 ffffffff80247537 Call Trace: [] kobject_get+0x12/0x17 [] getnstimeofday+0x2f/0x83 [] sync_buffer+0x0/0x3f [] io_schedule+0x5d/0x9f [] sync_buffer+0x3b/0x3f [] __wait_on_bit+0x40/0x6f [] sync_buffer+0x0/0x3f [] out_of_line_wait_on_bit+0x6c/0x78 [] wake_bit_function+0x0/0x23 [] sync_dirty_buffer+0x98/0xcb [] journal_commit_transaction+0x97d/0xcb6 [] lock_timer_base+0x26/0x4b [] kjournald+0xc1/0x1fb [] autoremove_wake_function+0x0/0x2e [] kjournald+0x0/0x1fb [] kthread+0x47/0x74 [] schedule_tail+0x28/0x5d [] child_rip+0xa/0x12 [] kthread+0x0/0x74 [] child_rip+0x0/0x12 Lin Ming confirms that this patch fixes the issue. I've run tests with it for the past week and no ill effects have been observed, so I'm proposing it for inclusion into 2.6.26. Signed-off-by: Jens Axboe --- fs/buffer.c | 13 ++++++++----- include/linux/fs.h | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index a073f3f4f013..0f51c0f7c266 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -821,7 +821,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) * contents - it is a noop if I/O is still in * flight on potentially older contents. */ - ll_rw_block(SWRITE, 1, &bh); + ll_rw_block(SWRITE_SYNC, 1, &bh); brelse(bh); spin_lock(lock); } @@ -2940,16 +2940,19 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (rw == SWRITE) + if (rw == SWRITE || rw == SWRITE_SYNC) lock_buffer(bh); else if (test_set_buffer_locked(bh)) continue; - if (rw == WRITE || rw == SWRITE) { + if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(WRITE, bh); + if (rw == SWRITE_SYNC) + submit_bh(WRITE_SYNC, bh); + else + submit_bh(WRITE, bh); continue; } } else { @@ -2978,7 +2981,7 @@ int sync_dirty_buffer(struct buffer_head *bh) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(WRITE, bh); + ret = submit_bh(WRITE_SYNC, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7c1080826832..d8e2762ed14d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -83,6 +83,7 @@ extern int dir_notify_enable; #define READ_SYNC (READ | (1 << BIO_RW_SYNC)) #define READ_META (READ | (1 << BIO_RW_META)) #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) +#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) #define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) #define SEL_IN 1 -- cgit v1.2.3-70-g09d2 From 2e4bef41a0f7df31be140ef354b9c12f2299016a Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Tue, 24 Jun 2008 17:39:39 -0500 Subject: 9p: fix O_APPEND in legacy mode The legacy protocol's open operation doesn't handle an append operation (it is expected that the client take care of it). We were incorrectly passing the extended protocol's flag through even in legacy mode. This was reported in bugzilla report #10689. This patch fixes the problem by disallowing extended protocol open modes from being passed in legacy mode and implemented append functionality on the client side by adding a seek after the open. Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs_vfs.h | 2 +- fs/9p/vfs_file.c | 4 +++- fs/9p/vfs_inode.c | 18 ++++++++++-------- 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index fd01d90cada5..57997fa14e69 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -51,4 +51,4 @@ int v9fs_dir_release(struct inode *inode, struct file *filp); int v9fs_file_open(struct inode *inode, struct file *file); void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat); void v9fs_dentry_release(struct dentry *); -int v9fs_uflags2omode(int uflags); +int v9fs_uflags2omode(int uflags, int extended); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 0d55affe37d4..52944d2249a4 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -59,7 +59,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file); v9ses = v9fs_inode2v9ses(inode); - omode = v9fs_uflags2omode(file->f_flags); + omode = v9fs_uflags2omode(file->f_flags, v9fs_extended(v9ses)); fid = file->private_data; if (!fid) { fid = v9fs_fid_clone(file->f_path.dentry); @@ -75,6 +75,8 @@ int v9fs_file_open(struct inode *inode, struct file *file) inode->i_size = 0; inode->i_blocks = 0; } + if ((file->f_flags & O_APPEND) && (!v9fs_extended(v9ses))) + generic_file_llseek(file, 0, SEEK_END); } file->private_data = fid; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 40fa807bd929..c95295c65045 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -132,10 +132,10 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) /** * v9fs_uflags2omode- convert posix open flags to plan 9 mode bits * @uflags: flags to convert - * + * @extended: if .u extensions are active */ -int v9fs_uflags2omode(int uflags) +int v9fs_uflags2omode(int uflags, int extended) { int ret; @@ -155,14 +155,16 @@ int v9fs_uflags2omode(int uflags) break; } - if (uflags & O_EXCL) - ret |= P9_OEXCL; - if (uflags & O_TRUNC) ret |= P9_OTRUNC; - if (uflags & O_APPEND) - ret |= P9_OAPPEND; + if (extended) { + if (uflags & O_EXCL) + ret |= P9_OEXCL; + + if (uflags & O_APPEND) + ret |= P9_OAPPEND; + } return ret; } @@ -506,7 +508,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, flags = O_RDWR; fid = v9fs_create(v9ses, dir, dentry, NULL, perm, - v9fs_uflags2omode(flags)); + v9fs_uflags2omode(flags, v9fs_extended(v9ses))); if (IS_ERR(fid)) { err = PTR_ERR(fid); fid = NULL; -- cgit v1.2.3-70-g09d2 From f5c8f7dae75e1e6bb3200fc61302e4d5e2df3dc2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 4 Jul 2008 09:59:33 -0700 Subject: ext3: add missing unlock to error path in ext3_quota_write() When write in ext3_quota_write() fails, we have to properly release i_mutex. One error path has been missing the unlock... Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index fe3119a71ada..2845425077e8 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2875,8 +2875,10 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, blk++; } out: - if (len == towrite) + if (len == towrite) { + mutex_unlock(&inode->i_mutex); return err; + } if (inode->i_size < off+len-towrite) { i_size_write(inode, off+len-towrite); EXT3_I(inode)->i_disksize = inode->i_size; -- cgit v1.2.3-70-g09d2 From 4d04e4fbf8fc9f5136a64d45e2c20de095c08efb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 4 Jul 2008 09:59:34 -0700 Subject: ext4: add missing unlock to an error path in ext4_quota_write() When write in ext4_quota_write() fails, we have to properly release i_mutex. One error path has been missing the unlock... Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cb96f127c366..02bf24343979 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3337,8 +3337,10 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, blk++; } out: - if (len == towrite) + if (len == towrite) { + mutex_unlock(&inode->i_mutex); return err; + } if (inode->i_size < off+len-towrite) { i_size_write(inode, off+len-towrite); EXT4_I(inode)->i_disksize = inode->i_size; -- cgit v1.2.3-70-g09d2 From 10dd08dc04c881dcc9f7f19e2a3ad8e0778e4db5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 4 Jul 2008 09:59:34 -0700 Subject: reiserfs: add missing unlock to an error path in reiserfs_quota_write() When write in reiserfs_quota_write() fails, we have to properly release i_mutex. One error path has been missing the unlock... Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index ed424d708e69..1d40f2bd1970 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2165,8 +2165,10 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, blk++; } out: - if (len == towrite) + if (len == towrite) { + mutex_unlock(&inode->i_mutex); return err; + } if (inode->i_size < off + len - towrite) i_size_write(inode, off + len - towrite); inode->i_version++; -- cgit v1.2.3-70-g09d2 From c4a2d7fbec3029c8891a3ad5fceec2992096a3b7 Mon Sep 17 00:00:00 2001 From: Michael Halcrow Date: Fri, 4 Jul 2008 09:59:35 -0700 Subject: ecryptfs: remove unnecessary mux from ecryptfs_init_ecryptfs_miscdev() The misc_mtx should provide all the protection required to keep the daemon hash table sane during miscdev registration. Since this mutex is causing gratuitous lockdep warnings, this patch removes it. Signed-off-by: Michael Halcrow Reported-by: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/miscdev.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 50c994a249a5..09a4522f65e6 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -575,13 +575,11 @@ int ecryptfs_init_ecryptfs_miscdev(void) int rc; atomic_set(&ecryptfs_num_miscdev_opens, 0); - mutex_lock(&ecryptfs_daemon_hash_mux); rc = misc_register(&ecryptfs_miscdev); if (rc) printk(KERN_ERR "%s: Failed to register miscellaneous device " "for communications with userspace daemons; rc = [%d]\n", __func__, rc); - mutex_unlock(&ecryptfs_daemon_hash_mux); return rc; } -- cgit v1.2.3-70-g09d2 From 337e2ab5d1efca56c6fdd57bffaea7e7899e7283 Mon Sep 17 00:00:00 2001 From: Jess Guerrero Date: Fri, 4 Jul 2008 09:59:50 -0700 Subject: ntfs: update help text The url in the help text for ntfs should be updated. Acked-by: Anton Altaparmakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index cf12c403b8c7..2694648cbd1b 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -830,7 +830,7 @@ config NTFS_FS from the project web site. For more information see - and . + and . To compile this file system support as a module, choose M here: the module will be called ntfs. -- cgit v1.2.3-70-g09d2 From 6d1029b56329b1cc9b7233e5333c1a48ddbbfad8 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 4 Jul 2008 09:59:51 -0700 Subject: add kernel-doc for simple_read_from_buffer and memory_read_from_buffer Add kernel-doc comments describing simple_read_from_buffer and memory_read_from_buffer. Signed-off-by: Akinobu Mita Cc: Christoph Hellwig Cc: "Randy.Dunlap" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/libfs.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'fs') diff --git a/fs/libfs.c b/fs/libfs.c index 892d41cb3382..baeb71ee1cde 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -512,6 +512,20 @@ void simple_release_fs(struct vfsmount **mount, int *count) mntput(mnt); } +/** + * simple_read_from_buffer - copy data from the buffer to user space + * @to: the user space buffer to read to + * @count: the maximum number of bytes to read + * @ppos: the current position in the buffer + * @from: the buffer to read from + * @available: the size of the buffer + * + * The simple_read_from_buffer() function reads up to @count bytes from the + * buffer @from at offset @ppos into the user space address starting at @to. + * + * On success, the number of bytes read is returned and the offset @ppos is + * advanced by this number, or negative value is returned on error. + **/ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available) { @@ -528,6 +542,20 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, return count; } +/** + * memory_read_from_buffer - copy data from the buffer + * @to: the kernel space buffer to read to + * @count: the maximum number of bytes to read + * @ppos: the current position in the buffer + * @from: the buffer to read from + * @available: the size of the buffer + * + * The memory_read_from_buffer() function reads up to @count bytes from the + * buffer @from at offset @ppos into the kernel space address starting at @to. + * + * On success, the number of bytes read is returned and the offset @ppos is + * advanced by this number, or negative value is returned on error. + **/ ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, const void *from, size_t available) { -- cgit v1.2.3-70-g09d2 From 086f7316f0d400806d76323beefae996bb3849b1 Mon Sep 17 00:00:00 2001 From: "Andrew G. Morgan" Date: Fri, 4 Jul 2008 09:59:58 -0700 Subject: security: filesystem capabilities: fix fragile setuid fixup code This commit includes a bugfix for the fragile setuid fixup code in the case that filesystem capabilities are supported (in access()). The effect of this fix is gated on filesystem capability support because changing securebits is only supported when filesystem capabilities support is configured.) [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Andrew G. Morgan Acked-by: Serge Hallyn Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/open.c | 37 ++++++++++++++++++++++--------------- include/linux/capability.h | 2 ++ include/linux/securebits.h | 15 ++++++++------- kernel/capability.c | 21 +++++++++++++++++++++ 4 files changed, 53 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index a1450086e92f..a99ad09c3197 100644 --- a/fs/open.c +++ b/fs/open.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -425,7 +426,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) { struct nameidata nd; int old_fsuid, old_fsgid; - kernel_cap_t old_cap; + kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */ int res; if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ @@ -433,23 +434,27 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) old_fsuid = current->fsuid; old_fsgid = current->fsgid; - old_cap = current->cap_effective; current->fsuid = current->uid; current->fsgid = current->gid; - /* - * Clear the capabilities if we switch to a non-root user - * - * FIXME: There is a race here against sys_capset. The - * capabilities can change yet we will restore the old - * value below. We should hold task_capabilities_lock, - * but we cannot because user_path_walk can sleep. - */ - if (current->uid) - cap_clear(current->cap_effective); - else - current->cap_effective = current->cap_permitted; + if (!issecure(SECURE_NO_SETUID_FIXUP)) { + /* + * Clear the capabilities if we switch to a non-root user + */ +#ifndef CONFIG_SECURITY_FILE_CAPABILITIES + /* + * FIXME: There is a race here against sys_capset. The + * capabilities can change yet we will restore the old + * value below. We should hold task_capabilities_lock, + * but we cannot because user_path_walk can sleep. + */ +#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */ + if (current->uid) + old_cap = cap_set_effective(__cap_empty_set); + else + old_cap = cap_set_effective(current->cap_permitted); + } res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); if (res) @@ -478,7 +483,9 @@ out_path_release: out: current->fsuid = old_fsuid; current->fsgid = old_fsgid; - current->cap_effective = old_cap; + + if (!issecure(SECURE_NO_SETUID_FIXUP)) + cap_set_effective(old_cap); return res; } diff --git a/include/linux/capability.h b/include/linux/capability.h index fa830f8de032..02673846d205 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -501,6 +501,8 @@ extern const kernel_cap_t __cap_empty_set; extern const kernel_cap_t __cap_full_set; extern const kernel_cap_t __cap_init_eff_set; +kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); + int capable(int cap); int __capable(struct task_struct *t, int cap); diff --git a/include/linux/securebits.h b/include/linux/securebits.h index c1f19dbceb05..92f09bdf1175 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -7,14 +7,15 @@ inheritance of root-permissions and suid-root executable under compatibility mode. We raise the effective and inheritable bitmasks *of the executable file* if the effective uid of the new process is - 0. If the real uid is 0, we raise the inheritable bitmask of the + 0. If the real uid is 0, we raise the effective (legacy) bit of the executable file. */ #define SECURE_NOROOT 0 #define SECURE_NOROOT_LOCKED 1 /* make bit-0 immutable */ -/* When set, setuid to/from uid 0 does not trigger capability-"fixes" - to be compatible with old programs relying on set*uid to loose - privileges. When unset, setuid doesn't change privileges. */ +/* When set, setuid to/from uid 0 does not trigger capability-"fixup". + When unset, to provide compatiblility with old programs relying on + set*uid to gain/lose privilege, transitions to/from uid 0 cause + capabilities to be gained/lost. */ #define SECURE_NO_SETUID_FIXUP 2 #define SECURE_NO_SETUID_FIXUP_LOCKED 3 /* make bit-2 immutable */ @@ -26,10 +27,10 @@ #define SECURE_KEEP_CAPS 4 #define SECURE_KEEP_CAPS_LOCKED 5 /* make bit-4 immutable */ -/* Each securesetting is implemented using two bits. One bit specify +/* Each securesetting is implemented using two bits. One bit specifies whether the setting is on or off. The other bit specify whether the - setting is fixed or not. A setting which is fixed cannot be changed - from user-level. */ + setting is locked or not. A setting which is locked cannot be + changed from user-level. */ #define issecure_mask(X) (1 << (X)) #define issecure(X) (issecure_mask(X) & current->securebits) diff --git a/kernel/capability.c b/kernel/capability.c index cfbe44299488..901e0fdc3fff 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -121,6 +121,27 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy) * uninteresting and/or not to be changed. */ +/* + * Atomically modify the effective capabilities returning the original + * value. No permission check is performed here - it is assumed that the + * caller is permitted to set the desired effective capabilities. + */ +kernel_cap_t cap_set_effective(const kernel_cap_t pE_new) +{ + kernel_cap_t pE_old; + + spin_lock(&task_capability_lock); + + pE_old = current->cap_effective; + current->cap_effective = pE_new; + + spin_unlock(&task_capability_lock); + + return pE_old; +} + +EXPORT_SYMBOL(cap_set_effective); + /** * sys_capget - get the capabilities of a given process. * @header: pointer to struct that contains capability version and -- cgit v1.2.3-70-g09d2 From 20cbc972617069c1ed434f62151e4de57d26ea46 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 5 Jul 2008 12:29:05 -0700 Subject: Fix clear_refs_write() use of struct mm_walk Don't use a static entry, so as to prevent races during concurrent use of this function. Reported-by: Alexey Dobriyan Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ab8ccc9d14ff..05053d701ac5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -476,10 +476,10 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, return -ESRCH; mm = get_task_mm(task); if (mm) { - static struct mm_walk clear_refs_walk; - memset(&clear_refs_walk, 0, sizeof(clear_refs_walk)); - clear_refs_walk.pmd_entry = clear_refs_pte_range; - clear_refs_walk.mm = mm; + struct mm_walk clear_refs_walk = { + .pmd_entry = clear_refs_pte_range, + .mm = mm, + }; down_read(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) { clear_refs_walk.private = vma; -- cgit v1.2.3-70-g09d2 From 5d7e0d2bd98ef4f5a16ac9da1987ae655368dd6a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 5 Jul 2008 01:02:01 -0700 Subject: Fix pagemap_read() use of struct mm_walk Fix some issues in pagemap_read noted by Alexey: - initialize pagemap_walk.mm to "mm" , so the code starts working as advertised - initialize ->private to "&pm" so it wouldn't immediately oops in pagemap_pte_hole() - unstatic struct pagemap_walk, so two threads won't fsckup each other (including those started by root, including flipping ->mm when you don't have permissions) - pagemap_read() contains two calls to ptrace_may_attach(), second one looks unneeded. - avoid possible kmalloc(0) and integer wraparound. Cc: Alexey Dobriyan Cc: Matt Mackall Signed-off-by: Andrew Morton [ Personally, I'd just remove the functionality entirely - Linus ] Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 72 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 05053d701ac5..c492449f3b45 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -602,11 +602,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, return err; } -static struct mm_walk pagemap_walk = { - .pmd_entry = pagemap_pte_range, - .pte_hole = pagemap_pte_hole -}; - /* * /proc/pid/pagemap - an array mapping virtual pages to pfns * @@ -641,6 +636,11 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, struct pagemapread pm; int pagecount; int ret = -ESRCH; + struct mm_walk pagemap_walk; + unsigned long src; + unsigned long svpfn; + unsigned long start_vaddr; + unsigned long end_vaddr; if (!task) goto out; @@ -659,11 +659,15 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!mm) goto out_task; - ret = -ENOMEM; + uaddr = (unsigned long)buf & PAGE_MASK; uend = (unsigned long)(buf + count); pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; - pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); + ret = 0; + if (pagecount == 0) + goto out_mm; + pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); + ret = -ENOMEM; if (!pages) goto out_mm; @@ -684,33 +688,33 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, pm.out = (u64 *)buf; pm.end = (u64 *)(buf + count); - if (!ptrace_may_attach(task)) { - ret = -EIO; - } else { - unsigned long src = *ppos; - unsigned long svpfn = src / PM_ENTRY_BYTES; - unsigned long start_vaddr = svpfn << PAGE_SHIFT; - unsigned long end_vaddr = TASK_SIZE_OF(task); - - /* watch out for wraparound */ - if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) - start_vaddr = end_vaddr; - - /* - * The odds are that this will stop walking way - * before end_vaddr, because the length of the - * user buffer is tracked in "pm", and the walk - * will stop when we hit the end of the buffer. - */ - ret = walk_page_range(start_vaddr, end_vaddr, - &pagemap_walk); - if (ret == PM_END_OF_BUFFER) - ret = 0; - /* don't need mmap_sem for these, but this looks cleaner */ - *ppos += (char *)pm.out - buf; - if (!ret) - ret = (char *)pm.out - buf; - } + pagemap_walk.pmd_entry = pagemap_pte_range; + pagemap_walk.pte_hole = pagemap_pte_hole; + pagemap_walk.mm = mm; + pagemap_walk.private = ± + + src = *ppos; + svpfn = src / PM_ENTRY_BYTES; + start_vaddr = svpfn << PAGE_SHIFT; + end_vaddr = TASK_SIZE_OF(task); + + /* watch out for wraparound */ + if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) + start_vaddr = end_vaddr; + + /* + * The odds are that this will stop walking way + * before end_vaddr, because the length of the + * user buffer is tracked in "pm", and the walk + * will stop when we hit the end of the buffer. + */ + ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk); + if (ret == PM_END_OF_BUFFER) + ret = 0; + /* don't need mmap_sem for these, but this looks cleaner */ + *ppos += (char *)pm.out - buf; + if (!ret) + ret = (char *)pm.out - buf; out_pages: for (; pagecount; pagecount--) { -- cgit v1.2.3-70-g09d2 From 18c6ac383f3e46cfce08d0bf972705852a4e1268 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 7 Jul 2008 10:06:29 -0700 Subject: [PATCH] ocfs2/dlm: Fixes oops in dlm_new_lockres() Patch fixes a race that can result in an oops while adding a lockres to the dlm lockres tracking list. Bug introduced by mainline commit 29576f8bb54045be944ba809d4fca1ad77c94165. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmmaster.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index efc015c6128a..44f87caf3683 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -606,7 +606,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, res->last_used = 0; + spin_lock(&dlm->spinlock); list_add_tail(&res->tracking, &dlm->tracking_list); + spin_unlock(&dlm->spinlock); memset(res->lvb, 0, DLM_LVB_LEN); memset(res->refmap, 0, sizeof(res->refmap)); -- cgit v1.2.3-70-g09d2 From 2aac05a91971fbd1bf6cbed78b8731eb7454b9b7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 7 Jul 2008 13:26:10 -0400 Subject: NFS: Fix readdir cache invalidation invalidate_inode_pages2_range() takes page offset arguments, not byte ranges. Another thought is that individual pages might perhaps get evicted by VM pressure, in which case we might perhaps want to re-read not only the evicted page, but all subsequent pages too (in case the server returns more/less data per page so that the alignment of the next entry changes). We should therefore remove the condition that we only do this on page->index==0. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 58d43daec084..982a2064fe4c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -204,7 +204,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) * Note: assumes we have exclusive access to this mapping either * through inode->i_mutex or some other mechanism. */ - if (page->index == 0 && invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1) < 0) { + if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) { /* Should never happen */ nfs_zap_mapping(inode, inode->i_mapping); } -- cgit v1.2.3-70-g09d2 From eb35c218d83ec0780d9db869310f2e333f628702 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 8 Jul 2008 14:37:06 -0400 Subject: reiserfs: discard prealloc in reiserfs_delete_inode With the removal of struct file from the xattr code, reiserfs_file_release() isn't used anymore, so the prealloc isn't discarded. This causes hangs later down the line. This patch adds it to reiserfs_delete_inode. In most cases it will be a no-op due to it already having been called, but will avoid hangs with xattrs. Signed-off-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- fs/reiserfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 57917932212e..192269698a8a 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -45,6 +45,8 @@ void reiserfs_delete_inode(struct inode *inode) goto out; reiserfs_update_inode_transaction(inode); + reiserfs_discard_prealloc(&th, inode); + err = reiserfs_delete_object(&th, inode); /* Do quota update inside a transaction for journaled quotas. We must do that -- cgit v1.2.3-70-g09d2 From e988cf1cfed4ed80bf40528e655fe18bed6a38b6 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 10 Jul 2008 09:25:39 -0700 Subject: ocfs2: Fix flags in ocfs2_file_lock The stack-glue merge changed the way we use flags in dlmglue in that we now use the fs/dlm equivalents. Unfortunately, a merge error left the new flock code only partially updated. This took a while to show up though, because the lock level constants are actually identical between o2dlm and fs/dlm. The *_CONVERT and *_NOQUEUE flags have different values though, which is eventually causing a crash in flags_to_o2dlm(). Signed-off-by: Mark Fasheh --- fs/ocfs2/dlmglue.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 394d25a131a5..80e20d9f2780 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1554,8 +1554,8 @@ out: */ int ocfs2_file_lock(struct file *file, int ex, int trylock) { - int ret, level = ex ? LKM_EXMODE : LKM_PRMODE; - unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0; + int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; + unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; unsigned long flags; struct ocfs2_file_private *fp = file->private_data; struct ocfs2_lock_res *lockres = &fp->fp_flock; @@ -1582,7 +1582,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) * Get the lock at NLMODE to start - that way we * can cancel the upconvert request if need be. */ - ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); + ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); if (ret < 0) { mlog_errno(ret); goto out; @@ -1597,7 +1597,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) } lockres->l_action = OCFS2_AST_CONVERT; - lkm_flags |= LKM_CONVERT; + lkm_flags |= DLM_LKF_CONVERT; lockres->l_requested = level; lockres_or_flags(lockres, OCFS2_LOCK_BUSY); @@ -1664,7 +1664,7 @@ void ocfs2_file_unlock(struct file *file) if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) return; - if (lockres->l_level == LKM_NLMODE) + if (lockres->l_level == DLM_LOCK_NL) return; mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", @@ -1678,11 +1678,11 @@ void ocfs2_file_unlock(struct file *file) lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); lockres->l_blocking = DLM_LOCK_EX; - gen = ocfs2_prepare_downconvert(lockres, LKM_NLMODE); + gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); spin_unlock_irqrestore(&lockres->l_lock, flags); - ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0, gen); + ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); if (ret) { mlog_errno(ret); return; -- cgit v1.2.3-70-g09d2 From 96a8e13ed44e380fc2bb6c711d74d5ba698c00b2 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 10 Jul 2008 21:19:20 +0100 Subject: exec: fix stack excutability without PT_GNU_STACK Kernel Bugzilla #11063 points out that on some architectures (e.g. x86_32) exec'ing an ELF without a PT_GNU_STACK program header should default to an executable stack; but this got broken by the unlimited argv feature because stack vma is now created before the right personality has been established: so breaking old binaries using nested function trampolines. Therefore re-evaluate VM_STACK_FLAGS in setup_arg_pages, where stack vm_flags used to be set, before the mprotect_fixup. Checking through our existing VM_flags, none would have changed since insert_vm_struct: so this seems safer than finding a way through the personality labyrinth. Reported-by: pageexec@freemail.hu Signed-off-by: Hugh Dickins Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index da94a6f05df3..fd9234379e8d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -610,7 +610,7 @@ int setup_arg_pages(struct linux_binprm *bprm, bprm->exec -= stack_shift; down_write(&mm->mmap_sem); - vm_flags = vma->vm_flags; + vm_flags = VM_STACK_FLAGS; /* * Adjust stack execute permissions; explicitly enable for -- cgit v1.2.3-70-g09d2 From 49641f1acfdfd437ed9b0a70b86bf36626c02afe Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 11 Jul 2008 17:43:55 +1000 Subject: Fix reference counting race on log buffers When we release the iclog, we do an atomic_dec_and_lock to determine if we are the last reference and need to trigger update of log headers and writeout. However, in xlog_state_get_iclog_space() we also need to check if we have the last reference count there. If we do, we release the log buffer, otherwise we decrement the reference count. But the compare and decrement in xlog_state_get_iclog_space() is not atomic, so both places can see a reference count of 2 and neither will release the iclog. That leads to a filesystem hang. Close the race by replacing the atomic_read() and atomic_dec() pair with atomic_add_unless() to ensure that they are executed atomically. Signed-off-by: Dave Chinner Reviewed-by: Tim Shimmin Tested-by: Eric Sandeen Signed-off-by: Linus Torvalds --- fs/xfs/xfs_log.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index afaee301b0ee..ad3d26ddfe31 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2427,13 +2427,20 @@ restart: if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) { xlog_state_switch_iclogs(log, iclog, iclog->ic_size); - /* If I'm the only one writing to this iclog, sync it to disk */ - if (atomic_read(&iclog->ic_refcnt) == 1) { + /* + * If I'm the only one writing to this iclog, sync it to disk. + * We need to do an atomic compare and decrement here to avoid + * racing with concurrent atomic_dec_and_lock() calls in + * xlog_state_release_iclog() when there is more than one + * reference to the iclog. + */ + if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) { + /* we are the only one */ spin_unlock(&log->l_icloglock); - if ((error = xlog_state_release_iclog(log, iclog))) + error = xlog_state_release_iclog(log, iclog); + if (error) return error; } else { - atomic_dec(&iclog->ic_refcnt); spin_unlock(&log->l_icloglock); } goto restart; -- cgit v1.2.3-70-g09d2 From e911d0cc877ff027d5bd09fc33148ab76f0fdf0e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 12 Jul 2008 13:47:59 -0700 Subject: cifs: fix inode leak in cifs_get_inode_info_unix Try this: mount a share with unix extensions create a file on it umount the share You'll get the following message in the ring buffer: VFS: Busy inodes after unmount of cifs. Self-destruct in 5 seconds. Have a nice day... ...the problem is that cifs_get_inode_info_unix is creating and hashing a new inode even when it's going to return error anyway. The first lookup when creating a file returns an error so we end up leaking this inode before we do the actual create. This appears to be a regression caused by commit 0e4bbde94fdc33f5b3d793166b21bf768ca3e098. The following patch seems to fix it for me, and fixes a minor formatting nit as well. Signed-off-by: Jeff Layton Acked-by: Steven French Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cifs/inode.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 722be543ceec..2e904bd111c8 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -219,15 +219,15 @@ int cifs_get_inode_info_unix(struct inode **pinode, rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc) { - if (rc == -EREMOTE && !is_dfs_referral) { - is_dfs_referral = true; - cFYI(DBG2, ("DFS ref")); - /* for DFS, server does not give us real inode data */ - fill_fake_finddataunix(&find_data, sb); - rc = 0; - } - } + if (rc == -EREMOTE && !is_dfs_referral) { + is_dfs_referral = true; + cFYI(DBG2, ("DFS ref")); + /* for DFS, server does not give us real inode data */ + fill_fake_finddataunix(&find_data, sb); + rc = 0; + } else if (rc) + goto cgiiu_exit; + num_of_bytes = le64_to_cpu(find_data.NumOfBytes); end_of_file = le64_to_cpu(find_data.EndOfFile); @@ -236,7 +236,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, *pinode = new_inode(sb); if (*pinode == NULL) { rc = -ENOMEM; - goto cgiiu_exit; + goto cgiiu_exit; } /* Is an i_ino of zero legal? */ /* note ino incremented to unique num in new_inode */ -- cgit v1.2.3-70-g09d2 From 536abdb0802f3fac1b217530741853843d63c281 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 12 Jul 2008 13:48:00 -0700 Subject: cifs: fix wksidarr declaration to be big-endian friendly The current definition of wksidarr works fine on little endian arches (since cpu_to_le32 is a no-op there), but on big-endian arches, it fails to compile with this error: error: braced-group within expression allowed only inside a function The problem is that this static declaration has cpu_to_le32 embedded within it, and that expands into a function macro. We need to use __constant_cpu_to_le32() instead. Signed-off-by: Jeff Layton Cc: Steven French Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cifs/cifsacl.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 34902cff5400..0e9fc2ba90ee 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -34,11 +34,11 @@ static struct cifs_wksid wksidarr[NUM_WK_SIDS] = { {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"}, {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"}, - {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"}, - {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(18), 0, 0, 0, 0} }, "sys"}, - {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(544), 0, 0, 0} }, "root"}, - {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(545), 0, 0, 0} }, "users"}, - {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(546), 0, 0, 0} }, "guest"} } + {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"}, + {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(18), 0, 0, 0, 0} }, "sys"}, + {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(544), 0, 0, 0} }, "root"}, + {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(545), 0, 0, 0} }, "users"}, + {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(546), 0, 0, 0} }, "guest"} } ; -- cgit v1.2.3-70-g09d2