158 files changed, 3298 insertions, 1777 deletions
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index f4407eb276c7..8d45ed668837 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -712,7 +712,7 @@ static void v9fs_read_work(void *a)
  * v9fs_send_request - send 9P request
  * The function can sleep until the request is scheduled for sending.
  * The function can be interrupted. Return from the function is not
- * a guarantee that the request is sent succesfully. Can return errors
+ * a guarantee that the request is sent successfully. Can return errors
  * that can be retrieved by PTR_ERR macros.
  *
  * @m: mux data
@@ -932,6 +932,8 @@ v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
 					r.rcall || r.err);
 			} while (!r.rcall && !r.err && err==-ERESTARTSYS &&
 				m->trans->status==Connected && !m->err);
+
+			err = -ERESTARTSYS;
 		}
 		sigpending = 1;
 	}
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index f867b8d3e973..450b0c1b385e 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -38,7 +38,7 @@
  */
 
 extern struct file_system_type v9fs_fs_type;
-extern struct address_space_operations v9fs_addr_operations;
+extern const struct address_space_operations v9fs_addr_operations;
 extern const struct file_operations v9fs_file_operations;
 extern const struct file_operations v9fs_dir_operations;
 extern struct dentry_operations v9fs_dentry_operations;
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index efda46fb64d9..d4f0aa3c87f2 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -103,6 +103,6 @@ UnmapAndUnlock:
 	return retval;
 }
 
-struct address_space_operations v9fs_addr_operations = {
+const struct address_space_operations v9fs_addr_operations = {
       .readpage = v9fs_vfs_readpage,
 };
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 5c6bdf82146c..2f580a197b8d 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -300,7 +300,7 @@ clunk_fid:
 	fid = V9FS_NOFID;
 
 put_fid:
-	if (fid >= 0)
+	if (fid != V9FS_NOFID)
 		v9fs_put_idpool(fid, &v9ses->fidpool);
 
 	kfree(fcall);
diff --git a/fs/Kconfig b/fs/Kconfig
index 6c5051802bd2..6dc8cfd6d80c 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1116,7 +1116,7 @@ config JFFS2_SUMMARY
 
 config JFFS2_FS_XATTR
 	bool "JFFS2 XATTR support (EXPERIMENTAL)"
-	depends on JFFS2_FS && EXPERIMENTAL && !JFFS2_FS_WRITEBUFFER
+	depends on JFFS2_FS && EXPERIMENTAL
 	default n
 	help
 	  Extended attributes are name:value pairs associated with inodes by
@@ -1722,7 +1722,7 @@ config CIFS_STATS
 	  mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
 
 config CIFS_STATS2
-	bool "CIFS extended statistics"
+	bool "Extended statistics"
 	depends on CIFS_STATS
 	help
 	  Enabling this option will allow more detailed statistics on SMB
@@ -1735,6 +1735,32 @@ config CIFS_STATS2
 	  Unless you are a developer or are doing network performance analysis
 	  or tuning, say N.
 
+config CIFS_WEAK_PW_HASH
+	bool "Support legacy servers which use weaker LANMAN security"
+	depends on CIFS
+	help
+	  Modern CIFS servers including Samba and most Windows versions
+	  (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
+	  security mechanisms. These hash the password more securely
+	  than the mechanisms used in the older LANMAN version of the
+          SMB protocol needed to establish sessions with old SMB servers.
+
+	  Enabling this option allows the cifs module to mount to older
+	  LANMAN based servers such as OS/2 and Windows 95, but such
+	  mounts may be less secure than mounts using NTLM or more recent
+	  security mechanisms if you are on a public network.  Unless you
+	  have a need to access old SMB servers (and are on a private 
+	  network) you probably want to say N.  Even if this support
+	  is enabled in the kernel build, it will not be used
+	  automatically. At runtime LANMAN mounts are disabled but
+	  can be set to required (or optional) either in
+	  /proc/fs/cifs (see fs/cifs/README for more detail) or via an
+	  option on the mount command. This support is disabled by 
+	  default in order to reduce the possibility of a downgrade
+	  attack.
+ 
+	  If unsure, say N.
+
 config CIFS_XATTR
         bool "CIFS extended attributes"
         depends on CIFS
@@ -1763,6 +1789,16 @@ config CIFS_POSIX
 	  (such as Samba 3.10 and later) which can negotiate
 	  CIFS POSIX ACL support.  If unsure, say N.
 
+config CIFS_DEBUG2
+	bool "Enable additional CIFS debugging routines"
+	help
+	   Enabling this option adds a few more debugging routines
+	   to the cifs code which slightly increases the size of
+	   the cifs module and can cause additional logging of debug
+	   messages in some error paths, slowing performance. This
+	   option can be turned off unless you are debugging
+	   cifs problems.  If unsure, say N.
+	   
 config CIFS_EXPERIMENTAL
 	  bool "CIFS Experimental Features (EXPERIMENTAL)"
 	  depends on CIFS && EXPERIMENTAL
@@ -1778,7 +1814,7 @@ config CIFS_EXPERIMENTAL
 	    If unsure, say N.
 
 config CIFS_UPCALL
-	  bool "CIFS Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
+	  bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
 	  depends on CIFS_EXPERIMENTAL
 	  select CONNECTOR
 	  help
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index a02802a30798..534f3eecc985 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -72,7 +72,7 @@ static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping, block, adfs_get_block);
 }
 
-static struct address_space_operations adfs_aops = {
+static const struct address_space_operations adfs_aops = {
 	.readpage	= adfs_readpage,
 	.writepage	= adfs_writepage,
 	.sync_page	= block_sync_page,
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index a43a876742b8..0ddd4cc0d1a0 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -195,9 +195,9 @@ extern struct inode_operations   affs_symlink_inode_operations;
 extern const struct file_operations	 affs_file_operations;
 extern const struct file_operations	 affs_file_operations_ofs;
 extern const struct file_operations	 affs_dir_operations;
-extern struct address_space_operations	 affs_symlink_aops;
-extern struct address_space_operations	 affs_aops;
-extern struct address_space_operations	 affs_aops_ofs;
+extern const struct address_space_operations	 affs_symlink_aops;
+extern const struct address_space_operations	 affs_aops;
+extern const struct address_space_operations	 affs_aops_ofs;
 
 extern struct dentry_operations	 affs_dentry_operations;
 extern struct dentry_operations	 affs_dentry_operations_intl;
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 7076262af39b..3de8590e4f6a 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -406,7 +406,7 @@ static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,affs_get_block);
 }
-struct address_space_operations affs_aops = {
+const struct address_space_operations affs_aops = {
 	.readpage = affs_readpage,
 	.writepage = affs_writepage,
 	.sync_page = block_sync_page,
@@ -759,7 +759,7 @@ out:
 	goto done;
 }
 
-struct address_space_operations affs_aops_ofs = {
+const struct address_space_operations affs_aops_ofs = {
 	.readpage = affs_readpage_ofs,
 	//.writepage = affs_writepage_ofs,
 	//.sync_page = affs_sync_page_ofs,
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index 426f0f094f23..f802256a5933 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -66,7 +66,7 @@ fail:
 	return err;
 }
 
-struct address_space_operations affs_symlink_aops = {
+const struct address_space_operations affs_symlink_aops = {
 	.readpage	= affs_symlink_readpage,
 };
 
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 7bb716887e29..67d6634101fd 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -35,7 +35,7 @@ struct inode_operations afs_file_inode_operations = {
 	.getattr	= afs_inode_getattr,
 };
 
-struct address_space_operations afs_fs_aops = {
+const struct address_space_operations afs_fs_aops = {
 	.readpage	= afs_file_readpage,
 	.sync_page	= block_sync_page,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 72febdf9a35a..e88b3b65ae49 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -69,7 +69,7 @@ extern const struct file_operations afs_dir_file_operations;
 /*
  * file.c
  */
-extern struct address_space_operations afs_fs_aops;
+extern const struct address_space_operations afs_fs_aops;
 extern struct inode_operations afs_file_inode_operations;
 
 #ifdef AFS_CACHING_SUPPORT
diff --git a/fs/aio.c b/fs/aio.c
index 8c34a62df7d7..950630187acc 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -641,7 +641,7 @@ static inline int __queue_kicked_iocb(struct kiocb *iocb)
  *	invoked both for initial i/o submission and
  *	subsequent retries via the aio_kick_handler.
  *	Expects to be invoked with iocb->ki_ctx->lock
- *	already held. The lock is released and reaquired
+ *	already held. The lock is released and reacquired
  *	as needed during processing.
  *
  * Calls the iocb retry method (already setup for the
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 08201fab26cd..a83e889a97cd 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -73,7 +73,7 @@ static struct inode_operations befs_dir_inode_operations = {
 	.lookup		= befs_lookup,
 };
 
-static struct address_space_operations befs_aops = {
+static const struct address_space_operations befs_aops = {
 	.readpage	= befs_readpage,
 	.sync_page	= block_sync_page,
 	.bmap		= befs_bmap,
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 9d791004b21c..31973bbbf057 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -50,7 +50,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
 /* file.c */
 extern struct inode_operations bfs_file_inops;
 extern const struct file_operations bfs_file_operations;
-extern struct address_space_operations bfs_aops;
+extern const struct address_space_operations bfs_aops;
 
 /* dir.c */
 extern struct inode_operations bfs_dir_inops;
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index d83cd74a2e4e..3d5aca28a0a0 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -153,7 +153,7 @@ static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping, block, bfs_get_block);
 }
 
-struct address_space_operations bfs_aops = {
+const struct address_space_operations bfs_aops = {
 	.readpage	= bfs_readpage,
 	.writepage	= bfs_writepage,
 	.sync_page	= block_sync_page,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ddb305eebf90..909cb0595b4e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1094,7 +1094,7 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
 }
 
-struct address_space_operations def_blk_aops = {
+const struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
 	.writepage	= blkdev_writepage,
 	.sync_page	= block_sync_page,
diff --git a/fs/buffer.c b/fs/buffer.c
index 373bb6292bdc..e9994722f4a3 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -564,7 +564,7 @@ still_busy:
  * Completion handler for block_write_full_page() - pages which are unlocked
  * during I/O, and which have PageWriteback cleared upon I/O completion.
  */
-void end_buffer_async_write(struct buffer_head *bh, int uptodate)
+static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 {
 	char b[BDEVNAME_SIZE];
 	unsigned long flags;
@@ -2598,7 +2598,7 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from)
 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
 	unsigned to;
 	struct page *page;
-	struct address_space_operations *a_ops = mapping->a_ops;
+	const struct address_space_operations *a_ops = mapping->a_ops;
 	char *kaddr;
 	int ret = 0;
 
@@ -3166,7 +3166,6 @@ EXPORT_SYMBOL(block_sync_page);
 EXPORT_SYMBOL(block_truncate_page);
 EXPORT_SYMBOL(block_write_full_page);
 EXPORT_SYMBOL(cont_prepare_write);
-EXPORT_SYMBOL(end_buffer_async_write);
 EXPORT_SYMBOL(end_buffer_read_sync);
 EXPORT_SYMBOL(end_buffer_write_sync);
 EXPORT_SYMBOL(file_fsync);
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 7271bb0257f6..a61d17ed1827 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,9 +1,24 @@
+Version 1.44
+------------
+Rewritten sessionsetup support, including support for legacy SMB
+session setup needed for OS/2 and older servers such as Windows 95 and 98.
+Fix oops on ls to OS/2 servers.  Add support for level 1 FindFirst
+so we can do search (ls etc.) to OS/2.  Do not send NTCreateX
+or recent levels of FindFirst unless server says it supports NT SMBs
+(instead use legacy equivalents from LANMAN dialect). Fix to allow
+NTLMv2 authentication support (now can use stronger password hashing
+on mount if corresponding /proc/fs/cifs/SecurityFlags is set (0x4004)).
+Allow override of global cifs security flags on mount via "sec=" option(s).
+
 Version 1.43
 ------------
 POSIX locking to servers which support CIFS POSIX Extensions
 (disabled by default controlled by proc/fs/cifs/Experimental).
 Handle conversion of long share names (especially Asian languages)
-to Unicode during mount. 
+to Unicode during mount. Fix memory leak in sess struct on reconnect.
+Fix rare oops after acpi suspend.  Fix O_TRUNC opens to overwrite on
+cifs open which helps rare case when setpathinfo fails or server does
+not support it. 
 
 Version 1.42
 ------------
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 58c77254a23b..a26f26ed5a17 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -3,4 +3,4 @@
 #
 obj-$(CONFIG_CIFS) += cifs.o
 
-cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o ntlmssp.o
+cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o sess.o
diff --git a/fs/cifs/README b/fs/cifs/README
index 0355003f4f0a..7986d0d97ace 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -443,7 +443,10 @@ A partial list of the supported mount options follows:
 		SFU does).  In the future the bottom 9 bits of the mode
 		mode also will be emulated using queries of the security
 		descriptor (ACL).
-sec		Security mode.  Allowed values are:
+ sign           Must use packet signing (helps avoid unwanted data modification
+		by intermediate systems in the route).  Note that signing
+		does not work with lanman or plaintext authentication.
+ sec            Security mode.  Allowed values are:
 			none	attempt to connection as a null user (no name)
 			krb5    Use Kerberos version 5 authentication
 			krb5i   Use Kerberos authentication and packet signing
@@ -453,6 +456,8 @@ sec		Security mode.  Allowed values are:
 				server requires signing also can be the default) 
 			ntlmv2  Use NTLMv2 password hashing      
 			ntlmv2i Use NTLMv2 password hashing with packet signing
+			lanman  (if configured in kernel config) use older
+				lanman hash
 
 The mount.cifs mount helper also accepts a few mount options before -o
 including:
@@ -485,14 +490,34 @@ PacketSigningEnabled	If set to one, cifs packet signing is enabled
 			it.  If set to two, cifs packet signing is
 			required even if the server considers packet
 			signing optional. (default 1)
+SecurityFlags		Flags which control security negotiation and
+			also packet signing. Authentication (may/must)
+			flags (e.g. for NTLM and/or NTLMv2) may be combined with
+			the signing flags.  Specifying two different password
+			hashing mechanisms (as "must use") on the other hand 
+			does not make much sense. Default flags are 
+				0x07007 
+			(NTLM, NTLMv2 and packet signing allowed).  Maximum 
+			allowable flags if you want to allow mounts to servers
+			using weaker password hashes is 0x37037 (lanman,
+			plaintext, ntlm, ntlmv2, signing allowed):
+ 
+			may use packet signing 				0x00001
+			must use packet signing				0x01001
+			may use NTLM (most common password hash)	0x00002
+			must use NTLM					0x02002
+			may use NTLMv2					0x00004
+			must use NTLMv2					0x04004
+			may use Kerberos security (not implemented yet) 0x00008
+			must use Kerberos (not implemented yet)         0x08008
+			may use lanman (weak) password hash  		0x00010
+			must use lanman password hash			0x10010
+			may use plaintext passwords    			0x00020
+			must use plaintext passwords			0x20020
+			(reserved for future packet encryption)		0x00040
+
 cifsFYI			If set to one, additional debug information is
 			logged to the system error log. (default 0)
-ExtendedSecurity	If set to one, SPNEGO session establishment
-			is allowed which enables more advanced 
-			secure CIFS session establishment (default 0)
-NTLMV2Enabled		If set to one, more secure password hashes
-			are used when the server supports them and
-			when kerberos is not negotiated (default 0)
 traceSMB		If set to one, debug information is logged to the
 			system error log with the start of smb requests
 			and responses (default 0)
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 086ae8f4a207..031cdf293256 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -467,7 +467,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 	asn1_open(&ctx, security_blob, length);
 
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-		cFYI(1, ("Error decoding negTokenInit header "));
+		cFYI(1, ("Error decoding negTokenInit header"));
 		return 0;
 	} else if ((cls != ASN1_APL) || (con != ASN1_CON)
 		   || (tag != ASN1_EOC)) {
@@ -495,7 +495,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		}
 
 		if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-			cFYI(1, ("Error decoding negTokenInit "));
+			cFYI(1, ("Error decoding negTokenInit"));
 			return 0;
 		} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
 			   || (tag != ASN1_EOC)) {
@@ -505,7 +505,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		}
 
 		if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-			cFYI(1, ("Error decoding negTokenInit "));
+			cFYI(1, ("Error decoding negTokenInit"));
 			return 0;
 		} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
 			   || (tag != ASN1_SEQ)) {
@@ -515,7 +515,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		}
 
 		if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-			cFYI(1, ("Error decoding 2nd part of negTokenInit "));
+			cFYI(1, ("Error decoding 2nd part of negTokenInit"));
 			return 0;
 		} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
 			   || (tag != ASN1_EOC)) {
@@ -527,7 +527,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 
 		if (asn1_header_decode
 		    (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
-			cFYI(1, ("Error decoding 2nd part of negTokenInit "));
+			cFYI(1, ("Error decoding 2nd part of negTokenInit"));
 			return 0;
 		} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
 			   || (tag != ASN1_SEQ)) {
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index f4124a32bef8..96abeb738978 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -39,7 +39,7 @@ cifs_dump_mem(char *label, void *data, int length)
 	char *charptr = data;
 	char buf[10], line[80];
 
-	printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n\n", 
+	printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n", 
 		label, length, data);
 	for (i = 0; i < length; i += 16) {
 		line[0] = 0;
@@ -57,6 +57,57 @@ cifs_dump_mem(char *label, void *data, int length)
 	}
 }
 
+#ifdef CONFIG_CIFS_DEBUG2
+void cifs_dump_detail(struct smb_hdr * smb)
+{
+	cERROR(1,("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
+		  smb->Command, smb->Status.CifsError,
+		  smb->Flags, smb->Flags2, smb->Mid, smb->Pid));
+	cERROR(1,("smb buf %p len %d", smb, smbCalcSize_LE(smb)));
+}
+
+
+void cifs_dump_mids(struct TCP_Server_Info * server)
+{
+	struct list_head *tmp;
+	struct mid_q_entry * mid_entry;
+
+	if(server == NULL)
+		return;
+
+	cERROR(1,("Dump pending requests:"));
+	spin_lock(&GlobalMid_Lock);
+	list_for_each(tmp, &server->pending_mid_q) {
+		mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
+		if(mid_entry) {
+			cERROR(1,("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
+				mid_entry->midState,
+				(int)mid_entry->command,
+				mid_entry->pid,
+				mid_entry->tsk,
+				mid_entry->mid));
+#ifdef CONFIG_CIFS_STATS2
+			cERROR(1,("IsLarge: %d buf: %p time rcv: %ld now: %ld",
+				mid_entry->largeBuf,
+				mid_entry->resp_buf,
+				mid_entry->when_received,
+				jiffies));
+#endif /* STATS2 */
+			cERROR(1,("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
+				  mid_entry->multiEnd));
+			if(mid_entry->resp_buf) {
+				cifs_dump_detail(mid_entry->resp_buf);
+				cifs_dump_mem("existing buf: ",
+					mid_entry->resp_buf,
+					62 /* fixme */);
+			}
+			
+		}
+	}
+	spin_unlock(&GlobalMid_Lock);
+}
+#endif /* CONFIG_CIFS_DEBUG2 */
+
 #ifdef CONFIG_PROC_FS
 static int
 cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
@@ -73,7 +124,6 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 
 	*beginBuffer = buf + offset;
 
-	
 	length =
 	    sprintf(buf,
 		    "Display Internal CIFS Data Structures for Debugging\n"
@@ -395,12 +445,12 @@ static read_proc_t traceSMB_read;
 static write_proc_t traceSMB_write;
 static read_proc_t multiuser_mount_read;
 static write_proc_t multiuser_mount_write;
-static read_proc_t extended_security_read;
-static write_proc_t extended_security_write;
-static read_proc_t ntlmv2_enabled_read;
+static read_proc_t security_flags_read;
+static write_proc_t security_flags_write;
+/* static read_proc_t ntlmv2_enabled_read;
 static write_proc_t ntlmv2_enabled_write;
 static read_proc_t packet_signing_enabled_read;
-static write_proc_t packet_signing_enabled_write;
+static write_proc_t packet_signing_enabled_write;*/
 static read_proc_t experimEnabled_read;
 static write_proc_t experimEnabled_write;
 static read_proc_t linuxExtensionsEnabled_read;
@@ -458,10 +508,10 @@ cifs_proc_init(void)
 		pde->write_proc = multiuser_mount_write;
 
 	pde =
-	    create_proc_read_entry("ExtendedSecurity", 0, proc_fs_cifs,
-				extended_security_read, NULL);
+	    create_proc_read_entry("SecurityFlags", 0, proc_fs_cifs,
+				security_flags_read, NULL);
 	if (pde)
-		pde->write_proc = extended_security_write;
+		pde->write_proc = security_flags_write;
 
 	pde =
 	create_proc_read_entry("LookupCacheEnabled", 0, proc_fs_cifs,
@@ -469,7 +519,7 @@ cifs_proc_init(void)
 	if (pde)
 		pde->write_proc = lookupFlag_write;
 
-	pde =
+/*	pde =
 	    create_proc_read_entry("NTLMV2Enabled", 0, proc_fs_cifs,
 				ntlmv2_enabled_read, NULL);
 	if (pde)
@@ -479,7 +529,7 @@ cifs_proc_init(void)
 	    create_proc_read_entry("PacketSigningEnabled", 0, proc_fs_cifs,
 				packet_signing_enabled_read, NULL);
 	if (pde)
-		pde->write_proc = packet_signing_enabled_write;
+		pde->write_proc = packet_signing_enabled_write;*/
 }
 
 void
@@ -496,9 +546,9 @@ cifs_proc_clean(void)
 #endif
 	remove_proc_entry("MultiuserMount", proc_fs_cifs);
 	remove_proc_entry("OplockEnabled", proc_fs_cifs);
-	remove_proc_entry("NTLMV2Enabled",proc_fs_cifs);
-	remove_proc_entry("ExtendedSecurity",proc_fs_cifs);
-	remove_proc_entry("PacketSigningEnabled",proc_fs_cifs);
+/*	remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); */
+	remove_proc_entry("SecurityFlags",proc_fs_cifs);
+/*	remove_proc_entry("PacketSigningEnabled",proc_fs_cifs); */
 	remove_proc_entry("LinuxExtensionsEnabled",proc_fs_cifs);
 	remove_proc_entry("Experimental",proc_fs_cifs);
 	remove_proc_entry("LookupCacheEnabled",proc_fs_cifs);
@@ -782,12 +832,12 @@ multiuser_mount_write(struct file *file, const char __user *buffer,
 }
 
 static int
-extended_security_read(char *page, char **start, off_t off,
+security_flags_read(char *page, char **start, off_t off,
 		       int count, int *eof, void *data)
 {
 	int len;
 
-	len = sprintf(page, "%d\n", extended_security);
+	len = sprintf(page, "0x%x\n", extended_security);
 
 	len -= off;
 	*start = page + off;
@@ -803,24 +853,52 @@ extended_security_read(char *page, char **start, off_t off,
 	return len;
 }
 static int
-extended_security_write(struct file *file, const char __user *buffer,
+security_flags_write(struct file *file, const char __user *buffer,
 			unsigned long count, void *data)
 {
+	unsigned int flags;
+	char flags_string[12];
 	char c;
-	int rc;
 
-	rc = get_user(c, buffer);
-	if (rc)
-		return rc;
-	if (c == '0' || c == 'n' || c == 'N')
-		extended_security = 0;
-	else if (c == '1' || c == 'y' || c == 'Y')
-		extended_security = 1;
+	if((count < 1) || (count > 11))
+		return -EINVAL;
+
+	memset(flags_string, 0, 12);
+
+	if(copy_from_user(flags_string, buffer, count))
+		return -EFAULT;
+
+	if(count < 3) {
+		/* single char or single char followed by null */
+		c = flags_string[0];
+		if (c == '0' || c == 'n' || c == 'N')
+			extended_security = CIFSSEC_DEF; /* default */
+		else if (c == '1' || c == 'y' || c == 'Y')
+			extended_security = CIFSSEC_MAX;
+		return count;
+	}
+	/* else we have a number */
+
+	flags = simple_strtoul(flags_string, NULL, 0);
+
+	cFYI(1,("sec flags 0x%x", flags));
+
+	if(flags <= 0)  {
+		cERROR(1,("invalid security flags %s",flags_string));
+		return -EINVAL;
+	}
 
+	if(flags & ~CIFSSEC_MASK) {
+		cERROR(1,("attempt to set unsupported security flags 0x%x",
+			flags & ~CIFSSEC_MASK));
+		return -EINVAL;
+	}
+	/* flags look ok - update the global security flags for cifs module */
+	extended_security = flags;
 	return count;
 }
 
-static int
+/* static int
 ntlmv2_enabled_read(char *page, char **start, off_t off,
 		       int count, int *eof, void *data)
 {
@@ -855,6 +933,8 @@ ntlmv2_enabled_write(struct file *file, const char __user *buffer,
 		ntlmv2_support = 0;
 	else if (c == '1' || c == 'y' || c == 'Y')
 		ntlmv2_support = 1;
+	else if (c == '2')
+		ntlmv2_support = 2;
 
 	return count;
 }
@@ -898,7 +978,7 @@ packet_signing_enabled_write(struct file *file, const char __user *buffer,
 		sign_CIFS_PDUs = 2;
 
 	return count;
-}
+} */
 
 
 #endif
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index 4304d9dcfb6c..c26cd0d2c6d5 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -24,6 +24,10 @@
 #define _H_CIFS_DEBUG
 
 void cifs_dump_mem(char *label, void *data, int length);
+#ifdef CONFIG_CIFS_DEBUG2
+void cifs_dump_detail(struct smb_hdr *);
+void cifs_dump_mids(struct TCP_Server_Info *);
+#endif
 extern int traceSMB;		/* flag which enables the function below */
 void dump_smb(struct smb_hdr *, int);
 #define CIFS_INFO	0x01
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index d2b128255944..d2a8b2941fc2 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -22,6 +22,7 @@
 #include "cifs_unicode.h"
 #include "cifs_uniupr.h"
 #include "cifspdu.h"
+#include "cifsglob.h"
 #include "cifs_debug.h"
 
 /*
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index e7d63737e651..a89efaf78a26 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -26,6 +26,8 @@
 #include "md5.h"
 #include "cifs_unicode.h"
 #include "cifsproto.h"
+#include <linux/ctype.h>
+#include <linux/random.h>
 
 /* Calculate and return the CIFS signature based on the mac key and the smb pdu */
 /* the 16 byte signature must be allocated by the caller  */
@@ -35,6 +37,8 @@
 
 extern void mdfour(unsigned char *out, unsigned char *in, int n);
 extern void E_md4hash(const unsigned char *passwd, unsigned char *p16);
+extern void SMBencrypt(unsigned char *passwd, unsigned char *c8,
+                       unsigned char *p24);
 	
 static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu, 
 				    const char * key, char * signature)
@@ -45,7 +49,7 @@ static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu,
 		return -EINVAL;
 
 	MD5Init(&context);
-	MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16);
+	MD5Update(&context,key,CIFS_SESS_KEY_SIZE+16);
 	MD5Update(&context,cifs_pdu->Protocol,cifs_pdu->smb_buf_length);
 	MD5Final(signature,&context);
 	return 0;
@@ -90,7 +94,7 @@ static int cifs_calc_signature2(const struct kvec * iov, int n_vec,
 		return -EINVAL;
 
 	MD5Init(&context);
-	MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16);
+	MD5Update(&context,key,CIFS_SESS_KEY_SIZE+16);
 	for(i=0;i<n_vec;i++) {
 		if(iov[i].iov_base == NULL) {
 			cERROR(1,("null iovec entry"));
@@ -204,11 +208,12 @@ int cifs_calculate_mac_key(char * key, const char * rn, const char * password)
 
 	E_md4hash(password, temp_key);
 	mdfour(key,temp_key,16);
-	memcpy(key+16,rn, CIFS_SESSION_KEY_SIZE);
+	memcpy(key+16,rn, CIFS_SESS_KEY_SIZE);
 	return 0;
 }
 
-int CalcNTLMv2_partial_mac_key(struct cifsSesInfo * ses, struct nls_table * nls_info)
+int CalcNTLMv2_partial_mac_key(struct cifsSesInfo * ses, 
+				const struct nls_table * nls_info)
 {
 	char temp_hash[16];
 	struct HMACMD5Context ctx;
@@ -225,6 +230,8 @@ int CalcNTLMv2_partial_mac_key(struct cifsSesInfo * ses, struct nls_table * nls_
 	user_name_len = strlen(ses->userName);
 	if(user_name_len > MAX_USERNAME_SIZE)
 		return -EINVAL;
+	if(ses->domainName == NULL)
+		return -EINVAL; /* BB should we use CIFS_LINUX_DOM */
 	dom_name_len = strlen(ses->domainName);
 	if(dom_name_len > MAX_USERNAME_SIZE)
 		return -EINVAL;
@@ -259,16 +266,131 @@ int CalcNTLMv2_partial_mac_key(struct cifsSesInfo * ses, struct nls_table * nls_
 	kfree(unicode_buf);
 	return 0;
 }
-void CalcNTLMv2_response(const struct cifsSesInfo * ses,char * v2_session_response)
+
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+void calc_lanman_hash(struct cifsSesInfo * ses, char * lnm_session_key)
+{
+	int i;
+	char password_with_pad[CIFS_ENCPWD_SIZE];
+
+	if(ses->server == NULL)
+		return;
+
+	memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
+	strncpy(password_with_pad, ses->password, CIFS_ENCPWD_SIZE);
+
+	if((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0)
+		if(extended_security & CIFSSEC_MAY_PLNTXT) {
+			memcpy(lnm_session_key, password_with_pad, CIFS_ENCPWD_SIZE); 
+			return;
+		}
+
+	/* calculate old style session key */
+	/* calling toupper is less broken than repeatedly
+	calling nls_toupper would be since that will never
+	work for UTF8, but neither handles multibyte code pages
+	but the only alternative would be converting to UCS-16 (Unicode)
+	(using a routine something like UniStrupr) then
+	uppercasing and then converting back from Unicode - which
+	would only be worth doing if we knew it were utf8. Basically
+	utf8 and other multibyte codepages each need their own strupper
+	function since a byte at a time will not work. */
+
+	for(i = 0; i < CIFS_ENCPWD_SIZE; i++) {
+		password_with_pad[i] = toupper(password_with_pad[i]);
+	}
+
+	SMBencrypt(password_with_pad, ses->server->cryptKey, lnm_session_key);
+	/* clear password before we return/free memory */
+	memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
+}
+#endif /* CIFS_WEAK_PW_HASH */
+
+static int calc_ntlmv2_hash(struct cifsSesInfo *ses, 
+			    const struct nls_table * nls_cp)
+{
+	int rc = 0;
+	int len;
+	char nt_hash[16];
+	struct HMACMD5Context * pctxt;
+	wchar_t * user;
+	wchar_t * domain;
+
+	pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL);
+
+	if(pctxt == NULL)
+		return -ENOMEM;
+
+	/* calculate md4 hash of password */
+	E_md4hash(ses->password, nt_hash);
+
+	/* init the HMAC-MD5 context using the 16 byte md4 password hash */
+	hmac_md5_init_limK_to_64(nt_hash, 16, pctxt);
+
+	/* convert ses->userName to unicode and uppercase */
+	len = strlen(ses->userName);
+	user = kmalloc(2 + (len * 2), GFP_KERNEL);
+	if(user == NULL)
+		goto calc_exit_2;
+	len = cifs_strtoUCS(user, ses->userName, len, nls_cp);
+	UniStrupr(user);
+	hmac_md5_update((char *)user, 2*len, pctxt);
+
+	/* convert ses->domainName to unicode and uppercase */
+	if(ses->domainName) {
+		len = strlen(ses->domainName);
+
+        	domain = kmalloc(2 + (len * 2), GFP_KERNEL);
+		if(domain == NULL)
+			goto calc_exit_1;
+		len = cifs_strtoUCS(domain, ses->domainName, len, nls_cp);
+		UniStrupr(domain);
+
+		hmac_md5_update((char *)domain, 2*len, pctxt);
+	
+		kfree(domain);
+	}
+calc_exit_1:
+	kfree(user);
+calc_exit_2:
+	/* BB FIXME what about bytes 24 through 40 of the signing key? 
+	   compare with the NTLM example */
+	hmac_md5_final(ses->server->mac_signing_key, pctxt);
+
+	return rc;
+}
+
+void setup_ntlmv2_rsp(struct cifsSesInfo * ses, char * resp_buf, 
+		      const struct nls_table * nls_cp)
+{
+	int rc;
+	struct ntlmv2_resp * buf = (struct ntlmv2_resp *)resp_buf;
+
+	buf->blob_signature = cpu_to_le32(0x00000101);
+	buf->reserved = 0;
+	buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
+	get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
+	buf->reserved2 = 0;
+	buf->names[0].type = 0;
+	buf->names[0].length = 0;
+
+	/* calculate buf->ntlmv2_hash */
+	rc = calc_ntlmv2_hash(ses, nls_cp);
+	if(rc)
+		cERROR(1,("could not get v2 hash rc %d",rc));
+	CalcNTLMv2_response(ses, resp_buf);
+}
+
+void CalcNTLMv2_response(const struct cifsSesInfo * ses, char * v2_session_response)
 {
 	struct HMACMD5Context context;
+	/* rest of v2 struct already generated */
 	memcpy(v2_session_response + 8, ses->server->cryptKey,8);
-	/* gen_blob(v2_session_response + 16); */
 	hmac_md5_init_limK_to_64(ses->server->mac_signing_key, 16, &context);
 
-	hmac_md5_update(ses->server->cryptKey,8,&context);
-/*	hmac_md5_update(v2_session_response+16)client thing,8,&context); */ /* BB fix */
+	hmac_md5_update(v2_session_response+8, 
+			sizeof(struct ntlmv2_resp) - 8, &context);
 
 	hmac_md5_final(v2_session_response,&context);
-	cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); /* BB removeme BB */
+/*	cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */
 }
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8b4de6eaabd0..c28ede599946 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -56,8 +56,8 @@ unsigned int experimEnabled = 0;
 unsigned int linuxExtEnabled = 1;
 unsigned int lookupCacheEnabled = 1;
 unsigned int multiuser_mount = 0;
-unsigned int extended_security = 0;
-unsigned int ntlmv2_support = 0;
+unsigned int extended_security = CIFSSEC_DEF;
+/* unsigned int ntlmv2_support = 0; */
 unsigned int sign_CIFS_PDUs = 1;
 extern struct task_struct * oplockThread; /* remove sparse warning */
 struct task_struct * oplockThread = NULL;
@@ -908,7 +908,7 @@ static int cifs_dnotify_thread(void * dummyarg)
 	struct cifsSesInfo *ses;
 
 	do {
-		if(try_to_freeze())
+		if (try_to_freeze())
 			continue;
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout(15*HZ);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index d56c0577c710..8f75c6f24701 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -32,7 +32,8 @@
 #define TRUE 1
 #endif
 
-extern struct address_space_operations cifs_addr_ops;
+extern const struct address_space_operations cifs_addr_ops;
+extern const struct address_space_operations cifs_addr_ops_smallbuf;
 
 /* Functions related to super block operations */
 extern struct super_operations cifs_super_ops;
@@ -99,5 +100,5 @@ extern ssize_t	cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t	cifs_listxattr(struct dentry *, char *, size_t);
 extern int cifs_ioctl (struct inode * inode, struct file * filep,
 		       unsigned int command, unsigned long arg);
-#define CIFS_VERSION   "1.43"
+#define CIFS_VERSION   "1.44"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 006eb33bff5f..6d7cf5f3bc0b 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -88,7 +88,8 @@ enum statusEnum {
 };
 
 enum securityEnum {
-	NTLM = 0,		/* Legacy NTLM012 auth with NTLM hash */
+	LANMAN = 0,             /* Legacy LANMAN auth */
+	NTLM,			/* Legacy NTLM012 auth with NTLM hash */
 	NTLMv2,			/* Legacy NTLM auth with NTLMv2 hash */
 	RawNTLMSSP,		/* NTLMSSP without SPNEGO */
 	NTLMSSP,		/* NTLMSSP via SPNEGO */
@@ -157,7 +158,7 @@ struct TCP_Server_Info {
 	/* 16th byte of RFC1001 workstation name is always null */
 	char workstation_RFC1001_name[SERVER_NAME_LEN_WITH_NULL];
 	__u32 sequence_number; /* needed for CIFS PDU signature */
-	char mac_signing_key[CIFS_SESSION_KEY_SIZE + 16]; 
+	char mac_signing_key[CIFS_SESS_KEY_SIZE + 16]; 
 };
 
 /*
@@ -179,10 +180,13 @@ struct cifsUidInfo {
 struct cifsSesInfo {
 	struct list_head cifsSessionList;
 	struct semaphore sesSem;
+#if 0
 	struct cifsUidInfo *uidInfo;	/* pointer to user info */
+#endif
 	struct TCP_Server_Info *server;	/* pointer to server info */
 	atomic_t inUse; /* # of mounts (tree connections) on this ses */
 	enum statusEnum status;
+	unsigned overrideSecFlg;  /* if non-zero override global sec flags */
 	__u16 ipc_tid;		/* special tid for connection to IPC share */
 	__u16 flags;
 	char *serverOS;		/* name of operating system underlying server */
@@ -194,7 +198,7 @@ struct cifsSesInfo {
 	char serverName[SERVER_NAME_LEN_WITH_NULL * 2];	/* BB make bigger for 
 				TCP names - will ipv6 and sctp addresses fit? */
 	char userName[MAX_USERNAME_SIZE + 1];
-	char domainName[MAX_USERNAME_SIZE + 1];
+	char * domainName;
 	char * password;
 };
 /* session flags */
@@ -209,12 +213,12 @@ struct cifsTconInfo {
 	struct list_head openFileList;
 	struct semaphore tconSem;
 	struct cifsSesInfo *ses;	/* pointer to session associated with */
-	char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource (in ASCII not UTF) */
+	char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
 	char *nativeFileSystem;
 	__u16 tid;		/* The 2 byte tree id */
 	__u16 Flags;		/* optional support bits */
 	enum statusEnum tidStatus;
-	atomic_t useCount;	/* how many mounts (explicit or implicit) to this share */
+	atomic_t useCount;	/* how many explicit/implicit mounts to share */
 #ifdef CONFIG_CIFS_STATS
 	atomic_t num_smbs_sent;
 	atomic_t num_writes;
@@ -254,7 +258,7 @@ struct cifsTconInfo {
 	spinlock_t stat_lock;
 #endif /* CONFIG_CIFS_STATS */
 	FILE_SYSTEM_DEVICE_INFO fsDevInfo;
-	FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo;	/* ok if file system name truncated */
+	FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
 	FILE_SYSTEM_UNIX_INFO fsUnixInfo;
 	unsigned retry:1;
 	unsigned nocase:1;
@@ -305,7 +309,6 @@ struct cifsFileInfo {
 	atomic_t wrtPending;   /* handle in use - defer close */
 	struct semaphore fh_sem; /* prevents reopen race after dead ses*/
 	char * search_resume_name; /* BB removeme BB */
-	unsigned int resume_name_length; /* BB removeme - field renamed and moved BB */
 	struct cifs_search_info srch_inf;
 };
 
@@ -391,9 +394,9 @@ struct mid_q_entry {
 	struct smb_hdr *resp_buf;	/* response buffer */
 	int midState;	/* wish this were enum but can not pass to wait_event */
 	__u8 command;	/* smb command code */
-	unsigned multiPart:1;	/* multiple responses to one SMB request */
 	unsigned largeBuf:1;    /* if valid response, is pointer to large buf */
-	unsigned multiResp:1;   /* multiple trans2 responses for one request  */
+	unsigned multiRsp:1;   /* multiple trans2 responses for one request  */
+	unsigned multiEnd:1; /* both received */
 };
 
 struct oplock_q_entry {
@@ -430,15 +433,35 @@ struct dir_notify_req {
 #define   CIFS_LARGE_BUFFER     2
 #define   CIFS_IOVEC            4    /* array of response buffers */
 
-/* Type of session setup needed */
-#define   CIFS_PLAINTEXT	0
-#define   CIFS_LANMAN		1
-#define   CIFS_NTLM		2
-#define   CIFS_NTLMSSP_NEG	3
-#define   CIFS_NTLMSSP_AUTH	4
-#define   CIFS_SPNEGO_INIT	5
-#define   CIFS_SPNEGO_TARG	6
-
+/* Security Flags: indicate type of session setup needed */
+#define   CIFSSEC_MAY_SIGN	0x00001
+#define   CIFSSEC_MAY_NTLM	0x00002
+#define   CIFSSEC_MAY_NTLMV2	0x00004
+#define   CIFSSEC_MAY_KRB5	0x00008
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+#define   CIFSSEC_MAY_LANMAN	0x00010
+#define   CIFSSEC_MAY_PLNTXT	0x00020
+#endif /* weak passwords */
+#define   CIFSSEC_MAY_SEAL	0x00040 /* not supported yet */
+
+#define   CIFSSEC_MUST_SIGN	0x01001
+/* note that only one of the following can be set so the
+result of setting MUST flags more than once will be to
+require use of the stronger protocol */
+#define   CIFSSEC_MUST_NTLM	0x02002
+#define   CIFSSEC_MUST_NTLMV2	0x04004
+#define   CIFSSEC_MUST_KRB5	0x08008
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+#define   CIFSSEC_MUST_LANMAN	0x10010
+#define   CIFSSEC_MUST_PLNTXT	0x20020
+#define   CIFSSEC_MASK          0x37037 /* current flags supported if weak */
+#else	  
+#define	  CIFSSEC_MASK          0x07007 /* flags supported if no weak config */
+#endif /* WEAK_PW_HASH */
+#define   CIFSSEC_MUST_SEAL	0x40040 /* not supported yet */
+
+#define   CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2)
+#define   CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) /* parens: safe next to &, ~, == */
 /*
  *****************************************************************
  * All constants go here
@@ -500,16 +523,16 @@ GLOBAL_EXTERN rwlock_t GlobalSMBSeslock;  /* protects list inserts on 3 above */
 GLOBAL_EXTERN struct list_head GlobalOplock_Q;
 
 GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; /* Outstanding dir notify requests */
-GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q; /* Dir notify response queue */
+GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q;/* DirNotify response queue */
 
 /*
  * Global transaction id (XID) information
  */
 GLOBAL_EXTERN unsigned int GlobalCurrentXid;	/* protected by GlobalMid_Sem */
-GLOBAL_EXTERN unsigned int GlobalTotalActiveXid;	/* prot by GlobalMid_Sem */
+GLOBAL_EXTERN unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */
 GLOBAL_EXTERN unsigned int GlobalMaxActiveXid;	/* prot by GlobalMid_Sem */
-GLOBAL_EXTERN spinlock_t GlobalMid_Lock;  /* protects above and list operations */
-					/* on midQ entries */
+GLOBAL_EXTERN spinlock_t GlobalMid_Lock;  /* protects above & list operations */
+					  /* on midQ entries */
 GLOBAL_EXTERN char Local_System_Name[15];
 
 /*
@@ -531,7 +554,7 @@ GLOBAL_EXTERN atomic_t smBufAllocCount;
 GLOBAL_EXTERN atomic_t midCount;
 
 /* Misc globals */
-GLOBAL_EXTERN unsigned int multiuser_mount;	/* if enabled allows new sessions
+GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions
 				to be established on existing mount if we
 				have the uid/password or Kerberos credential 
 				or equivalent for current user */
@@ -540,8 +563,8 @@ GLOBAL_EXTERN unsigned int experimEnabled;
 GLOBAL_EXTERN unsigned int lookupCacheEnabled;
 GLOBAL_EXTERN unsigned int extended_security;	/* if on, session setup sent 
 				with more secure ntlmssp2 challenge/resp */
-GLOBAL_EXTERN unsigned int ntlmv2_support;  /* better optional password hash */
 GLOBAL_EXTERN unsigned int sign_CIFS_PDUs;  /* enable smb packet signing */
+GLOBAL_EXTERN unsigned int secFlags;
 GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
 GLOBAL_EXTERN unsigned int CIFSMaxBufSize;  /* max size not including hdr */
 GLOBAL_EXTERN unsigned int cifs_min_rcv;    /* min size of big ntwrk buf pool */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b2233ac05bd2..86239023545b 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -16,7 +16,7 @@
  *
  *   You should have received a copy of the GNU Lesser General Public License
  *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #ifndef _CIFSPDU_H
@@ -24,8 +24,14 @@
 
 #include <net/sock.h>
 
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+#define LANMAN_PROT 0
+#define CIFS_PROT   1
+#else
 #define CIFS_PROT   0
-#define BAD_PROT    CIFS_PROT+1
+#endif
+#define POSIX_PROT  (CIFS_PROT+1) /* dialect index after CIFS_PROT; parenthesized for safe expansion */
+#define BAD_PROT 0xFFFF
 
 /* SMB command codes */
 /* Some commands have minimal (wct=0,bcc=0), or uninteresting, responses
@@ -110,7 +116,7 @@
 /*
  * Size of the session key (crypto key encrypted with the password
  */
-#define CIFS_SESSION_KEY_SIZE (24)
+#define CIFS_SESS_KEY_SIZE (24)
 
 /*
  * Maximum user name length
@@ -400,6 +406,29 @@ typedef struct negotiate_req {
 	unsigned char DialectsArray[1];
 } __attribute__((packed)) NEGOTIATE_REQ;
 
+/* Word count (wct) is 13 for the LANMAN negotiate response */
+
+typedef struct lanman_neg_rsp {
+	struct smb_hdr hdr;	/* wct = 13 */
+	__le16 DialectIndex;
+	__le16 SecurityMode;
+	__le16 MaxBufSize;
+	__le16 MaxMpxCount;
+	__le16 MaxNumberVcs;
+	__le16 RawMode;
+	__le32 SessionKey;
+	__le32 ServerTime;
+	__le16 ServerTimeZone;
+	__le16 EncryptionKeyLength;
+	__le16 Reserved;
+	__u16  ByteCount;
+	unsigned char EncryptionKey[1];
+} __attribute__((packed)) LANMAN_NEG_RSP;
+
+#define READ_RAW_ENABLE 1
+#define WRITE_RAW_ENABLE 2
+#define RAW_ENABLE (READ_RAW_ENABLE | WRITE_RAW_ENABLE)
+
 typedef struct negotiate_rsp {
 	struct smb_hdr hdr;	/* wct = 17 */
 	__le16 DialectIndex;
@@ -509,7 +538,7 @@ typedef union smb_com_session_setup_andx {
 /*      unsigned char  * NativeOS;      */
 /*	unsigned char  * NativeLanMan;  */
 /*      unsigned char  * PrimaryDomain; */
-	} __attribute__((packed)) resp;			/* NTLM response format (with or without extended security */
+	} __attribute__((packed)) resp;	/* NTLM response with or without extended sec*/
 
 	struct {		/* request format */
 		struct smb_hdr hdr;	/* wct = 10 */
@@ -520,8 +549,8 @@ typedef union smb_com_session_setup_andx {
 		__le16 MaxMpxCount;
 		__le16 VcNumber;
 		__u32 SessionKey;
-		__le16 PassswordLength;
-		__u32 Reserved;
+		__le16 PasswordLength;
+		__u32 Reserved; /* encrypt key len and offset */
 		__le16 ByteCount;
 		unsigned char AccountPassword[1];	/* followed by */
 		/* STRING AccountName */
@@ -543,6 +572,26 @@ typedef union smb_com_session_setup_andx {
 	} __attribute__((packed)) old_resp; /* pre-NTLM (LANMAN2.1) response */
 } __attribute__((packed)) SESSION_SETUP_ANDX;
 
+/* format of NTLMv2 Response ie "case sensitive password" hash when NTLMv2 */
+
+struct ntlmssp2_name {
+	__le16 type;
+	__le16 length;
+/*	char   name[length]; */
+} __attribute__((packed));
+
+struct ntlmv2_resp {
+	char ntlmv2_hash[CIFS_ENCPWD_SIZE];
+	__le32 blob_signature;
+	__u32  reserved;
+	__le64  time;
+	__u64  client_chal; /* random */
+	__u32  reserved2;
+	struct ntlmssp2_name names[1];
+	/* array of name entries could follow ending in minimum 4 byte struct */
+} __attribute__((packed));
+
+
 #define CIFS_NETWORK_OPSYS "CIFS VFS Client for Linux"
 
 /* Capabilities bits (for NTLM SessSetup request) */
@@ -573,7 +622,9 @@ typedef struct smb_com_tconx_req {
 } __attribute__((packed)) TCONX_REQ;
 
 typedef struct smb_com_tconx_rsp {
-	struct smb_hdr hdr;	/* wct = 3 *//* note that Win2000 has sent wct=7 in some cases on responses. Four unspecified words followed OptionalSupport */
+	struct smb_hdr hdr;	/* wct = 3 note that Win2000 has sent wct = 7
+				 in some cases on responses. Four unspecified
+				 words followed OptionalSupport */
 	__u8 AndXCommand;
 	__u8 AndXReserved;
 	__le16 AndXOffset;
@@ -1323,6 +1374,9 @@ struct smb_t2_rsp {
 #define SMB_FILE_MAXIMUM_INFO           0x40d
 
 /* Find File infolevels */
+#define SMB_FIND_FILE_INFO_STANDARD       0x001
+#define SMB_FIND_FILE_QUERY_EA_SIZE       0x002
+#define SMB_FIND_FILE_QUERY_EAS_FROM_LIST 0x003
 #define SMB_FIND_FILE_DIRECTORY_INFO      0x101
 #define SMB_FIND_FILE_FULL_DIRECTORY_INFO 0x102
 #define SMB_FIND_FILE_NAMES_INFO          0x103
@@ -1844,13 +1898,13 @@ typedef struct {
 typedef struct {
 	__le32 DeviceType;
 	__le32 DeviceCharacteristics;
-} __attribute__((packed)) FILE_SYSTEM_DEVICE_INFO;	/* device info, level 0x104 */
+} __attribute__((packed)) FILE_SYSTEM_DEVICE_INFO; /* device info level 0x104 */
 
 typedef struct {
 	__le32 Attributes;
 	__le32 MaxPathNameComponentLength;
 	__le32 FileSystemNameLen;
-	char FileSystemName[52]; /* do not really need to save this - so potentially get only subset of name */
+	char FileSystemName[52]; /* do not have to save this - get subset? */
 } __attribute__((packed)) FILE_SYSTEM_ATTRIBUTE_INFO;
 
 /******************************************************************************/
@@ -1947,7 +2001,8 @@ typedef struct {
 
 struct file_allocation_info {
 	__le64 AllocationSize; /* Note old Samba srvr rounds this up too much */
-} __attribute__((packed));	/* size used on disk, level 0x103 for set, 0x105 for query */
+} __attribute__((packed));	/* size used on disk, for level 0x103 for set,
+				   0x105 for query */
 
 struct file_end_of_file_info {
 	__le64 FileSize;		/* offset to end of file */
@@ -2054,7 +2109,7 @@ typedef struct {
 	__le32 ExtFileAttributes;
 	__le32 FileNameLength;
 	char FileName[1];
-} __attribute__((packed)) FILE_DIRECTORY_INFO;   /* level 0x101 FF response data area */
+} __attribute__((packed)) FILE_DIRECTORY_INFO;   /* level 0x101 FF resp data */
 
 typedef struct {
 	__le32 NextEntryOffset;
@@ -2069,7 +2124,7 @@ typedef struct {
 	__le32 FileNameLength;
 	__le32 EaSize; /* length of the xattrs */
 	char FileName[1];
-} __attribute__((packed)) FILE_FULL_DIRECTORY_INFO;   /* level 0x102 FF response data area */
+} __attribute__((packed)) FILE_FULL_DIRECTORY_INFO; /* level 0x102 rsp data */
 
 typedef struct {
 	__le32 NextEntryOffset;
@@ -2086,7 +2141,7 @@ typedef struct {
 	__le32 Reserved;
 	__u64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/
 	char FileName[1];
-} __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO;   /* level 0x105 FF response data area */
+} __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO; /* level 0x105 FF rsp data */
 
 typedef struct {
 	__le32 NextEntryOffset;
@@ -2104,7 +2159,22 @@ typedef struct {
 	__u8   Reserved;
 	__u8   ShortName[12];
 	char FileName[1];
-} __attribute__((packed)) FILE_BOTH_DIRECTORY_INFO;   /* level 0x104 FF response data area */
+} __attribute__((packed)) FILE_BOTH_DIRECTORY_INFO; /* level 0x104 FFrsp data */
+
+typedef struct {
+	__u32  ResumeKey;
+	__le16 CreationDate; /* SMB Date */
+	__le16 CreationTime; /* SMB Time */
+	__le16 LastAccessDate;
+	__le16 LastAccessTime;
+	__le16 LastWriteDate;
+	__le16 LastWriteTime;
+	__le32 DataSize; /* File Size (EOF) */
+	__le32 AllocationSize;
+	__le16 Attributes; /* verify not u32 */
+	__u8   FileNameLength;
+	char FileName[1];
+} __attribute__((packed)) FIND_FILE_STANDARD_INFO; /* level 0x1 FF resp data */
 
 
 struct win_dev {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 310ea2f0e0bf..a5ddc62d6fe6 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -64,14 +64,12 @@ extern int map_smb_to_linux_error(struct smb_hdr *smb);
 extern void header_assemble(struct smb_hdr *, char /* command */ ,
 			    const struct cifsTconInfo *, int /* length of
 			    fixed section (word count) in two byte units */);
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 extern int small_smb_init_no_tc(const int smb_cmd, const int wct,
 				struct cifsSesInfo *ses,
 				void ** request_buf);
 extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
-			     const int stage, int * pNTLMv2_flg,
+			     const int stage, 
 			     const struct nls_table *nls_cp);
-#endif
 extern __u16 GetNextMid(struct TCP_Server_Info *server);
 extern struct oplock_q_entry * AllocOplockQEntry(struct inode *, u16, 
 						 struct cifsTconInfo *);
@@ -285,8 +283,14 @@ extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
 extern int cifs_verify_signature(struct smb_hdr *, const char * mac_key,
 	__u32 expected_sequence_number);
 extern int cifs_calculate_mac_key(char * key,const char * rn,const char * pass);
-extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *, struct nls_table *);
-extern void CalcNTLMv2_response(const struct cifsSesInfo *,char * );
+extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *, 
+			const struct nls_table *);
+extern void CalcNTLMv2_response(const struct cifsSesInfo *, char * );
+extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *, 
+			     const struct nls_table *);
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+extern void calc_lanman_hash(struct cifsSesInfo * ses, char * lnm_session_key);
+#endif /* CIFS_WEAK_PW_HASH */
 extern int CIFSSMBCopy(int xid,
 			struct cifsTconInfo *source_tcon,
 			const char *fromName,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 925881e00ff2..19678c575dfc 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -44,8 +44,11 @@ static struct {
 	int index;
 	char *name;
 } protocols[] = {
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+	{LANMAN_PROT, "\2LM1.2X002"},
+#endif /* weak password hashing for legacy clients */
 	{CIFS_PROT, "\2NT LM 0.12"}, 
-	{CIFS_PROT, "\2POSIX 2"},
+	{POSIX_PROT, "\2POSIX 2"},
 	{BAD_PROT, "\2"}
 };
 #else
@@ -53,11 +56,29 @@ static struct {
 	int index;
 	char *name;
 } protocols[] = {
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+	{LANMAN_PROT, "\2LM1.2X002"},
+#endif /* weak password hashing for legacy clients */
 	{CIFS_PROT, "\2NT LM 0.12"}, 
 	{BAD_PROT, "\2"}
 };
 #endif
 
+/* define the number of elements in the cifs dialect array */
+#ifdef CONFIG_CIFS_POSIX
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+#define CIFS_NUM_PROT 3
+#else
+#define CIFS_NUM_PROT 2
+#endif /* CIFS_WEAK_PW_HASH */
+#else /* not posix */
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+#define CIFS_NUM_PROT 2
+#else
+#define CIFS_NUM_PROT 1
+#endif /* CONFIG_CIFS_WEAK_PW_HASH */
+#endif /* CIFS_POSIX */
+
 
 /* Mark as invalid, all open files on tree connections since they
    were closed when session to server was lost */
@@ -188,7 +209,6 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
 	return rc;
 }
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL  
 int
 small_smb_init_no_tc(const int smb_command, const int wct, 
 		     struct cifsSesInfo *ses, void **request_buf)
@@ -214,7 +234,6 @@ small_smb_init_no_tc(const int smb_command, const int wct,
 
 	return rc;
 }
-#endif  /* CONFIG_CIFS_EXPERIMENTAL */
 
 /* If the return code is zero, this function must fill in request_buf pointer */
 static int
@@ -322,7 +341,8 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
     /* potential retries of smb operations it turns out we can determine */
     /* from the mid flags when the request buffer can be resent without  */
     /* having to use a second distinct buffer for the response */
-	*response_buf = *request_buf; 
+	if(response_buf)
+		*response_buf = *request_buf; 
 
 	header_assemble((struct smb_hdr *) *request_buf, smb_command, tcon,
 			wct /*wct */ );
@@ -373,8 +393,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 	NEGOTIATE_RSP *pSMBr;
 	int rc = 0;
 	int bytes_returned;
+	int i;
 	struct TCP_Server_Info * server;
 	u16 count;
+	unsigned int secFlags;
 
 	if(ses->server)
 		server = ses->server;
@@ -386,101 +408,200 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 		      (void **) &pSMB, (void **) &pSMBr);
 	if (rc)
 		return rc;
+
+	/* if any of auth flags (ie not sign or seal) are overridden use them */
+	if(ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL)))
+		secFlags = ses->overrideSecFlg;
+	else /* if override flags set only sign/seal OR them with global auth */
+		secFlags = extended_security | ses->overrideSecFlg;
+
+	cFYI(1,("secFlags 0x%x",secFlags));
+
 	pSMB->hdr.Mid = GetNextMid(server);
 	pSMB->hdr.Flags2 |= SMBFLG2_UNICODE;
-	if (extended_security)
+	if((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
 		pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
-
-	count = strlen(protocols[0].name) + 1;
-	strncpy(pSMB->DialectsArray, protocols[0].name, 30);	
-    /* null guaranteed to be at end of source and target buffers anyway */
-
+	
+	count = 0;
+	for(i=0;i<CIFS_NUM_PROT;i++) {
+		strncpy(pSMB->DialectsArray+count, protocols[i].name, 16);
+		count += strlen(protocols[i].name) + 1;
+		/* null at end of source and target buffers anyway */
+	}
 	pSMB->hdr.smb_buf_length += count;
 	pSMB->ByteCount = cpu_to_le16(count);
 
 	rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
-	if (rc == 0) {
-		server->secMode = pSMBr->SecurityMode;
-		if((server->secMode & SECMODE_USER) == 0)
-			cFYI(1,("share mode security"));
-		server->secType = NTLM; /* BB override default for
-					   NTLMv2 or kerberos v5 */
-		/* one byte - no need to convert this or EncryptionKeyLen
-		   from little endian */
-		server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount);
-		/* probably no need to store and check maxvcs */
-		server->maxBuf =
-			min(le32_to_cpu(pSMBr->MaxBufferSize),
+	if (rc != 0) 
+		goto neg_err_exit;
+
+	cFYI(1,("Dialect: %d", pSMBr->DialectIndex));
+	/* Check wct = 1 error case */
+	if((pSMBr->hdr.WordCount < 13) || (pSMBr->DialectIndex == BAD_PROT)) {
+		/* core returns wct = 1, but we do not ask for core - otherwise
+		small wct just comes when dialect index is -1 indicating we 
+		could not negotiate a common dialect */
+		rc = -EOPNOTSUPP;
+		goto neg_err_exit;
+#ifdef CONFIG_CIFS_WEAK_PW_HASH 
+	} else if((pSMBr->hdr.WordCount == 13)
+			&& (pSMBr->DialectIndex == LANMAN_PROT)) {
+		struct lanman_neg_rsp * rsp = (struct lanman_neg_rsp *)pSMBr;
+
+		if((secFlags & CIFSSEC_MAY_LANMAN) || 
+			(secFlags & CIFSSEC_MAY_PLNTXT))
+			server->secType = LANMAN;
+		else {
+			cERROR(1, ("mount failed weak security disabled"
+				   " in /proc/fs/cifs/SecurityFlags"));
+			rc = -EOPNOTSUPP;
+			goto neg_err_exit;
+		}	
+		server->secMode = (__u8)le16_to_cpu(rsp->SecurityMode);
+		server->maxReq = le16_to_cpu(rsp->MaxMpxCount);
+		server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize),
+				(__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
+		GETU32(server->sessid) = le32_to_cpu(rsp->SessionKey);
+		/* even though we do not use raw we might as well set this
+		accurately, in case we ever find a need for it */
+		if((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) {
+			server->maxRw = 0xFF00;
+			server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE;
+		} else {
+			server->maxRw = 0;/* we do not need to use raw anyway */
+			server->capabilities = CAP_MPX_MODE;
+		}
+		server->timeZone = le16_to_cpu(rsp->ServerTimeZone);
+
+		/* BB get server time for time conversions and add
+		code to use it and timezone since this is not UTC */	
+
+		if (rsp->EncryptionKeyLength == cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
+			memcpy(server->cryptKey, rsp->EncryptionKey,
+				CIFS_CRYPTO_KEY_SIZE); /* length field is __le16: compared in LE form */
+		} else if (server->secMode & SECMODE_PW_ENCRYPT) {
+			rc = -EIO; /* need cryptkey unless plain text */
+			goto neg_err_exit;
+		}
+
+		cFYI(1,("LANMAN negotiated"));
+		/* we will not end up setting signing flags - as no signing
+		was in LANMAN and server did not return the flags on */
+		goto signing_check;
+#else /* weak security disabled */
+	} else if(pSMBr->hdr.WordCount == 13) {
+		cERROR(1,("mount failed, cifs module not built "
+			  "with CIFS_WEAK_PW_HASH support"));
+			rc = -EOPNOTSUPP;
+#endif /* WEAK_PW_HASH */
+		goto neg_err_exit;
+	} else if(pSMBr->hdr.WordCount != 17) {
+		/* unknown wct */
+		rc = -EOPNOTSUPP;
+		goto neg_err_exit;
+	}
+	/* else wct == 17 NTLM */
+	server->secMode = pSMBr->SecurityMode;
+	if((server->secMode & SECMODE_USER) == 0)
+		cFYI(1,("share mode security"));
+
+	if((server->secMode & SECMODE_PW_ENCRYPT) == 0)
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+		if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0)
+#endif /* CIFS_WEAK_PW_HASH */
+			cERROR(1,("Server requests plain text password"
+				  " but client support disabled"));
+
+	if((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2)
+		server->secType = NTLMv2;
+	else if(secFlags & CIFSSEC_MAY_NTLM)
+		server->secType = NTLM;
+	else if(secFlags & CIFSSEC_MAY_NTLMV2)
+		server->secType = NTLMv2;
+	/* else krb5 ... any others ... */
+
+	/* one byte, so no need to convert this or EncryptionKeyLen from
+	   little endian */
+	server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount);
+	/* probably no need to store and check maxvcs */
+	server->maxBuf = min(le32_to_cpu(pSMBr->MaxBufferSize),
 			(__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
-		server->maxRw = le32_to_cpu(pSMBr->MaxRawSize);
-		cFYI(0, ("Max buf = %d", ses->server->maxBuf));
-		GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey);
-		server->capabilities = le32_to_cpu(pSMBr->Capabilities);
-		server->timeZone = le16_to_cpu(pSMBr->ServerTimeZone);	
-        /* BB with UTC do we ever need to be using srvr timezone? */
-		if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
-			memcpy(server->cryptKey, pSMBr->u.EncryptionKey,
-			       CIFS_CRYPTO_KEY_SIZE);
-		} else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC)
-			   && (pSMBr->EncryptionKeyLength == 0)) {
-			/* decode security blob */
-		} else
-			rc = -EIO;
+	server->maxRw = le32_to_cpu(pSMBr->MaxRawSize);
+	cFYI(0, ("Max buf = %d", ses->server->maxBuf));
+	GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey);
+	server->capabilities = le32_to_cpu(pSMBr->Capabilities);
+	server->timeZone = le16_to_cpu(pSMBr->ServerTimeZone);	
+	if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
+		memcpy(server->cryptKey, pSMBr->u.EncryptionKey,
+		       CIFS_CRYPTO_KEY_SIZE);
+	} else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC)
+			&& (pSMBr->EncryptionKeyLength == 0)) {
+		/* decode security blob */
+	} else if (server->secMode & SECMODE_PW_ENCRYPT) {
+		rc = -EIO; /* no crypt key only if plain text pwd */
+		goto neg_err_exit;
+	}
 
-		/* BB might be helpful to save off the domain of server here */
+	/* BB might be helpful to save off the domain of server here */
 
-		if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) && 
-			(server->capabilities & CAP_EXTENDED_SECURITY)) {
-			count = pSMBr->ByteCount;
-			if (count < 16)
-				rc = -EIO;
-			else if (count == 16) {
-				server->secType = RawNTLMSSP;
-				if (server->socketUseCount.counter > 1) {
-					if (memcmp
-						(server->server_GUID,
-						pSMBr->u.extended_response.
-						GUID, 16) != 0) {
-						cFYI(1, ("server UID changed"));
-						memcpy(server->
-							server_GUID,
-							pSMBr->u.
-							extended_response.
-							GUID, 16);
-					}
-				} else
+	if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) && 
+		(server->capabilities & CAP_EXTENDED_SECURITY)) {
+		count = pSMBr->ByteCount;
+		if (count < 16)
+			rc = -EIO;
+		else if (count == 16) {
+			server->secType = RawNTLMSSP;
+			if (server->socketUseCount.counter > 1) {
+				if (memcmp(server->server_GUID,
+					   pSMBr->u.extended_response.
+					   GUID, 16) != 0) {
+					cFYI(1, ("server UID changed"));
 					memcpy(server->server_GUID,
-					       pSMBr->u.extended_response.
-					       GUID, 16);
-			} else {
-				rc = decode_negTokenInit(pSMBr->u.
-							 extended_response.
-							 SecurityBlob,
-							 count - 16,
-							 &server->secType);
-				if(rc == 1) {
-				/* BB Need to fill struct for sessetup here */
-					rc = -EOPNOTSUPP;
-				} else {
-					rc = -EINVAL;
+						pSMBr->u.extended_response.GUID,
+						16);
 				}
+			} else
+				memcpy(server->server_GUID,
+				       pSMBr->u.extended_response.GUID, 16);
+		} else {
+			rc = decode_negTokenInit(pSMBr->u.extended_response.
+						 SecurityBlob,
+						 count - 16,
+						 &server->secType);
+			if(rc == 1) {
+			/* BB Need to fill struct for sessetup here */
+				rc = -EOPNOTSUPP;
+			} else {
+				rc = -EINVAL;
 			}
-		} else
-			server->capabilities &= ~CAP_EXTENDED_SECURITY;
-		if(sign_CIFS_PDUs == FALSE) {        
-			if(server->secMode & SECMODE_SIGN_REQUIRED)
-				cERROR(1,
-				 ("Server requires /proc/fs/cifs/PacketSigningEnabled"));
-			server->secMode &= ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
-		} else if(sign_CIFS_PDUs == 1) {
-			if((server->secMode & SECMODE_SIGN_REQUIRED) == 0)
-				server->secMode &= ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
 		}
-				
+	} else
+		server->capabilities &= ~CAP_EXTENDED_SECURITY;
+
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+signing_check:
+#endif
+	if(sign_CIFS_PDUs == FALSE) {        
+		if(server->secMode & SECMODE_SIGN_REQUIRED)
+			cERROR(1,("Server requires "
+				 "/proc/fs/cifs/PacketSigningEnabled to be on"));
+		server->secMode &= 
+			~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
+	} else if(sign_CIFS_PDUs == 1) {
+		if((server->secMode & SECMODE_SIGN_REQUIRED) == 0)
+			server->secMode &= 
+				~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
+	} else if(sign_CIFS_PDUs == 2) {
+		if((server->secMode & 
+			(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) {
+			cERROR(1,("signing required but server lacks support"));
+		}
 	}
-	
+neg_err_exit:	
 	cifs_buf_release(pSMB);
+
+	cFYI(1,("negprot rc %d",rc));
 	return rc;
 }
 
@@ -2239,7 +2360,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
 			}
 			symlinkinfo[buflen] = 0; /* just in case so the caller
 					does not go off the end of the buffer */
-			cFYI(1,("readlink result - %s ",symlinkinfo));
+			cFYI(1,("readlink result - %s",symlinkinfo));
 		}
 	}
 qreparse_out:
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index bae1479318d1..876eb9ef85fe 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -49,8 +49,6 @@
 
 static DECLARE_COMPLETION(cifsd_complete);
 
-extern void SMBencrypt(unsigned char *passwd, unsigned char *c8,
-		       unsigned char *p24);
 extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
 			 unsigned char *p24);
 
@@ -70,6 +68,7 @@ struct smb_vol {
 	gid_t linux_gid;
 	mode_t file_mode;
 	mode_t dir_mode;
+	unsigned secFlg;
 	unsigned rw:1;
 	unsigned retry:1;
 	unsigned intr:1;
@@ -83,12 +82,7 @@ struct smb_vol {
 	unsigned remap:1;   /* set to remap seven reserved chars in filenames */
 	unsigned posix_paths:1;   /* unset to not ask for posix pathnames. */
 	unsigned sfu_emul:1;
-	unsigned krb5:1;
-	unsigned ntlm:1;
-	unsigned ntlmv2:1;
 	unsigned nullauth:1; /* attempt to authenticate with null user */
-	unsigned sign:1;
-	unsigned seal:1;     /* encrypt */
 	unsigned nocase;     /* request case insensitive filenames */
 	unsigned nobrl;      /* disable sending byte range locks to srv */
 	unsigned int rsize;
@@ -369,21 +363,21 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 			continue;
 		if (bigbuf == NULL) {
 			bigbuf = cifs_buf_get();
-			if(bigbuf == NULL) {
-				cERROR(1,("No memory for large SMB response"));
+			if (!bigbuf) {
+				cERROR(1, ("No memory for large SMB response"));
 				msleep(3000);
 				/* retry will check if exiting */
 				continue;
 			}
-		} else if(isLargeBuf) {
-			/* we are reusing a dirtry large buf, clear its start */
+		} else if (isLargeBuf) {
+			/* we are reusing a dirty large buf, clear its start */
 			memset(bigbuf, 0, sizeof (struct smb_hdr));
 		}
 
 		if (smallbuf == NULL) {
 			smallbuf = cifs_small_buf_get();
-			if(smallbuf == NULL) {
-				cERROR(1,("No memory for SMB response"));
+			if (!smallbuf) {
+				cERROR(1, ("No memory for SMB response"));
 				msleep(1000);
 				/* retry will check if exiting */
 				continue;
@@ -403,12 +397,12 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 		    kernel_recvmsg(csocket, &smb_msg,
 				 &iov, 1, 4, 0 /* BB see socket.h flags */);
 
-		if(server->tcpStatus == CifsExiting) {
+		if (server->tcpStatus == CifsExiting) {
 			break;
 		} else if (server->tcpStatus == CifsNeedReconnect) {
-			cFYI(1,("Reconnect after server stopped responding"));
+			cFYI(1, ("Reconnect after server stopped responding"));
 			cifs_reconnect(server);
-			cFYI(1,("call to reconnect done"));
+			cFYI(1, ("call to reconnect done"));
 			csocket = server->ssocket;
 			continue;
 		} else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) {
@@ -417,15 +411,15 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 				tcpStatus CifsNeedReconnect if server hung */
 			continue;
 		} else if (length <= 0) {
-			if(server->tcpStatus == CifsNew) {
-				cFYI(1,("tcp session abend after SMBnegprot"));
+			if (server->tcpStatus == CifsNew) {
+				cFYI(1, ("tcp session abend after SMBnegprot"));
 				/* some servers kill the TCP session rather than
 				   returning an SMB negprot error, in which
 				   case reconnecting here is not going to help,
 				   and so simply return error to mount */
 				break;
 			}
-			if(length == -EINTR) { 
+			if (!try_to_freeze() && (length == -EINTR)) {
 				cFYI(1,("cifsd thread killed"));
 				break;
 			}
@@ -585,9 +579,11 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 						/* merge response - fix up 1st*/
 						if(coalesce_t2(smb_buffer, 
 							mid_entry->resp_buf)) {
+							mid_entry->multiRsp = 1;
 							break;
 						} else {
 							/* all parts received */
+							mid_entry->multiEnd = 1;
 							goto multi_t2_fnd; 
 						}
 					} else {
@@ -632,9 +628,14 @@ multi_t2_fnd:
 			wake_up_process(task_to_wake);
 		} else if ((is_valid_oplock_break(smb_buffer, server) == FALSE)
 		    && (isMultiRsp == FALSE)) {                          
-			cERROR(1, ("No task to wake, unknown frame rcvd!"));
+			cERROR(1, ("No task to wake, unknown frame rcvd! NumMids %d", midCount.counter));
 			cifs_dump_mem("Received Data is: ",(char *)smb_buffer,
 				      sizeof(struct smb_hdr));
+#ifdef CONFIG_CIFS_DEBUG2
+			cifs_dump_detail(smb_buffer);
+			cifs_dump_mids(server);
+#endif /* CIFS_DEBUG2 */
+			
 		}
 	} /* end while !EXITING */
 
@@ -784,7 +785,6 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 
 	/* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
 	vol->rw = TRUE;
-	vol->ntlm = TRUE;
 	/* default is always to request posix paths. */
 	vol->posix_paths = 1;
 
@@ -915,30 +915,35 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 				cERROR(1,("no security value specified"));
                                 continue;
                         } else if (strnicmp(value, "krb5i", 5) == 0) {
-				vol->sign = 1;
-				vol->krb5 = 1;
+				vol->secFlg |= CIFSSEC_MAY_KRB5 | 
+					CIFSSEC_MUST_SIGN;
 			} else if (strnicmp(value, "krb5p", 5) == 0) {
-				/* vol->seal = 1; 
-				   vol->krb5 = 1; */
+				/* vol->secFlg |= CIFSSEC_MUST_SEAL | 
+					CIFSSEC_MAY_KRB5; */ 
 				cERROR(1,("Krb5 cifs privacy not supported"));
 				return 1;
 			} else if (strnicmp(value, "krb5", 4) == 0) {
-				vol->krb5 = 1;
+				vol->secFlg |= CIFSSEC_MAY_KRB5;
 			} else if (strnicmp(value, "ntlmv2i", 7) == 0) {
-				vol->ntlmv2 = 1;
-				vol->sign = 1;
+				vol->secFlg |= CIFSSEC_MAY_NTLMV2 |
+					CIFSSEC_MUST_SIGN;
 			} else if (strnicmp(value, "ntlmv2", 6) == 0) {
-				vol->ntlmv2 = 1;
+				vol->secFlg |= CIFSSEC_MAY_NTLMV2;
 			} else if (strnicmp(value, "ntlmi", 5) == 0) {
-				vol->ntlm = 1;
-				vol->sign = 1;
+				vol->secFlg |= CIFSSEC_MAY_NTLM |
+					CIFSSEC_MUST_SIGN;
 			} else if (strnicmp(value, "ntlm", 4) == 0) {
 				/* ntlm is default so can be turned off too */
-				vol->ntlm = 1;
+				vol->secFlg |= CIFSSEC_MAY_NTLM;
 			} else if (strnicmp(value, "nontlm", 6) == 0) {
-				vol->ntlm = 0;
+				/* BB is there a better way to do this? */
+				vol->secFlg |= CIFSSEC_MAY_NTLMV2;
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+			} else if (strnicmp(value, "lanman", 6) == 0) {
+                                vol->secFlg |= CIFSSEC_MAY_LANMAN;
+#endif
 			} else if (strnicmp(value, "none", 4) == 0) {
-				vol->nullauth = 1; 
+				vol->nullauth = 1;
                         } else {
                                 cERROR(1,("bad security option: %s", value));
                                 return 1;
@@ -976,7 +981,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 			}
 			/* BB are there cases in which a comma can be valid in
 			a domain name and need special handling? */
-			if (strnlen(value, 65) < 65) {
+			if (strnlen(value, 256) < 256) {
 				vol->domainname = value;
 				cFYI(1, ("Domain name set"));
 			} else {
@@ -1168,6 +1173,10 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 			vol->no_psx_acl = 0;
 		} else if (strnicmp(data, "noacl",5) == 0) {
 			vol->no_psx_acl = 1;
+		} else if (strnicmp(data, "sign",4) == 0) {
+			vol->secFlg |= CIFSSEC_MUST_SIGN;
+/*		} else if (strnicmp(data, "seal",4) == 0) {
+			vol->secFlg |= CIFSSEC_MUST_SEAL; */
 		} else if (strnicmp(data, "direct",6) == 0) {
 			vol->direct_io = 1;
 		} else if (strnicmp(data, "forcedirectio",13) == 0) {
@@ -1762,11 +1771,18 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			if (volume_info.username)
 				strncpy(pSesInfo->userName,
 					volume_info.username,MAX_USERNAME_SIZE);
-			if (volume_info.domainname)
-				strncpy(pSesInfo->domainName,
-					volume_info.domainname,MAX_USERNAME_SIZE);
+			if (volume_info.domainname) {
+				int len = strlen(volume_info.domainname);
+				pSesInfo->domainName = 
+					kmalloc(len + 1, GFP_KERNEL);
+				if(pSesInfo->domainName)
+					strcpy(pSesInfo->domainName,
+						volume_info.domainname);
+			}
 			pSesInfo->linux_uid = volume_info.linux_uid;
+			pSesInfo->overrideSecFlg = volume_info.secFlg;
 			down(&pSesInfo->sesSem);
+			/* BB FIXME need to pass vol->secFlg BB */
 			rc = cifs_setup_session(xid,pSesInfo, cifs_sb->local_nls);
 			up(&pSesInfo->sesSem);
 			if(!rc)
@@ -1980,7 +1996,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 
 static int
 CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
-	      char session_key[CIFS_SESSION_KEY_SIZE],
+	      char session_key[CIFS_SESS_KEY_SIZE],
 	      const struct nls_table *nls_codepage)
 {
 	struct smb_hdr *smb_buffer;
@@ -2038,15 +2054,15 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 	pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
 
 	pSMB->req_no_secext.CaseInsensitivePasswordLength = 
-		cpu_to_le16(CIFS_SESSION_KEY_SIZE);
+		cpu_to_le16(CIFS_SESS_KEY_SIZE);
 
 	pSMB->req_no_secext.CaseSensitivePasswordLength =
-	    cpu_to_le16(CIFS_SESSION_KEY_SIZE);
+	    cpu_to_le16(CIFS_SESS_KEY_SIZE);
 	bcc_ptr = pByteArea(smb_buffer);
-	memcpy(bcc_ptr, (char *) session_key, CIFS_SESSION_KEY_SIZE);
-	bcc_ptr += CIFS_SESSION_KEY_SIZE;
-	memcpy(bcc_ptr, (char *) session_key, CIFS_SESSION_KEY_SIZE);
-	bcc_ptr += CIFS_SESSION_KEY_SIZE;
+	memcpy(bcc_ptr, (char *) session_key, CIFS_SESS_KEY_SIZE);
+	bcc_ptr += CIFS_SESS_KEY_SIZE;
+	memcpy(bcc_ptr, (char *) session_key, CIFS_SESS_KEY_SIZE);
+	bcc_ptr += CIFS_SESS_KEY_SIZE;
 
 	if (ses->capabilities & CAP_UNICODE) {
 		if ((long) bcc_ptr % 2) { /* must be word aligned for Unicode */
@@ -2054,7 +2070,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 			bcc_ptr++;
 		}
 		if(user == NULL)
-			bytes_returned = 0; /* skill null user */
+			bytes_returned = 0; /* skip null user */
 	        else
 			bytes_returned =
 			        cifs_strtoUCS((__le16 *) bcc_ptr, user, 100,
@@ -2162,8 +2178,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 				if (remaining_words > 0) {
 					len = UniStrnlen((wchar_t *)bcc_ptr,
 							 remaining_words-1);
-					if(ses->serverNOS)
-						kfree(ses->serverNOS);
+					kfree(ses->serverNOS);
 					ses->serverNOS = kzalloc(2 * (len + 1),GFP_KERNEL);
 					if(ses->serverNOS == NULL)
 						goto sesssetup_nomem;
@@ -2203,12 +2218,10 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 					/* if these kcallocs fail not much we
 					   can do, but better to not fail the
 					   sesssetup itself */
-					if(ses->serverDomain)
-						kfree(ses->serverDomain);
+					kfree(ses->serverDomain);
 					ses->serverDomain =
 					    kzalloc(2, GFP_KERNEL);
-					if(ses->serverNOS)
-						kfree(ses->serverNOS);
+					kfree(ses->serverNOS);
 					ses->serverNOS =
 					    kzalloc(2, GFP_KERNEL);
 				}
@@ -2217,8 +2230,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 				if (((long) bcc_ptr + len) - (long)
 				    pByteArea(smb_buffer_response)
 					    <= BCC(smb_buffer_response)) {
-					if(ses->serverOS)
-						kfree(ses->serverOS);
+					kfree(ses->serverOS);
 					ses->serverOS = kzalloc(len + 1,GFP_KERNEL);
 					if(ses->serverOS == NULL)
 						goto sesssetup_nomem;
@@ -2229,8 +2241,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 					bcc_ptr++;
 
 					len = strnlen(bcc_ptr, 1024);
-					if(ses->serverNOS)
-						kfree(ses->serverNOS);
+					kfree(ses->serverNOS);
 					ses->serverNOS = kzalloc(len + 1,GFP_KERNEL);
 					if(ses->serverNOS == NULL)
 						goto sesssetup_nomem;
@@ -2274,292 +2285,6 @@ sesssetup_nomem:	/* do not return an error on nomem for the info strings,
 }
 
 static int
-CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
-		char *SecurityBlob,int SecurityBlobLength,
-		const struct nls_table *nls_codepage)
-{
-	struct smb_hdr *smb_buffer;
-	struct smb_hdr *smb_buffer_response;
-	SESSION_SETUP_ANDX *pSMB;
-	SESSION_SETUP_ANDX *pSMBr;
-	char *bcc_ptr;
-	char *user;
-	char *domain;
-	int rc = 0;
-	int remaining_words = 0;
-	int bytes_returned = 0;
-	int len;
-	__u32 capabilities;
-	__u16 count;
-
-	cFYI(1, ("In spnego sesssetup "));
-	if(ses == NULL)
-		return -EINVAL;
-	user = ses->userName;
-	domain = ses->domainName;
-
-	smb_buffer = cifs_buf_get();
-	if (smb_buffer == NULL) {
-		return -ENOMEM;
-	}
-	smb_buffer_response = smb_buffer;
-	pSMBr = pSMB = (SESSION_SETUP_ANDX *) smb_buffer;
-
-	/* send SMBsessionSetup here */
-	header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
-			NULL /* no tCon exists yet */ , 12 /* wct */ );
-
-	smb_buffer->Mid = GetNextMid(ses->server);
-	pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
-	pSMB->req.AndXCommand = 0xFF;
-	if(ses->server->maxBuf > 64*1024)
-		ses->server->maxBuf = (64*1023);
-	pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
-	pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
-
-	if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
-		smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
-
-	capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
-	    CAP_EXTENDED_SECURITY;
-	if (ses->capabilities & CAP_UNICODE) {
-		smb_buffer->Flags2 |= SMBFLG2_UNICODE;
-		capabilities |= CAP_UNICODE;
-	}
-	if (ses->capabilities & CAP_STATUS32) {
-		smb_buffer->Flags2 |= SMBFLG2_ERR_STATUS;
-		capabilities |= CAP_STATUS32;
-	}
-	if (ses->capabilities & CAP_DFS) {
-		smb_buffer->Flags2 |= SMBFLG2_DFS;
-		capabilities |= CAP_DFS;
-	}
-	pSMB->req.Capabilities = cpu_to_le32(capabilities);
-
-	pSMB->req.SecurityBlobLength = cpu_to_le16(SecurityBlobLength);
-	bcc_ptr = pByteArea(smb_buffer);
-	memcpy(bcc_ptr, SecurityBlob, SecurityBlobLength);
-	bcc_ptr += SecurityBlobLength;
-
-	if (ses->capabilities & CAP_UNICODE) {
-		if ((long) bcc_ptr % 2) {	/* must be word aligned for Unicode strings */
-			*bcc_ptr = 0;
-			bcc_ptr++;
-		}
-		bytes_returned =
-		    cifs_strtoUCS((__le16 *) bcc_ptr, user, 100, nls_codepage);
-		bcc_ptr += 2 * bytes_returned;	/* convert num of 16 bit words to bytes */
-		bcc_ptr += 2;	/* trailing null */
-		if (domain == NULL)
-			bytes_returned =
-			    cifs_strtoUCS((__le16 *) bcc_ptr,
-					  "CIFS_LINUX_DOM", 32, nls_codepage);
-		else
-			bytes_returned =
-			    cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
-					  nls_codepage);
-		bcc_ptr += 2 * bytes_returned;
-		bcc_ptr += 2;
-		bytes_returned =
-		    cifs_strtoUCS((__le16 *) bcc_ptr, "Linux version ",
-				  32, nls_codepage);
-		bcc_ptr += 2 * bytes_returned;
-		bytes_returned =
-		    cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 32,
-				  nls_codepage);
-		bcc_ptr += 2 * bytes_returned;
-		bcc_ptr += 2;
-		bytes_returned =
-		    cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
-				  64, nls_codepage);
-		bcc_ptr += 2 * bytes_returned;
-		bcc_ptr += 2;
-	} else {
-		strncpy(bcc_ptr, user, 200);
-		bcc_ptr += strnlen(user, 200);
-		*bcc_ptr = 0;
-		bcc_ptr++;
-		if (domain == NULL) {
-			strcpy(bcc_ptr, "CIFS_LINUX_DOM");
-			bcc_ptr += strlen("CIFS_LINUX_DOM") + 1;
-		} else {
-			strncpy(bcc_ptr, domain, 64);
-			bcc_ptr += strnlen(domain, 64);
-			*bcc_ptr = 0;
-			bcc_ptr++;
-		}
-		strcpy(bcc_ptr, "Linux version ");
-		bcc_ptr += strlen("Linux version ");
-		strcpy(bcc_ptr, system_utsname.release);
-		bcc_ptr += strlen(system_utsname.release) + 1;
-		strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
-		bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
-	}
-	count = (long) bcc_ptr - (long) pByteArea(smb_buffer);
-	smb_buffer->smb_buf_length += count;
-	pSMB->req.ByteCount = cpu_to_le16(count);
-
-	rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
-			 &bytes_returned, 1);
-	if (rc) {
-/*    rc = map_smb_to_linux_error(smb_buffer_response);  *//* done in SendReceive now */
-	} else if ((smb_buffer_response->WordCount == 3)
-		   || (smb_buffer_response->WordCount == 4)) {
-		__u16 action = le16_to_cpu(pSMBr->resp.Action);
-		__u16 blob_len =
-		    le16_to_cpu(pSMBr->resp.SecurityBlobLength);
-		if (action & GUEST_LOGIN)
-			cFYI(1, (" Guest login"));	/* BB do we want to set anything in SesInfo struct ? */
-		if (ses) {
-			ses->Suid = smb_buffer_response->Uid;	/* UID left in wire format (le) */
-			cFYI(1, ("UID = %d ", ses->Suid));
-			bcc_ptr = pByteArea(smb_buffer_response);	/* response can have either 3 or 4 word count - Samba sends 3 */
-
-			/* BB Fix below to make endian neutral !! */
-
-			if ((pSMBr->resp.hdr.WordCount == 3)
-			    || ((pSMBr->resp.hdr.WordCount == 4)
-				&& (blob_len <
-				    pSMBr->resp.ByteCount))) {
-				if (pSMBr->resp.hdr.WordCount == 4) {
-					bcc_ptr +=
-					    blob_len;
-					cFYI(1,
-					     ("Security Blob Length %d ",
-					      blob_len));
-				}
-
-				if (smb_buffer->Flags2 & SMBFLG2_UNICODE) {
-					if ((long) (bcc_ptr) % 2) {
-						remaining_words =
-						    (BCC(smb_buffer_response)
-						     - 1) / 2;
-						bcc_ptr++;	/* Unicode strings must be word aligned */
-					} else {
-						remaining_words =
-						    BCC
-						    (smb_buffer_response) / 2;
-					}
-					len =
-					    UniStrnlen((wchar_t *) bcc_ptr,
-						       remaining_words - 1);
-/* We look for obvious messed up bcc or strings in response so we do not go off
-   the end since (at least) WIN2K and Windows XP have a major bug in not null
-   terminating last Unicode string in response  */
-					if(ses->serverOS)
-						kfree(ses->serverOS);
-					ses->serverOS =
-					    kzalloc(2 * (len + 1), GFP_KERNEL);
-					cifs_strfromUCS_le(ses->serverOS,
-							   (__le16 *)
-							   bcc_ptr, len,
-							   nls_codepage);
-					bcc_ptr += 2 * (len + 1);
-					remaining_words -= len + 1;
-					ses->serverOS[2 * len] = 0;
-					ses->serverOS[1 + (2 * len)] = 0;
-					if (remaining_words > 0) {
-						len = UniStrnlen((wchar_t *)bcc_ptr,
-								 remaining_words
-								 - 1);
-						if(ses->serverNOS)
-							kfree(ses->serverNOS);
-						ses->serverNOS =
-						    kzalloc(2 * (len + 1),
-							    GFP_KERNEL);
-						cifs_strfromUCS_le(ses->serverNOS,
-								   (__le16 *)bcc_ptr,
-								   len,
-								   nls_codepage);
-						bcc_ptr += 2 * (len + 1);
-						ses->serverNOS[2 * len] = 0;
-						ses->serverNOS[1 + (2 * len)] = 0;
-						remaining_words -= len + 1;
-						if (remaining_words > 0) {
-							len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);	
-                     /* last string not null terminated (e.g.Windows XP/2000) */
-							if(ses->serverDomain)
-								kfree(ses->serverDomain);
-							ses->serverDomain = kzalloc(2*(len+1),GFP_KERNEL);
-							cifs_strfromUCS_le(ses->serverDomain,
-							     (__le16 *)bcc_ptr, 
-							     len, nls_codepage);
-							bcc_ptr += 2*(len+1);
-							ses->serverDomain[2*len] = 0;
-							ses->serverDomain[1+(2*len)] = 0;
-						} /* else no more room so create dummy domain string */
-						else {
-							if(ses->serverDomain)
-								kfree(ses->serverDomain);
-							ses->serverDomain =
-							    kzalloc(2,GFP_KERNEL);
-						}
-					} else {/* no room use dummy domain&NOS */
-						if(ses->serverDomain)
-							kfree(ses->serverDomain);
-						ses->serverDomain = kzalloc(2, GFP_KERNEL);
-						if(ses->serverNOS)
-							kfree(ses->serverNOS);
-						ses->serverNOS = kzalloc(2, GFP_KERNEL);
-					}
-				} else {	/* ASCII */
-
-					len = strnlen(bcc_ptr, 1024);
-					if (((long) bcc_ptr + len) - (long)
-					    pByteArea(smb_buffer_response)
-					    <= BCC(smb_buffer_response)) {
-						if(ses->serverOS)
-							kfree(ses->serverOS);
-						ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
-						strncpy(ses->serverOS, bcc_ptr, len);
-
-						bcc_ptr += len;
-						bcc_ptr[0] = 0;	/* null terminate the string */
-						bcc_ptr++;
-
-						len = strnlen(bcc_ptr, 1024);
-						if(ses->serverNOS)
-							kfree(ses->serverNOS);
-						ses->serverNOS = kzalloc(len + 1,GFP_KERNEL);
-						strncpy(ses->serverNOS, bcc_ptr, len);
-						bcc_ptr += len;
-						bcc_ptr[0] = 0;
-						bcc_ptr++;
-
-						len = strnlen(bcc_ptr, 1024);
-						if(ses->serverDomain)
-							kfree(ses->serverDomain);
-						ses->serverDomain = kzalloc(len + 1, GFP_KERNEL);
-						strncpy(ses->serverDomain, bcc_ptr, len);
-						bcc_ptr += len;
-						bcc_ptr[0] = 0;
-						bcc_ptr++;
-					} else
-						cFYI(1,
-						     ("Variable field of length %d extends beyond end of smb ",
-						      len));
-				}
-			} else {
-				cERROR(1,
-				       (" Security Blob Length extends beyond end of SMB"));
-			}
-		} else {
-			cERROR(1, ("No session structure passed in."));
-		}
-	} else {
-		cERROR(1,
-		       (" Invalid Word count %d: ",
-			smb_buffer_response->WordCount));
-		rc = -EIO;
-	}
-
-	if (smb_buffer)
-		cifs_buf_release(smb_buffer);
-
-	return rc;
-}
-
-static int
 CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 			      struct cifsSesInfo *ses, int * pNTLMv2_flag,
 			      const struct nls_table *nls_codepage)
@@ -2635,8 +2360,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 	    /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN | */ NTLMSSP_NEGOTIATE_128;
 	if(sign_CIFS_PDUs)
 		negotiate_flags |= NTLMSSP_NEGOTIATE_SIGN;
-	if(ntlmv2_support)
-		negotiate_flags |= NTLMSSP_NEGOTIATE_NTLMV2;
+/*	if(ntlmv2_support)
+		negotiate_flags |= NTLMSSP_NEGOTIATE_NTLMV2;*/
 	/* setup pointers to domain name and workstation name */
 	bcc_ptr += SecurityBlobLength;
 
@@ -2783,8 +2508,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 								 bcc_ptr,
 								 remaining_words
 								 - 1);
-						if(ses->serverNOS)
-							kfree(ses->serverNOS);
+						kfree(ses->serverNOS);
 						ses->serverNOS =
 						    kzalloc(2 * (len + 1),
 							    GFP_KERNEL);
@@ -2802,8 +2526,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 						if (remaining_words > 0) {
 							len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);	
            /* last string is not always null terminated (for e.g. for Windows XP & 2000) */
-							if(ses->serverDomain)
-								kfree(ses->serverDomain);
+							kfree(ses->serverDomain);
 							ses->serverDomain =
 							    kzalloc(2 *
 								    (len +
@@ -2822,19 +2545,16 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 							    = 0;
 						} /* else no more room so create dummy domain string */
 						else {
-							if(ses->serverDomain)
-								kfree(ses->serverDomain);
+							kfree(ses->serverDomain);
 							ses->serverDomain =
 							    kzalloc(2,
 								    GFP_KERNEL);
 						}
 					} else {	/* no room so create dummy domain and NOS string */
-						if(ses->serverDomain);
-							kfree(ses->serverDomain);
+						kfree(ses->serverDomain);
 						ses->serverDomain =
 						    kzalloc(2, GFP_KERNEL);
-						if(ses->serverNOS)
-							kfree(ses->serverNOS);
+						kfree(ses->serverNOS);
 						ses->serverNOS =
 						    kzalloc(2, GFP_KERNEL);
 					}
@@ -2856,8 +2576,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 						bcc_ptr++;
 
 						len = strnlen(bcc_ptr, 1024);
-						if(ses->serverNOS)
-							kfree(ses->serverNOS);
+						kfree(ses->serverNOS);
 						ses->serverNOS =
 						    kzalloc(len + 1,
 							    GFP_KERNEL);
@@ -2867,8 +2586,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 						bcc_ptr++;
 
 						len = strnlen(bcc_ptr, 1024);
-						if(ses->serverDomain)
-							kfree(ses->serverDomain);
+						kfree(ses->serverDomain);
 						ses->serverDomain =
 						    kzalloc(len + 1,
 							    GFP_KERNEL);
@@ -2994,14 +2712,14 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 	SecurityBlob->LmChallengeResponse.Buffer = 0;
 
 	SecurityBlob->NtChallengeResponse.Length =
-	    cpu_to_le16(CIFS_SESSION_KEY_SIZE);
+	    cpu_to_le16(CIFS_SESS_KEY_SIZE);
 	SecurityBlob->NtChallengeResponse.MaximumLength =
-	    cpu_to_le16(CIFS_SESSION_KEY_SIZE);
-	memcpy(bcc_ptr, ntlm_session_key, CIFS_SESSION_KEY_SIZE);
+	    cpu_to_le16(CIFS_SESS_KEY_SIZE);
+	memcpy(bcc_ptr, ntlm_session_key, CIFS_SESS_KEY_SIZE);
 	SecurityBlob->NtChallengeResponse.Buffer =
 	    cpu_to_le32(SecurityBlobLength);
-	SecurityBlobLength += CIFS_SESSION_KEY_SIZE;
-	bcc_ptr += CIFS_SESSION_KEY_SIZE;
+	SecurityBlobLength += CIFS_SESS_KEY_SIZE;
+	bcc_ptr += CIFS_SESS_KEY_SIZE;
 
 	if (ses->capabilities & CAP_UNICODE) {
 		if (domain == NULL) {
@@ -3190,8 +2908,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 								 bcc_ptr,
 								 remaining_words
 								 - 1);
-						if(ses->serverNOS)
-							kfree(ses->serverNOS);
+						kfree(ses->serverNOS);
 						ses->serverNOS =
 						    kzalloc(2 * (len + 1),
 							    GFP_KERNEL);
@@ -3244,8 +2961,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 						if(ses->serverDomain)
 							kfree(ses->serverDomain);
 						ses->serverDomain = kzalloc(2, GFP_KERNEL);
-						if(ses->serverNOS)
-							kfree(ses->serverNOS);
+						kfree(ses->serverNOS);
 						ses->serverNOS = kzalloc(2, GFP_KERNEL);
 					}
 				} else {	/* ASCII */
@@ -3263,8 +2979,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 						bcc_ptr++;
 
 						len = strnlen(bcc_ptr, 1024);
-						if(ses->serverNOS)
-							kfree(ses->serverNOS);
+						kfree(ses->serverNOS);
 						ses->serverNOS = kzalloc(len+1,GFP_KERNEL);
 						strncpy(ses->serverNOS, bcc_ptr, len);	
 						bcc_ptr += len;
@@ -3340,22 +3055,33 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 	bcc_ptr = &pSMB->Password[0];
 	if((ses->server->secMode) & SECMODE_USER) {
 		pSMB->PasswordLength = cpu_to_le16(1);	/* minimum */
+		*bcc_ptr = 0; /* password is null byte */
 		bcc_ptr++;              /* skip password */
+		/* already aligned so no need to do it below */
 	} else {
-		pSMB->PasswordLength = cpu_to_le16(CIFS_SESSION_KEY_SIZE);
+		pSMB->PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
 		/* BB FIXME add code to fail this if NTLMv2 or Kerberos
 		   specified as required (when that support is added to
 		   the vfs in the future) as only NTLM or the much
-		   weaker LANMAN (which we do not send) is accepted
+		   weaker LANMAN (which we do not send by default) is accepted
 		   by Samba (not sure whether other servers allow
 		   NTLMv2 password here) */
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+		if((extended_security & CIFSSEC_MAY_LANMAN) && 
+			(ses->server->secType == LANMAN))
+			calc_lanman_hash(ses, bcc_ptr);
+		else
+#endif /* CIFS_WEAK_PW_HASH */
 		SMBNTencrypt(ses->password,
 			     ses->server->cryptKey,
 			     bcc_ptr);
 
-		bcc_ptr += CIFS_SESSION_KEY_SIZE;
-		*bcc_ptr = 0;
-		bcc_ptr++; /* align */
+		bcc_ptr += CIFS_SESS_KEY_SIZE;
+		if(ses->capabilities & CAP_UNICODE) {
+			/* must align unicode strings */
+			*bcc_ptr = 0; /* null byte password */
+			bcc_ptr++;
+		}
 	}
 
 	if(ses->server->secMode & 
@@ -3429,7 +3155,10 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 			}
 			/* else do not bother copying these informational fields */
 		}
-		tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport);
+		if(smb_buffer_response->WordCount == 3)
+			tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport);
+		else
+			tcon->Flags = 0;
 		cFYI(1, ("Tcon flags: 0x%x ", tcon->Flags));
 	} else if ((rc == 0) && tcon == NULL) {
         /* all we need to save for IPC$ connection */
@@ -3494,7 +3223,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 					   struct nls_table * nls_info)
 {
 	int rc = 0;
-	char ntlm_session_key[CIFS_SESSION_KEY_SIZE];
+	char ntlm_session_key[CIFS_SESS_KEY_SIZE];
 	int ntlmv2_flag = FALSE;
 	int first_time = 0;
 
@@ -3526,20 +3255,13 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 			pSesInfo->server->secMode,
 			pSesInfo->server->capabilities,
 			pSesInfo->server->timeZone));
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-		if(experimEnabled > 1)
-			rc = CIFS_SessSetup(xid, pSesInfo, CIFS_NTLM /* type */,
-					    &ntlmv2_flag, nls_info);	
-		else
-#endif
-		if (extended_security
+		if(experimEnabled < 2)
+			rc = CIFS_SessSetup(xid, pSesInfo,
+					    first_time, nls_info);
+		else if (extended_security
 				&& (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
 				&& (pSesInfo->server->secType == NTLMSSP)) {
-			cFYI(1, ("New style sesssetup"));
-			rc = CIFSSpnegoSessSetup(xid, pSesInfo,
-				NULL /* security blob */, 
-				0 /* blob length */,
-				nls_info);
+			rc = -EOPNOTSUPP;
 		} else if (extended_security
 			   && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
 			   && (pSesInfo->server->secType == RawNTLMSSP)) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 82315edc77d7..ba4cbe9b0684 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -113,7 +113,7 @@ cifs_bp_rename_retry:
 	full_path[namelen+2] = 0;
 BB remove above eight lines BB */
 
-/* Inode operations in similar order to how they appear in the Linux file fs.h */
+/* Inode operations in similar order to how they appear in Linux file fs.h */
 
 int
 cifs_create(struct inode *inode, struct dentry *direntry, int mode,
@@ -178,11 +178,14 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 		FreeXid(xid);
 		return -ENOMEM;
 	}
-
-	rc = CIFSSMBOpen(xid, pTcon, full_path, disposition,
+	if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) 
+		rc = CIFSSMBOpen(xid, pTcon, full_path, disposition,
 			 desiredAccess, CREATE_NOT_DIR,
 			 &fileHandle, &oplock, buf, cifs_sb->local_nls,
 			 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+	else
+		rc = -EIO; /* no NT SMB support fall into legacy open below */
+
 	if(rc == -EIO) {
 		/* old server, retry the open legacy style */
 		rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
@@ -191,7 +194,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
 	} 
 	if (rc) {
-		cFYI(1, ("cifs_create returned 0x%x ", rc));
+		cFYI(1, ("cifs_create returned 0x%x", rc));
 	} else {
 		/* If Open reported that we actually created a file
 		then we now have to set the mode if possible */
@@ -369,6 +372,10 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 					 cifs_sb->mnt_cifs_flags & 
 					    CIFS_MOUNT_MAP_SPECIAL_CHR);
 
+			/* BB FIXME - add handling for backlevel servers
+			   which need legacy open and check for all
+			   calls to SMBOpen for fallback to 
+			   SMBLegacyOpen */
 			if(!rc) {
 				/* BB Do not bother to decode buf since no
 				   local inode yet to put timestamps in,
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index 633a93811328..d91a3d44e9e3 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -91,14 +91,14 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
 	if(full_path == NULL) {
 		rc = -ENOMEM;
 	} else {
-		cERROR(1,("cifs dir notify on file %s with arg 0x%lx",full_path,arg)); /* BB removeme BB */
+		cFYI(1,("dir notify on file %s Arg 0x%lx",full_path,arg));
 		rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, 
 			GENERIC_READ | SYNCHRONIZE, 0 /* create options */,
 			&netfid, &oplock,NULL, cifs_sb->local_nls,
 			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
 		/* BB fixme - add this handle to a notify handle list */
 		if(rc) {
-			cERROR(1,("Could not open directory for notify"));  /* BB remove BB */
+			cFYI(1,("Could not open directory for notify"));
 		} else {
 			filter = convert_to_cifs_notify_flags(arg);
 			if(filter != 0) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index b4a18c1cab0a..5861eb42e626 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -110,7 +110,6 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
 			 &pCifsInode->openFileList);
 	}
 	write_unlock(&GlobalSMBSeslock);
-	write_unlock(&file->f_owner.lock);
 	if (pCifsInode->clientCanCacheRead) {
 		/* we have the inode open somewhere else
 		   no need to discard cache data */
@@ -201,7 +200,7 @@ int cifs_open(struct inode *inode, struct file *file)
 		} else {
 			if (file->f_flags & O_EXCL)
 				cERROR(1, ("could not find file instance for "
-					   "new file %p ", file));
+					   "new file %p", file));
 		}
 	}
 
@@ -260,10 +259,15 @@ int cifs_open(struct inode *inode, struct file *file)
 		rc = -ENOMEM;
 		goto out;
 	}
-	rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess,
-			 CREATE_NOT_DIR, &netfid, &oplock, buf,
+
+	if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
+		rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, 
+			 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
 			 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
 				 & CIFS_MOUNT_MAP_SPECIAL_CHR);
+	else
+		rc = -EIO; /* no NT SMB support fall into legacy open below */
+
 	if (rc == -EIO) {
 		/* Old server, try legacy style OpenX */
 		rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
@@ -272,7 +276,7 @@ int cifs_open(struct inode *inode, struct file *file)
 				& CIFS_MOUNT_MAP_SPECIAL_CHR);
 	}
 	if (rc) {
-		cFYI(1, ("cifs_open returned 0x%x ", rc));
+		cFYI(1, ("cifs_open returned 0x%x", rc));
 		goto out;
 	}
 	file->private_data =
@@ -282,7 +286,6 @@ int cifs_open(struct inode *inode, struct file *file)
 		goto out;
 	}
 	pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
-	write_lock(&file->f_owner.lock);
 	write_lock(&GlobalSMBSeslock);
 	list_add(&pCifsFile->tlist, &pTcon->openFileList);
 
@@ -293,7 +296,6 @@ int cifs_open(struct inode *inode, struct file *file)
 					    &oplock, buf, full_path, xid);
 	} else {
 		write_unlock(&GlobalSMBSeslock);
-		write_unlock(&file->f_owner.lock);
 	}
 
 	if (oplock & CIFS_CREATE_ACTION) {           
@@ -409,8 +411,8 @@ static int cifs_reopen_file(struct inode *inode, struct file *file,
 				CIFS_MOUNT_MAP_SPECIAL_CHR);
 	if (rc) {
 		up(&pCifsFile->fh_sem);
-		cFYI(1, ("cifs_open returned 0x%x ", rc));
-		cFYI(1, ("oplock: %d ", oplock));
+		cFYI(1, ("cifs_open returned 0x%x", rc));
+		cFYI(1, ("oplock: %d", oplock));
 	} else {
 		pCifsFile->netfid = netfid;
 		pCifsFile->invalidHandle = FALSE;
@@ -472,7 +474,6 @@ int cifs_close(struct inode *inode, struct file *file)
 	pTcon = cifs_sb->tcon;
 	if (pSMBFile) {
 		pSMBFile->closePend = TRUE;
-		write_lock(&file->f_owner.lock);
 		if (pTcon) {
 			/* no sense reconnecting to close a file that is
 			   already closed */
@@ -487,23 +488,18 @@ int cifs_close(struct inode *inode, struct file *file)
 					the struct would be in each open file,
 					but this should give enough time to 
 					clear the socket */
-					write_unlock(&file->f_owner.lock);
 					cERROR(1,("close with pending writes"));
 					msleep(timeout);
-					write_lock(&file->f_owner.lock);
 					timeout *= 4;
 				} 
-				write_unlock(&file->f_owner.lock);
 				rc = CIFSSMBClose(xid, pTcon,
 						  pSMBFile->netfid);
-				write_lock(&file->f_owner.lock);
 			}
 		}
 		write_lock(&GlobalSMBSeslock);
 		list_del(&pSMBFile->flist);
 		list_del(&pSMBFile->tlist);
 		write_unlock(&GlobalSMBSeslock);
-		write_unlock(&file->f_owner.lock);
 		kfree(pSMBFile->search_resume_name);
 		kfree(file->private_data);
 		file->private_data = NULL;
@@ -531,7 +527,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
 	    (struct cifsFileInfo *)file->private_data;
 	char *ptmp;
 
-	cFYI(1, ("Closedir inode = 0x%p with ", inode));
+	cFYI(1, ("Closedir inode = 0x%p", inode));
 
 	xid = GetXid();
 
@@ -605,7 +601,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 	}
 	if (pfLock->fl_flags & FL_ACCESS)
 		cFYI(1, ("Process suspended by mandatory locking - "
-			 "not implemented yet "));
+			 "not implemented yet"));
 	if (pfLock->fl_flags & FL_LEASE)
 		cFYI(1, ("Lease on file - not implemented yet"));
 	if (pfLock->fl_flags & 
@@ -1375,7 +1371,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
 
 	xid = GetXid();
 
-	cFYI(1, ("Sync file - name: %s datasync: 0x%x ", 
+	cFYI(1, ("Sync file - name: %s datasync: 0x%x", 
 		dentry->d_name.name, datasync));
 	
 	rc = filemap_fdatawrite(inode->i_mapping);
@@ -1404,7 +1400,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
 /*	fill in rpages then 
 	result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
 
-/*	cFYI(1, ("rpages is %d for sync page of Index %ld ", rpages, index));
+/*	cFYI(1, ("rpages is %d for sync page of Index %ld", rpages, index));
 
 #if 0
 	if (rc < 0)
@@ -1836,7 +1832,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
 	if (rc < 0)
 		goto io_error;
 	else
-		cFYI(1, ("Bytes read %d ",rc));
+		cFYI(1, ("Bytes read %d",rc));
                                                                                                                            
 	file->f_dentry->d_inode->i_atime =
 		current_fs_time(file->f_dentry->d_inode->i_sb);
@@ -1946,7 +1942,7 @@ static int cifs_prepare_write(struct file *file, struct page *page,
 	return 0;
 }
 
-struct address_space_operations cifs_addr_ops = {
+const struct address_space_operations cifs_addr_ops = {
 	.readpage = cifs_readpage,
 	.readpages = cifs_readpages,
 	.writepage = cifs_writepage,
@@ -1957,3 +1953,19 @@ struct address_space_operations cifs_addr_ops = {
 	/* .sync_page = cifs_sync_page, */
 	/* .direct_IO = */
 };
+
+/*
+ * cifs_readpages requires the server to support a buffer large enough to
+ * contain the header plus one complete page of data.  Inodes on servers
+ * whose maxBuf is smaller get this table, which leaves out .readpages.
+ */
+const struct address_space_operations cifs_addr_ops_smallbuf = {
+	.readpage = cifs_readpage,
+	.writepage = cifs_writepage,
+	.writepages = cifs_writepages,
+	.prepare_write = cifs_prepare_write,
+	.commit_write = cifs_commit_write,
+	.set_page_dirty = __set_page_dirty_nobuffers,
+	/* .sync_page = cifs_sync_page, */
+	/* .direct_IO = */
+};
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 4093764ef461..b88147c1dc27 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -41,7 +41,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
 	char *tmp_path;
 
 	pTcon = cifs_sb->tcon;
-	cFYI(1, ("Getting info on %s ", search_path));
+	cFYI(1, ("Getting info on %s", search_path));
 	/* could have done a find first instead but this returns more info */
 	rc = CIFSSMBUnixQPathInfo(xid, pTcon, search_path, &findData,
 				  cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
@@ -97,9 +97,9 @@ int cifs_get_inode_info_unix(struct inode **pinode,
 		inode = *pinode;
 		cifsInfo = CIFS_I(inode);
 
-		cFYI(1, ("Old time %ld ", cifsInfo->time));
+		cFYI(1, ("Old time %ld", cifsInfo->time));
 		cifsInfo->time = jiffies;
-		cFYI(1, ("New time %ld ", cifsInfo->time));
+		cFYI(1, ("New time %ld", cifsInfo->time));
 		/* this is ok to set on every inode revalidate */
 		atomic_set(&cifsInfo->inUse,1);
 
@@ -180,11 +180,12 @@ int cifs_get_inode_info_unix(struct inode **pinode,
 			else /* not direct, send byte range locks */ 
 				inode->i_fop = &cifs_file_ops;
 
-			inode->i_data.a_ops = &cifs_addr_ops;
 			/* check if server can support readpages */
 			if(pTcon->ses->server->maxBuf < 
-			    4096 + MAX_CIFS_HDR_SIZE)
-				inode->i_data.a_ops->readpages = NULL;
+			    PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
+				inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
+			else
+				inode->i_data.a_ops = &cifs_addr_ops;
 		} else if (S_ISDIR(inode->i_mode)) {
 			cFYI(1, ("Directory inode"));
 			inode->i_op = &cifs_dir_inode_ops;
@@ -421,23 +422,23 @@ int cifs_get_inode_info(struct inode **pinode,
 		inode = *pinode;
 		cifsInfo = CIFS_I(inode);
 		cifsInfo->cifsAttrs = attr;
-		cFYI(1, ("Old time %ld ", cifsInfo->time));
+		cFYI(1, ("Old time %ld", cifsInfo->time));
 		cifsInfo->time = jiffies;
-		cFYI(1, ("New time %ld ", cifsInfo->time));
+		cFYI(1, ("New time %ld", cifsInfo->time));
 
 		/* blksize needs to be multiple of two. So safer to default to
 		blksize and blkbits set in superblock so 2**blkbits and blksize
 		will match rather than setting to:
 		(pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/
 
-		/* Linux can not store file creation time unfortunately so we ignore it */
+		/* Linux can not store file creation time so ignore it */
 		inode->i_atime =
 		    cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime));
 		inode->i_mtime =
 		    cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime));
 		inode->i_ctime =
 		    cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
-		cFYI(0, ("Attributes came in as 0x%x ", attr));
+		cFYI(0, ("Attributes came in as 0x%x", attr));
 
 		/* set default mode. will override for dirs below */
 		if (atomic_read(&cifsInfo->inUse) == 0)
@@ -519,10 +520,11 @@ int cifs_get_inode_info(struct inode **pinode,
 			else /* not direct, send byte range locks */
 				inode->i_fop = &cifs_file_ops;
 
-			inode->i_data.a_ops = &cifs_addr_ops;
 			if(pTcon->ses->server->maxBuf < 
-			     4096 + MAX_CIFS_HDR_SIZE)
-				inode->i_data.a_ops->readpages = NULL;
+			     PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
+				inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
+			else
+				inode->i_data.a_ops = &cifs_addr_ops;
 		} else if (S_ISDIR(inode->i_mode)) {
 			cFYI(1, ("Directory inode"));
 			inode->i_op = &cifs_dir_inode_ops;
@@ -731,7 +733,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 	rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls,
 			  cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
 	if (rc) {
-		cFYI(1, ("cifs_mkdir returned 0x%x ", rc));
+		cFYI(1, ("cifs_mkdir returned 0x%x", rc));
 		d_drop(direntry);
 	} else {
 		inode->i_nlink++;
@@ -798,7 +800,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
 	char *full_path = NULL;
 	struct cifsInodeInfo *cifsInode;
 
-	cFYI(1, ("cifs_rmdir, inode = 0x%p with ", inode));
+	cFYI(1, ("cifs_rmdir, inode = 0x%p", inode));
 
 	xid = GetXid();
 
@@ -1121,7 +1123,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 
 	xid = GetXid();
 
-	cFYI(1, ("In cifs_setattr, name = %s attrs->iavalid 0x%x ",
+	cFYI(1, ("setattr on file %s attrs->iavalid 0x%x",
 		 direntry->d_name.name, attrs->ia_valid));
 
 	cifs_sb = CIFS_SB(direntry->d_inode->i_sb);
@@ -1157,6 +1159,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 		   when the local oplock break takes longer to flush
 		   writebehind data than the SMB timeout for the SetPathInfo
 		   request would allow */
+
 		open_file = find_writable_file(cifsInode);
 		if (open_file) {
 			__u16 nfid = open_file->netfid;
@@ -1289,7 +1292,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 		it may be useful to Windows - but we do
 		not want to set ctime unless some other
 		timestamp is changing */
-		cFYI(1, ("CIFS - CTIME changed "));
+		cFYI(1, ("CIFS - CTIME changed"));
 		time_buf.ChangeTime =
 		    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
 	} else
@@ -1356,7 +1359,7 @@ cifs_setattr_exit:
 
 void cifs_delete_inode(struct inode *inode)
 {
-	cFYI(1, ("In cifs_delete_inode, inode = 0x%p ", inode));
+	cFYI(1, ("In cifs_delete_inode, inode = 0x%p", inode));
 	/* may have to add back in if and when safe distributed caching of
 	   directories added e.g. via FindNotify */
 }
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 2ec99f833142..a57f5d6e6213 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -167,7 +167,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
 		return -ENOMEM;
 	}
 
-	cFYI(1, ("Full path: %s ", full_path));
+	cFYI(1, ("Full path: %s", full_path));
 	cFYI(1, ("symname is %s", symname));
 
 	/* BB what if DFS and this volume is on different share? BB */
@@ -186,8 +186,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
 						 inode->i_sb,xid);
 
 		if (rc != 0) {
-			cFYI(1,
-			     ("Create symlink worked but get_inode_info failed with rc = %d ",
+			cFYI(1, ("Create symlink ok, getinodeinfo fail rc = %d",
 			      rc));
 		} else {
 			if (pTcon->nocase)
@@ -289,7 +288,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
 					else {
 						cFYI(1,("num referral: %d",num_referrals));
 						if(referrals) {
-							cFYI(1,("referral string: %s ",referrals));
+							cFYI(1,("referral string: %s",referrals));
 							strncpy(tmpbuffer, referrals, len-1);                            
 						}
 					}
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index fafd056426e4..22c937e5884f 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -101,6 +101,7 @@ sesInfoFree(struct cifsSesInfo *buf_to_free)
 	kfree(buf_to_free->serverDomain);
 	kfree(buf_to_free->serverNOS);
 	kfree(buf_to_free->password);
+	kfree(buf_to_free->domainName);
 	kfree(buf_to_free);
 }
 
@@ -499,11 +500,12 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
 		if(pSMBr->ByteCount > sizeof(struct file_notify_information)) {
 			data_offset = le32_to_cpu(pSMBr->DataOffset);
 
-			pnotify = (struct file_notify_information *)((char *)&pSMBr->hdr.Protocol
-				+ data_offset);
-			cFYI(1,("dnotify on %s with action: 0x%x",pnotify->FileName,
+			pnotify = (struct file_notify_information *)
+				((char *)&pSMBr->hdr.Protocol + data_offset);
+			cFYI(1,("dnotify on %s Action: 0x%x",pnotify->FileName,
 				pnotify->Action));  /* BB removeme BB */
-	             /*   cifs_dump_mem("Received notify Data is: ",buf,sizeof(struct smb_hdr)+60); */
+	             /*   cifs_dump_mem("Rcvd notify Data: ",buf,
+				sizeof(struct smb_hdr)+60); */
 			return TRUE;
 		}
 		if(pSMBr->hdr.Status.CifsError) {
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 5de74d216fdd..b66eff5dc624 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -84,11 +84,11 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
 
 static const struct smb_to_posix_error mapping_table_ERRSRV[] = {
 	{ERRerror, -EIO},
-	{ERRbadpw, -EPERM},
+	{ERRbadpw, -EACCES},  /* was EPERM */
 	{ERRbadtype, -EREMOTE},
 	{ERRaccess, -EACCES},
 	{ERRinvtid, -ENXIO},
-	{ERRinvnetname, -ENODEV},
+	{ERRinvnetname, -ENXIO},
 	{ERRinvdevice, -ENXIO},
 	{ERRqfull, -ENOSPC},
 	{ERRqtoobig, -ENOSPC},
diff --git a/fs/cifs/ntlmssp.c b/fs/cifs/ntlmssp.c
deleted file mode 100644
index 115359cc7a32..000000000000
--- a/fs/cifs/ntlmssp.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- *   fs/cifs/ntlmssp.h
- *
- *   Copyright (c) International Business Machines  Corp., 2006
- *   Author(s): Steve French (sfrench@us.ibm.com)
- *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "cifspdu.h"
-#include "cifsglob.h"
-#include "cifsproto.h"
-#include "cifs_unicode.h"
-#include "cifs_debug.h"
-#include "ntlmssp.h"
-#include "nterr.h"
-
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
-{
-	__u32 capabilities = 0;
-
-	/* init fields common to all four types of SessSetup */
-	/* note that header is initialized to zero in header_assemble */
-	pSMB->req.AndXCommand = 0xFF;
-	pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
-	pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
-
-	/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
-
-	/* BB verify whether signing required on neg or just on auth frame 
-	   (and NTLM case) */
-
-	capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
-			CAP_LARGE_WRITE_X | CAP_LARGE_READ_X;
-
-	if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
-		pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
-
-	if (ses->capabilities & CAP_UNICODE) {
-		pSMB->req.hdr.Flags2 |= SMBFLG2_UNICODE;
-		capabilities |= CAP_UNICODE;
-	}
-	if (ses->capabilities & CAP_STATUS32) {
-		pSMB->req.hdr.Flags2 |= SMBFLG2_ERR_STATUS;
-		capabilities |= CAP_STATUS32;
-	}
-	if (ses->capabilities & CAP_DFS) {
-		pSMB->req.hdr.Flags2 |= SMBFLG2_DFS;
-		capabilities |= CAP_DFS;
-	}
-
-	/* BB check whether to init vcnum BB */
-	return capabilities;
-}
-int 
-CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, const int type,
-		  int * pNTLMv2_flg, const struct nls_table *nls_cp)
-{
-	int rc = 0;
-	int wct;
-	struct smb_hdr *smb_buffer;
-	char *bcc_ptr;
-	SESSION_SETUP_ANDX *pSMB;
-	__u32 capabilities;
-
-	if(ses == NULL)
-		return -EINVAL;
-
-	cFYI(1,("SStp type: %d",type));
-	if(type < CIFS_NTLM) {
-#ifndef CONFIG_CIFS_WEAK_PW_HASH
-		/* LANMAN and plaintext are less secure and off by default.
-		So we make this explicitly be turned on in kconfig (in the
-		build) and turned on at runtime (changed from the default)
-		in proc/fs/cifs or via mount parm.  Unfortunately this is
-		needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
-		return -EOPNOTSUPP;
-#endif
-		wct = 10; /* lanman 2 style sessionsetup */
-	} else if(type < CIFS_NTLMSSP_NEG)
-		wct = 13; /* old style NTLM sessionsetup */
-	else /* same size for negotiate or auth, NTLMSSP or extended security */
-		wct = 12;
-
-	rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses,
-			    (void **)&smb_buffer);
-	if(rc)
-		return rc;
-
-	pSMB = (SESSION_SETUP_ANDX *)smb_buffer;
-
-	capabilities = cifs_ssetup_hdr(ses, pSMB);
-	bcc_ptr = pByteArea(smb_buffer);
-	if(type > CIFS_NTLM) {
-		pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
-		capabilities |= CAP_EXTENDED_SECURITY;
-		pSMB->req.Capabilities = cpu_to_le32(capabilities);
-		/* BB set password lengths */
-	} else if(type < CIFS_NTLM) /* lanman */ {
-		/* no capabilities flags in old lanman negotiation */
-		/* pSMB->old_req.PasswordLength = */ /* BB fixme BB */
-	} else /* type CIFS_NTLM */ {
-		pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
-		pSMB->req_no_secext.CaseInsensitivePasswordLength =
-			cpu_to_le16(CIFS_SESSION_KEY_SIZE);
-		pSMB->req_no_secext.CaseSensitivePasswordLength =
-			cpu_to_le16(CIFS_SESSION_KEY_SIZE);
-	}
-
-
-	/* copy session key */
-
-	/* if Unicode, align strings to two byte boundary */
-
-	/* copy user name */ /* BB Do we need to special case null user name? */
-
-	/* copy domain name */
-
-	/* copy Linux version */
-
-	/* copy network operating system name */
-
-	/* update bcc and smb buffer length */
-
-/*	rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buf_type, 0); */
-	/* SMB request buf freed in SendReceive2 */
-
-	return rc;
-}
-#endif /* CONFIG_CIFS_EXPERIMENTAL */
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b689c5035124..03bbcb377913 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -21,6 +21,7 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
+#include <linux/pagemap.h>
 #include <linux/stat.h>
 #include <linux/smp_lock.h>
 #include "cifspdu.h"
@@ -31,8 +32,8 @@
 #include "cifs_fs_sb.h"
 #include "cifsfs.h"
 
-/* BB fixme - add debug wrappers around this function to disable it fixme BB */
-/* static void dump_cifs_file_struct(struct file *file, char *label)
+#ifdef CONFIG_CIFS_DEBUG2
+static void dump_cifs_file_struct(struct file *file, char *label)
 {
 	struct cifsFileInfo * cf;
 
@@ -53,7 +54,8 @@
 		}
 		
 	}
-} */
+}
+#endif /* DEBUG2 */
 
 /* Returns one if new inode created (which therefore needs to be hashed) */
 /* Might check in the future if inode number changed so we can rehash inode */
@@ -107,32 +109,52 @@ static int construct_dentry(struct qstr *qstring, struct file *file,
 	return rc;
 }
 
-static void fill_in_inode(struct inode *tmp_inode,
-	FILE_DIRECTORY_INFO *pfindData, int *pobject_type, int isNewInode)
+static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
+		char * buf, int *pobject_type, int isNewInode)
 {
 	loff_t local_size;
 	struct timespec local_mtime;
 
 	struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
 	struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb);
-	__u32 attr = le32_to_cpu(pfindData->ExtFileAttributes);
-	__u64 allocation_size = le64_to_cpu(pfindData->AllocationSize);
-	__u64 end_of_file = le64_to_cpu(pfindData->EndOfFile);
-
-	cifsInfo->cifsAttrs = attr;
-	cifsInfo->time = jiffies;
+	__u32 attr;
+	__u64 allocation_size;
+	__u64 end_of_file;
 
 	/* save mtime and size */
 	local_mtime = tmp_inode->i_mtime;
 	local_size  = tmp_inode->i_size;
 
+	if(new_buf_type) {
+		FILE_DIRECTORY_INFO *pfindData = (FILE_DIRECTORY_INFO *)buf;
+
+		attr = le32_to_cpu(pfindData->ExtFileAttributes);
+		allocation_size = le64_to_cpu(pfindData->AllocationSize);
+		end_of_file = le64_to_cpu(pfindData->EndOfFile);
+		tmp_inode->i_atime =
+		      cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime));
+		tmp_inode->i_mtime =
+		      cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime));
+		tmp_inode->i_ctime =
+		      cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
+	} else { /* legacy, OS2 and DOS style */
+		FIND_FILE_STANDARD_INFO * pfindData = 
+			(FIND_FILE_STANDARD_INFO *)buf;
+
+		attr = le16_to_cpu(pfindData->Attributes);
+		allocation_size = le32_to_cpu(pfindData->AllocationSize);
+		end_of_file = le32_to_cpu(pfindData->DataSize);
+		tmp_inode->i_atime = CURRENT_TIME;
+		/* BB FIXME - add dos time handling: i_mtime and i_ctime
+		are not set in this legacy branch yet */
+
+	}
+
 	/* Linux can not store file creation time unfortunately so ignore it */
-	tmp_inode->i_atime =
-	    cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime));
-	tmp_inode->i_mtime =
-	    cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime));
-	tmp_inode->i_ctime =
-	    cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
+
+	cifsInfo->cifsAttrs = attr;
+	cifsInfo->time = jiffies;
+
 	/* treat dos attribute of read-only as read-only mode bit e.g. 555? */
 	/* 2767 perms - indicate mandatory locking */
 		/* BB fill in uid and gid here? with help from winbind? 
@@ -215,11 +237,13 @@ static void fill_in_inode(struct inode *tmp_inode,
 		else
 			tmp_inode->i_fop = &cifs_file_ops;
 
-		tmp_inode->i_data.a_ops = &cifs_addr_ops;
 		if((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
 		   (cifs_sb->tcon->ses->server->maxBuf <
-			4096 + MAX_CIFS_HDR_SIZE))
-			tmp_inode->i_data.a_ops->readpages = NULL;
+			PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
+			tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
+		else
+			tmp_inode->i_data.a_ops = &cifs_addr_ops;
+
 		if(isNewInode)
 			return; /* No sense invalidating pages for new inode
 				   since have not started caching readahead file
@@ -338,11 +362,12 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
 		else
 			tmp_inode->i_fop = &cifs_file_ops;
 
-		tmp_inode->i_data.a_ops = &cifs_addr_ops;
 		if((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
 		   (cifs_sb->tcon->ses->server->maxBuf < 
-			4096 + MAX_CIFS_HDR_SIZE))
-			tmp_inode->i_data.a_ops->readpages = NULL;
+			PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
+			tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
+		else
+			tmp_inode->i_data.a_ops = &cifs_addr_ops;
 
 		if(isNewInode)
 			return; /* No sense invalidating pages for new inode since we
@@ -415,7 +440,10 @@ static int initiate_cifs_search(const int xid, struct file *file)
 ffirst_retry:
 	/* test for Unix extensions */
 	if (pTcon->ses->capabilities & CAP_UNIX) {
-		cifsFile->srch_inf.info_level = SMB_FIND_FILE_UNIX; 
+		cifsFile->srch_inf.info_level = SMB_FIND_FILE_UNIX;
+	} else if ((pTcon->ses->capabilities & 
+			(CAP_NT_SMBS | CAP_NT_FIND)) == 0) {
+		cifsFile->srch_inf.info_level = SMB_FIND_FILE_INFO_STANDARD;
 	} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
 		cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO;
 	} else /* not srvinos - BB fixme add check for backlevel? */ {
@@ -451,12 +479,19 @@ static int cifs_unicode_bytelen(char *str)
 	return len << 1;
 }
 
-static char *nxt_dir_entry(char *old_entry, char *end_of_smb)
+static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level)
 {
 	char * new_entry;
 	FILE_DIRECTORY_INFO * pDirInfo = (FILE_DIRECTORY_INFO *)old_entry;
 
-	new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset);
+	if(level == SMB_FIND_FILE_INFO_STANDARD) {
+		FIND_FILE_STANDARD_INFO * pfData;
+		pfData = (FIND_FILE_STANDARD_INFO *)pDirInfo;
+
+		new_entry = old_entry + sizeof(FIND_FILE_STANDARD_INFO) +
+				pfData->FileNameLength;
+	} else
+		new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset);
 	cFYI(1,("new entry %p old entry %p",new_entry,old_entry));
 	/* validate that new_entry is not past end of SMB */
 	if(new_entry >= end_of_smb) {
@@ -464,7 +499,10 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb)
 		      ("search entry %p began after end of SMB %p old entry %p",
 			new_entry, end_of_smb, old_entry)); 
 		return NULL;
-	} else if (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb) {
+	} else if(((level == SMB_FIND_FILE_INFO_STANDARD) &&
+		   (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb)) ||
+		  ((level != SMB_FIND_FILE_INFO_STANDARD) &&
+		   (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb)))  {
 		cERROR(1,("search entry %p extends after end of SMB %p",
 			new_entry, end_of_smb));
 		return NULL;
@@ -482,7 +520,7 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
 	char * filename = NULL;
 	int len = 0; 
 
-	if(cfile->srch_inf.info_level == 0x202) {
+	if(cfile->srch_inf.info_level == SMB_FIND_FILE_UNIX) {
 		FILE_UNIX_INFO * pFindData = (FILE_UNIX_INFO *)current_entry;
 		filename = &pFindData->FileName[0];
 		if(cfile->srch_inf.unicode) {
@@ -491,26 +529,34 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
 			/* BB should we make this strnlen of PATH_MAX? */
 			len = strnlen(filename, 5);
 		}
-	} else if(cfile->srch_inf.info_level == 0x101) {
+	} else if(cfile->srch_inf.info_level == SMB_FIND_FILE_DIRECTORY_INFO) {
 		FILE_DIRECTORY_INFO * pFindData = 
 			(FILE_DIRECTORY_INFO *)current_entry;
 		filename = &pFindData->FileName[0];
 		len = le32_to_cpu(pFindData->FileNameLength);
-	} else if(cfile->srch_inf.info_level == 0x102) {
+	} else if(cfile->srch_inf.info_level == 
+			SMB_FIND_FILE_FULL_DIRECTORY_INFO) {
 		FILE_FULL_DIRECTORY_INFO * pFindData = 
 			(FILE_FULL_DIRECTORY_INFO *)current_entry;
 		filename = &pFindData->FileName[0];
 		len = le32_to_cpu(pFindData->FileNameLength);
-	} else if(cfile->srch_inf.info_level == 0x105) {
+	} else if(cfile->srch_inf.info_level ==
+			SMB_FIND_FILE_ID_FULL_DIR_INFO) {
 		SEARCH_ID_FULL_DIR_INFO * pFindData = 
 			(SEARCH_ID_FULL_DIR_INFO *)current_entry;
 		filename = &pFindData->FileName[0];
 		len = le32_to_cpu(pFindData->FileNameLength);
-	} else if(cfile->srch_inf.info_level == 0x104) {
+	} else if(cfile->srch_inf.info_level == 
+			SMB_FIND_FILE_BOTH_DIRECTORY_INFO) {
 		FILE_BOTH_DIRECTORY_INFO * pFindData = 
 			(FILE_BOTH_DIRECTORY_INFO *)current_entry;
 		filename = &pFindData->FileName[0];
 		len = le32_to_cpu(pFindData->FileNameLength);
+	} else if(cfile->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) {
+		FIND_FILE_STANDARD_INFO * pFindData =
+			(FIND_FILE_STANDARD_INFO *)current_entry;
+		filename = &pFindData->FileName[0];
+		len = pFindData->FileNameLength; /* one byte, no byte swap */
 	} else {
 		cFYI(1,("Unknown findfirst level %d",cfile->srch_inf.info_level));
 	}
@@ -597,7 +643,9 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
 	. and .. for the root of a drive and for those we need
 	to start two entries earlier */
 
-/*	dump_cifs_file_struct(file, "In fce ");*/
+#ifdef CONFIG_CIFS_DEBUG2
+	dump_cifs_file_struct(file, "In fce ");
+#endif
 	if(((index_to_find < cifsFile->srch_inf.index_of_last_entry) && 
 	     is_dir_changed(file)) || 
 	   (index_to_find < first_entry_in_buffer)) {
@@ -644,10 +692,12 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
 		first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry
 					- cifsFile->srch_inf.entries_in_buffer;
 		pos_in_buf = index_to_find - first_entry_in_buffer;
-		cFYI(1,("found entry - pos_in_buf %d",pos_in_buf)); 
+		cFYI(1,("found entry - pos_in_buf %d",pos_in_buf));
+
 		for(i=0;(i<(pos_in_buf)) && (current_entry != NULL);i++) {
 			/* go entry by entry figuring out which is first */
-			current_entry = nxt_dir_entry(current_entry,end_of_smb);
+			current_entry = nxt_dir_entry(current_entry,end_of_smb,
+						cifsFile->srch_inf.info_level);
 		}
 		if((current_entry == NULL) && (i < pos_in_buf)) {
 			/* BB fixme - check if we should flag this error */
@@ -674,7 +724,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
 /* inode num, inode type and filename returned */
 static int cifs_get_name_from_search_buf(struct qstr *pqst,
 	char *current_entry, __u16 level, unsigned int unicode,
-	struct cifs_sb_info * cifs_sb, ino_t *pinum)
+	struct cifs_sb_info * cifs_sb, int max_len, ino_t *pinum)
 {
 	int rc = 0;
 	unsigned int len = 0;
@@ -718,10 +768,22 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
 			(FILE_BOTH_DIRECTORY_INFO *)current_entry;
 		filename = &pFindData->FileName[0];
 		len = le32_to_cpu(pFindData->FileNameLength);
+	} else if(level == SMB_FIND_FILE_INFO_STANDARD) {
+		FIND_FILE_STANDARD_INFO * pFindData =
+			(FIND_FILE_STANDARD_INFO *)current_entry;
+		filename = &pFindData->FileName[0];
+		/* one byte length, no name conversion */
+		len = (unsigned int)pFindData->FileNameLength;
 	} else {
 		cFYI(1,("Unknown findfirst level %d",level));
 		return -EINVAL;
 	}
+
+	if(len > max_len) {
+		cERROR(1,("bad search response length %u past smb end", len));
+		return -EINVAL;
+	}
+
 	if(unicode) {
 		/* BB fixme - test with long names */
 		/* Note converted filename can be longer than in unicode */
@@ -741,7 +803,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
 }
 
 static int cifs_filldir(char *pfindEntry, struct file *file,
-	filldir_t filldir, void *direntry, char *scratch_buf)
+	filldir_t filldir, void *direntry, char *scratch_buf, int max_len)
 {
 	int rc = 0;
 	struct qstr qstring;
@@ -777,6 +839,7 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
 	rc = cifs_get_name_from_search_buf(&qstring,pfindEntry,
 			pCifsF->srch_inf.info_level,
 			pCifsF->srch_inf.unicode,cifs_sb,
+			max_len,
 			&inum /* returned */);
 
 	if(rc)
@@ -798,13 +861,16 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
 	/* we pass in rc below, indicating whether it is a new inode,
 	   so we can figure out whether to invalidate the inode cached
 	   data if the file has changed */
-	if(pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX) {
+	if(pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX)
 		unix_fill_in_inode(tmp_inode,
-				   (FILE_UNIX_INFO *)pfindEntry,&obj_type, rc);
-	} else {
-		fill_in_inode(tmp_inode,
-			      (FILE_DIRECTORY_INFO *)pfindEntry,&obj_type, rc);
-	}
+				   (FILE_UNIX_INFO *)pfindEntry,
+				   &obj_type, rc);
+	else if(pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD)
+		fill_in_inode(tmp_inode, 0 /* old level 1 buffer type */,
+				pfindEntry, &obj_type, rc);
+	else
+		fill_in_inode(tmp_inode, 1 /* NT */, pfindEntry, &obj_type, rc);
+	
 	
 	rc = filldir(direntry,qstring.name,qstring.len,file->f_pos,
 		     tmp_inode->i_ino,obj_type);
@@ -864,6 +930,12 @@ static int cifs_save_resume_key(const char *current_entry,
 		filename = &pFindData->FileName[0];
 		len = le32_to_cpu(pFindData->FileNameLength);
 		cifsFile->srch_inf.resume_key = pFindData->FileIndex;
+	} else if(level == SMB_FIND_FILE_INFO_STANDARD) {
+		FIND_FILE_STANDARD_INFO * pFindData =
+			(FIND_FILE_STANDARD_INFO *)current_entry;
+		filename = &pFindData->FileName[0];
+		/* one byte length, no name conversion */
+		len = (unsigned int)pFindData->FileNameLength;
 	} else {
 		cFYI(1,("Unknown findfirst level %d",level));
 		return -EINVAL;
@@ -884,6 +956,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 	int num_to_fill = 0;
 	char * tmp_buf = NULL;
 	char * end_of_smb;
+	int max_len;
 
 	xid = GetXid();
 
@@ -909,7 +982,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 	case 1:
 		if (filldir(direntry, "..", 2, file->f_pos,
 		     file->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) {
-			cERROR(1, ("Filldir for parent dir failed "));
+			cERROR(1, ("Filldir for parent dir failed"));
 			rc = -ENOMEM;
 			break;
 		}
@@ -959,10 +1032,11 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 			goto rddir2_exit;
 		}
 		cFYI(1,("loop through %d times filling dir for net buf %p",
-			num_to_fill,cifsFile->srch_inf.ntwrk_buf_start)); 
-		end_of_smb = cifsFile->srch_inf.ntwrk_buf_start +
-			smbCalcSize((struct smb_hdr *)
-				    cifsFile->srch_inf.ntwrk_buf_start);
+			num_to_fill,cifsFile->srch_inf.ntwrk_buf_start));
+		max_len = smbCalcSize((struct smb_hdr *)
+				cifsFile->srch_inf.ntwrk_buf_start);
+		end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
+
 		/* To be safe - for UCS to UTF-8 with strings loaded
 		with the rare long characters alloc more to account for
 		such multibyte target UTF-8 characters. cifs_unicode.c,
@@ -977,17 +1051,19 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 			}
 			/* if buggy server returns . and .. late do
 			we want to check for that here? */
-			rc = cifs_filldir(current_entry, file, 
-					filldir, direntry,tmp_buf);
+			rc = cifs_filldir(current_entry, file,
+					filldir, direntry, tmp_buf, max_len);
 			file->f_pos++;
-			if(file->f_pos == cifsFile->srch_inf.index_of_last_entry) {
+			if(file->f_pos == 
+				cifsFile->srch_inf.index_of_last_entry) {
 				cFYI(1,("last entry in buf at pos %lld %s",
-					file->f_pos,tmp_buf)); /* BB removeme BB */
+					file->f_pos,tmp_buf));
 				cifs_save_resume_key(current_entry,cifsFile);
 				break;
 			} else 
-				current_entry = nxt_dir_entry(current_entry,
-							      end_of_smb);
+				current_entry = 
+					nxt_dir_entry(current_entry, end_of_smb,
+						cifsFile->srch_inf.info_level);
 		}
 		kfree(tmp_buf);
 		break;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
new file mode 100644
index 000000000000..7202d534ef0b
--- /dev/null
+++ b/fs/cifs/sess.c
@@ -0,0 +1,538 @@
+/*
+ *   fs/cifs/sess.c
+ *
+ *   SMB/CIFS session setup handling routines
+ *
+ *   Copyright (c) International Business Machines  Corp., 2006
+ *   Author(s): Steve French (sfrench@us.ibm.com)
+ *
+ *   This library is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU Lesser General Public License as published
+ *   by the Free Software Foundation; either version 2.1 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This library is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License
+ *   along with this library; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "cifspdu.h"
+#include "cifsglob.h"
+#include "cifsproto.h"
+#include "cifs_unicode.h"
+#include "cifs_debug.h"
+#include "ntlmssp.h"
+#include "nterr.h"
+#include <linux/utsname.h>
+
+extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
+                         unsigned char *p24);
+
+static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
+{
+	/*
+	 * Fill in the request fields common to all four types of SessSetup
+	 * and hand back the capability bits for the caller to store. Note
+	 * that the header is initialized to zero in header_assemble.
+	 */
+	__u32 caps = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
+		     CAP_LARGE_WRITE_X | CAP_LARGE_READ_X;
+
+	pSMB->req.AndXCommand = 0xFF;
+	pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
+	pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
+
+	/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
+
+	/* BB verify whether signing required on neg or just on auth frame
+	   (and NTLM case) */
+	if (ses->server->secMode &
+	    (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+		pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
+
+	/* mirror each session capability into the caps and header flags */
+	if (ses->capabilities & CAP_UNICODE) {
+		caps |= CAP_UNICODE;
+		pSMB->req.hdr.Flags2 |= SMBFLG2_UNICODE;
+	}
+	if (ses->capabilities & CAP_STATUS32) {
+		caps |= CAP_STATUS32;
+		pSMB->req.hdr.Flags2 |= SMBFLG2_ERR_STATUS;
+	}
+	if (ses->capabilities & CAP_DFS) {
+		caps |= CAP_DFS;
+		pSMB->req.hdr.Flags2 |= SMBFLG2_DFS;
+	}
+	if (ses->capabilities & CAP_UNIX)
+		caps |= CAP_UNIX;	/* no Flags2 bit is set for this one */
+
+	return caps;	/* BB check whether to init vcnum BB */
+}
+
+/* Append the user, domain, OS and network OS names to the byte area as
+   strings converted by cifs_strtoUCS, then advance *pbcc_area past them.
+   The byte area must be word aligned before the call. */
+static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
+			    const struct nls_table * nls_cp)
+{
+	char * bcc_ptr = *pbcc_area;
+	int bytes_ret = 0;
+
+	/* BB FIXME add check that strings total less
+	than 335 or will need to send them as arrays */
+
+	/* copy user */
+	if(ses->userName == NULL) {
+		/* null user mount: no conversion runs on this path, so
+		   write the two byte terminator ourselves */
+		*bcc_ptr = 0;
+		*(bcc_ptr + 1) = 0;
+	} else { /* 300 should be long enough for any conceivable user name */
+		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName,
+					  300, nls_cp);
+	}
+	bcc_ptr += 2 * bytes_ret;
+	bcc_ptr += 2; /* account for null termination */
+	/* copy domain */
+	if(ses->domainName == NULL)
+		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr,
+					  "CIFS_LINUX_DOM", 32, nls_cp);
+	else
+		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName,
+					  256, nls_cp);
+	bcc_ptr += 2 * bytes_ret;
+	bcc_ptr += 2;  /* account for null terminator */
+
+	/* Copy OS version */
+	bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
+				  nls_cp);
+	bcc_ptr += 2 * bytes_ret;
+	bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release,
+				  32, nls_cp);
+	bcc_ptr += 2 * bytes_ret;
+	bcc_ptr += 2; /* trailing null */
+
+	bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
+				  32, nls_cp);
+	bcc_ptr += 2 * bytes_ret;
+	bcc_ptr += 2; /* trailing null */
+	*pbcc_area = bcc_ptr;
+}
+
+/* Append the user, domain, OS and network OS names to the byte area as
+   null terminated single byte strings and advance *pbcc_area past them. */
+static void ascii_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
+			  const struct nls_table * nls_cp)
+{
+	char * bcc_ptr = *pbcc_area;
+
+	/* copy user */
+	/* BB what about null user mounts - check that we do this BB */
+	if(ses->userName != NULL) {
+		/* 300 should be long enough for any conceivable user name */
+		strncpy(bcc_ptr, ses->userName, 300);
+		/* BB improve check for overflow */
+		/* measured only here: strnlen must not see a NULL userName */
+		bcc_ptr += strnlen(ses->userName, 300);
+	}
+	*bcc_ptr = 0;
+	bcc_ptr++; /* account for null termination */
+
+	/* copy domain */
+	if(ses->domainName == NULL) {
+		strcpy(bcc_ptr, "CIFS_LINUX_DOM");
+		bcc_ptr += 14;  /* strlen(CIFS_LINUX_DOM) */
+	} else {
+		strncpy(bcc_ptr, ses->domainName, 256);
+		bcc_ptr += strnlen(ses->domainName, 256);
+	}
+	*bcc_ptr = 0;
+	bcc_ptr++;
+
+	/* BB check for overflow here */
+
+	strcpy(bcc_ptr, "Linux version ");
+	bcc_ptr += strlen("Linux version ");
+	strcpy(bcc_ptr, system_utsname.release);
+	bcc_ptr += strlen(system_utsname.release) + 1;
+
+	strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
+	bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
+
+	*pbcc_area = bcc_ptr;
+}
+
+/* Parse the strings at the end of a Unicode session setup response - server
+   OS, network OS, then domain - into freshly allocated ses->serverOS,
+   ses->serverNOS and ses->serverDomain. Parsing stops quietly at the first
+   string that does not fit in the bleft bytes remaining; always returns 0. */
+static int decode_unicode_ssetup(char ** pbcc_area, int bleft, struct cifsSesInfo *ses,
+                            const struct nls_table * nls_cp)
+{
+	int rc = 0;
+	int words_left, len;
+	char * data = *pbcc_area;
+
+	cFYI(1,("bleft %d",bleft));
+
+	/* word align, if bytes remaining is not even */
+	if(bleft % 2) {
+		bleft--;
+		data++;
+	}
+	words_left = bleft / 2;
+
+	/* save off server operating system */
+	len = UniStrnlen((wchar_t *) data, words_left);
+
+/* We look for obvious messed up bcc or strings in response so we do not go off
+   the end since (at least) WIN2K and Windows XP have a major bug in not null
+   terminating last Unicode string in response  */
+	if(len >= words_left)
+		return rc;
+
+	kfree(ses->serverOS);
+	/* UTF-8 string will not grow more than four times as big as UCS-16;
+	   the extra byte stays zero (kzalloc) so the result is always null
+	   terminated, and a zero length string never asks for 0 bytes */
+	ses->serverOS = kzalloc(4 * len + 1, GFP_KERNEL);
+	if(ses->serverOS != NULL) {
+		cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len,
+				   nls_cp);
+	}
+	data += 2 * (len + 1);
+	words_left -= len + 1;
+
+	/* save off server network operating system */
+	len = UniStrnlen((wchar_t *) data, words_left);
+
+	if(len >= words_left)
+		return rc;
+
+	kfree(ses->serverNOS);
+	ses->serverNOS = kzalloc(4 * len + 1, GFP_KERNEL); /* as for serverOS */
+	if(ses->serverNOS != NULL) {
+		cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len,
+				   nls_cp);
+		if(strncmp(ses->serverNOS, "NT LAN Manager 4",16) == 0) {
+			cFYI(1,("NT4 server"));
+			ses->flags |= CIFS_SES_NT4;
+		}
+	}
+	data += 2 * (len + 1);
+	words_left -= len + 1;
+
+	/* save off server domain */
+	len = UniStrnlen((wchar_t *) data, words_left);
+
+	/* NOTE(review): ">" not ">=", presumably for an unterminated last string */
+	if(len > words_left)
+		return rc;
+
+	kfree(ses->serverDomain);
+	/* sized like the other two strings: 2 * (len + 1) was too small for
+	   names whose characters convert to more than two bytes each */
+	ses->serverDomain = kzalloc(4 * len + 1, GFP_KERNEL);
+	if(ses->serverDomain != NULL)
+		cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len,
+				   nls_cp);
+	data += 2 * (len + 1);
+	words_left -= len + 1;
+
+	cFYI(1,("words left: %d",words_left));
+
+	return rc;
+}
+
+/* Parse the null terminated strings at the end of a non-Unicode session setup
+   response - server OS, network OS, then domain - into freshly allocated
+   ses->serverOS, ses->serverNOS and ses->serverDomain. Always returns 0. */
+static int decode_ascii_ssetup(char ** pbcc_area, int bleft, struct cifsSesInfo *ses,
+                            const struct nls_table * nls_cp)
+{
+	int rc = 0;
+	int len;
+	char * bcc_ptr = *pbcc_area;
+
+	cFYI(1,("decode sessetup ascii. bleft %d", bleft));
+
+	/* save off server operating system */
+	len = strnlen(bcc_ptr, bleft);
+	if(len >= bleft)
+		return rc;
+
+	kfree(ses->serverOS);
+	ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
+	if(ses->serverOS)
+		strncpy(ses->serverOS, bcc_ptr, len);
+
+	bcc_ptr += len + 1;
+	bleft -= len + 1;
+
+	/* save off server network operating system */
+	len = strnlen(bcc_ptr, bleft);
+	if(len >= bleft)
+		return rc;
+
+	kfree(ses->serverNOS);
+	ses->serverNOS = kzalloc(len + 1, GFP_KERNEL);
+	if(ses->serverNOS)
+		strncpy(ses->serverNOS, bcc_ptr, len);
+
+	bcc_ptr += len + 1;
+	bleft -= len + 1;
+
+	/* save off server domain - into serverDomain, never over serverOS */
+	len = strnlen(bcc_ptr, bleft);
+	if(len > bleft)
+		return rc;
+
+	kfree(ses->serverDomain);
+	ses->serverDomain = kzalloc(len + 1, GFP_KERNEL);
+	if(ses->serverDomain)
+		strncpy(ses->serverDomain, bcc_ptr, len);
+
+	bcc_ptr += len + 1;
+	bleft -= len + 1;
+
+	cFYI(1,("ascii: bytes left %d",bleft));
+
+	return rc;
+}
+
+/* Build and send an SMB session setup request for the security type in
+   ses->server->secType, then record the reply in ses: the Uid (kept in wire
+   format) and the server strings parsed by the decode_*_ssetup helpers.
+   NTLMSSP/SPNEGO requests carry the capabilities only. Returns 0 on success,
+   -EINVAL (NULL ses, bad blob length), -EOPNOTSUPP (LANMAN not built in),
+   -ENOMEM, -EIO (bad word count) or the error from setup/send/receive. */
+int
+CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
+		const struct nls_table *nls_cp)
+{
+	int rc = 0;
+	int wct;
+	struct smb_hdr *smb_buf;
+	char *bcc_ptr;
+	char *str_area;
+	SESSION_SETUP_ANDX *pSMB;
+	__u32 capabilities;
+	int count;
+	int resp_buf_type = 0;
+	struct kvec iov[2];
+	enum securityEnum type;
+	__u16 action;
+	int bytes_remaining;
+
+	if(ses == NULL)
+		return -EINVAL;
+
+	type = ses->server->secType;
+
+	cFYI(1,("sess setup type %d",type));
+	if(type == LANMAN) {
+#ifndef CONFIG_CIFS_WEAK_PW_HASH
+		/* LANMAN and plaintext are less secure and off by default.
+		So we make this explicitly be turned on in kconfig (in the
+		build) and turned on at runtime (changed from the default)
+		in proc/fs/cifs or via mount parm.  Unfortunately this is
+		needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
+		return -EOPNOTSUPP;
+#endif
+		wct = 10; /* lanman 2 style sessionsetup */
+	} else if((type == NTLM) || (type == NTLMv2)) {
+		/* For NTLMv2 failures eventually may need to retry NTLM */
+		wct = 13; /* old style NTLM sessionsetup */
+	} else /* same size for negotiate or auth, NTLMSSP or extended security */
+		wct = 12;
+
+	rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses,
+			    (void **)&smb_buf);
+	if(rc)
+		return rc;
+
+	pSMB = (SESSION_SETUP_ANDX *)smb_buf;
+
+	capabilities = cifs_ssetup_hdr(ses, pSMB);
+
+	/* we will send the SMB in two pieces,
+	a fixed length beginning part, and a
+	second part which will include the strings
+	and rest of bcc area, in order to avoid having
+	to do a large buffer 17K allocation */
+	iov[0].iov_base = (char *)pSMB;
+	iov[0].iov_len = smb_buf->smb_buf_length + 4;
+
+	/* 2000 big enough to fit max user, domain, NOS name etc. */
+	str_area = kmalloc(2000, GFP_KERNEL);
+	if(str_area == NULL) {
+		cifs_small_buf_release(smb_buf);
+		return -ENOMEM;
+	}
+	bcc_ptr = str_area;
+
+	if(type == LANMAN) {
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+		char lnm_session_key[CIFS_SESS_KEY_SIZE];
+
+		/* no capabilities flags in old lanman negotiation */
+
+		pSMB->old_req.PasswordLength = CIFS_SESS_KEY_SIZE;
+		/* BB calculate hash with password */
+		/* and copy into bcc */
+
+		calc_lanman_hash(ses, lnm_session_key);
+		memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE);
+		bcc_ptr += CIFS_SESS_KEY_SIZE;
+
+		/* can not sign if LANMAN negotiated so no need
+		to calculate signing key? but what if server
+		changed to do higher than lanman dialect and
+		we reconnected would we ever calc signing_key? */
+
+		cFYI(1,("Negotiating LANMAN setting up strings"));
+		/* Unicode not allowed for LANMAN dialects */
+		ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
+#endif
+	} else if (type == NTLM) {
+		char ntlm_session_key[CIFS_SESS_KEY_SIZE];
+
+		pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
+		pSMB->req_no_secext.CaseInsensitivePasswordLength =
+			cpu_to_le16(CIFS_SESS_KEY_SIZE);
+		pSMB->req_no_secext.CaseSensitivePasswordLength =
+			cpu_to_le16(CIFS_SESS_KEY_SIZE);
+
+		/* calculate session key */
+		SMBNTencrypt(ses->password, ses->server->cryptKey,
+			     ntlm_session_key);
+
+		if(first_time) /* should this be moved into common code
+				  with similar ntlmv2 path? */
+			cifs_calculate_mac_key(ses->server->mac_signing_key,
+				ntlm_session_key, ses->password);
+		/* copy session key */
+
+		memcpy(bcc_ptr, (char *)ntlm_session_key,CIFS_SESS_KEY_SIZE);
+		bcc_ptr += CIFS_SESS_KEY_SIZE;
+		memcpy(bcc_ptr, (char *)ntlm_session_key,CIFS_SESS_KEY_SIZE);
+		bcc_ptr += CIFS_SESS_KEY_SIZE;
+		if(ses->capabilities & CAP_UNICODE) {
+			/* unicode strings must be word aligned */
+			if (iov[0].iov_len % 2) {
+				*bcc_ptr = 0;
+				bcc_ptr++;
+			}
+			unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
+		} else
+			ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
+	} else if (type == NTLMv2) {
+		char * v2_sess_key =
+			kmalloc(sizeof(struct ntlmv2_resp), GFP_KERNEL);
+
+		/* BB FIXME change all users of v2_sess_key to
+		   struct ntlmv2_resp */
+
+		if(v2_sess_key == NULL) {
+			kfree(str_area);
+			cifs_small_buf_release(smb_buf);
+			return -ENOMEM;
+		}
+
+		pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
+
+		/* LM2 password would be here if we supported it */
+		pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
+
+		pSMB->req_no_secext.CaseSensitivePasswordLength =
+			cpu_to_le16(sizeof(struct ntlmv2_resp));
+
+		/* calculate session key */
+		setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
+		/* BB FIXME if first_time, calculate the ntlmv2 mac signing
+		   key here; the response is copied whether or not it is */
+		/* copy session key */
+		memcpy(bcc_ptr, (char *)v2_sess_key, sizeof(struct ntlmv2_resp));
+		bcc_ptr += sizeof(struct ntlmv2_resp);
+		kfree(v2_sess_key);
+		if(ses->capabilities & CAP_UNICODE) {
+			if(iov[0].iov_len % 2) {
+				*bcc_ptr = 0;
+				bcc_ptr++;
+			}
+			unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
+		} else
+			ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
+	} else /* NTLMSSP or SPNEGO */ {
+		pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
+		capabilities |= CAP_EXTENDED_SECURITY;
+		pSMB->req.Capabilities = cpu_to_le32(capabilities);
+		/* BB set password lengths */
+	}
+
+	count = (long) bcc_ptr - (long) str_area;
+	smb_buf->smb_buf_length += count;
+
+	BCC_LE(smb_buf) = cpu_to_le16(count);
+
+	iov[1].iov_base = str_area;
+	iov[1].iov_len = count;
+	rc = SendReceive2(xid, ses, iov, 2 /* num_iovecs */, &resp_buf_type, 0);
+	/* SMB request buf freed in SendReceive2 */
+
+	cFYI(1,("ssetup rc from sendrecv2 is %d",rc));
+	if(rc)
+		goto ssetup_exit;
+
+	pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
+	smb_buf = (struct smb_hdr *)iov[0].iov_base;
+
+	if((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) {
+		rc = -EIO;
+		cERROR(1,("bad word count %d", smb_buf->WordCount));
+		goto ssetup_exit;
+	}
+	action = le16_to_cpu(pSMB->resp.Action);
+	if (action & GUEST_LOGIN)
+		cFYI(1, ("Guest login")); /* BB mark SesInfo struct? */
+	ses->Suid = smb_buf->Uid;   /* UID left in wire format (le) */
+	cFYI(1, ("UID = %d ", ses->Suid));
+	/* response can have either 3 or 4 word count - Samba sends 3 */
+	/* and lanman response is 3 */
+	bytes_remaining = BCC(smb_buf);
+	bcc_ptr = pByteArea(smb_buf);
+
+	if(smb_buf->WordCount == 4) {
+		__u16 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
+		/* check the length before stepping past the blob */
+		if(blob_len > bytes_remaining) {
+			cERROR(1,("bad security blob length %d", blob_len));
+			rc = -EINVAL;
+			goto ssetup_exit;
+		}
+		bcc_ptr += blob_len;
+		bytes_remaining -= blob_len;
+	}
+
+	/* BB check if Unicode and decode strings */
+	if(smb_buf->Flags2 & SMBFLG2_UNICODE)
+		rc = decode_unicode_ssetup(&bcc_ptr, bytes_remaining,
+						   ses, nls_cp);
+	else
+		rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,nls_cp);
+
+ssetup_exit:
+	kfree(str_area);
+	if(resp_buf_type == CIFS_SMALL_BUFFER) {
+		cFYI(1,("ssetup freeing small buf %p", iov[0].iov_base));
+		cifs_small_buf_release(iov[0].iov_base);
+	} else if(resp_buf_type == CIFS_LARGE_BUFFER)
+		cifs_buf_release(iov[0].iov_base);
+
+	return rc;
+}
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 6103bcdfb16d..f518c5e45035 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -30,6 +30,7 @@
 #include <linux/random.h>
 #include "cifs_unicode.h"
 #include "cifspdu.h"
+#include "cifsglob.h"
 #include "md5.h"
 #include "cifs_debug.h"
 #include "cifsencrypt.h"
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 3da80409466c..17ba329e2b3d 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -654,8 +654,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 
 	if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
 		up(&ses->server->tcpSem);
-		cERROR(1,
-		       ("Illegal length, greater than maximum frame, %d ",
+		cERROR(1, ("Illegal length, greater than maximum frame, %d",
 			in_buf->smb_buf_length));
 		DeleteMidQEntry(midQ);
 		/* If not lock req, update # of requests on wire to server */
diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c
index b35e5bbd9c99..76e00a65a75b 100644
--- a/fs/coda/symlink.c
+++ b/fs/coda/symlink.c
@@ -50,6 +50,6 @@ fail:
 	return error;
 }
 
-struct address_space_operations coda_symlink_aops = {
+const struct address_space_operations coda_symlink_aops = {
 	.readpage	= coda_symlink_filler,
 };
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index c153bd9534cb..e14488ca6411 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -38,7 +38,7 @@
 
 extern struct super_block * configfs_sb;
 
-static struct address_space_operations configfs_aops = {
+static const struct address_space_operations configfs_aops = {
 	.readpage	= simple_readpage,
 	.prepare_write	= simple_prepare_write,
 	.commit_write	= simple_commit_write
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index c45d73860803..223c0431042d 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -30,7 +30,7 @@
 static struct super_operations cramfs_ops;
 static struct inode_operations cramfs_dir_inode_operations;
 static const struct file_operations cramfs_directory_operations;
-static struct address_space_operations cramfs_aops;
+static const struct address_space_operations cramfs_aops;
 
 static DEFINE_MUTEX(read_mutex);
 
@@ -501,7 +501,7 @@ static int cramfs_readpage(struct file *file, struct page * page)
 	return 0;
 }
 
-static struct address_space_operations cramfs_aops = {
+static const struct address_space_operations cramfs_aops = {
 	.readpage = cramfs_readpage
 };
 
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 180607f9314d..174696f9bf14 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -21,7 +21,7 @@ static sector_t _efs_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,efs_get_block);
 }
-static struct address_space_operations efs_aops = {
+static const struct address_space_operations efs_aops = {
 	.readpage = efs_readpage,
 	.sync_page = block_sync_page,
 	.bmap = _efs_bmap
diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c
index 3d9a350e3e7f..e249cf733a6b 100644
--- a/fs/efs/symlink.c
+++ b/fs/efs/symlink.c
@@ -53,6 +53,6 @@ fail:
 	return err;
 }
 
-struct address_space_operations efs_symlink_aops = {
+const struct address_space_operations efs_symlink_aops = {
 	.readpage	= efs_symlink_readpage
 };
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 9f74a62be555..e65a019fc7a5 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -162,9 +162,9 @@ extern const struct file_operations ext2_file_operations;
 extern const struct file_operations ext2_xip_file_operations;
 
 /* inode.c */
-extern struct address_space_operations ext2_aops;
-extern struct address_space_operations ext2_aops_xip;
-extern struct address_space_operations ext2_nobh_aops;
+extern const struct address_space_operations ext2_aops;
+extern const struct address_space_operations ext2_aops_xip;
+extern const struct address_space_operations ext2_nobh_aops;
 
 /* namei.c */
 extern struct inode_operations ext2_dir_inode_operations;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 04af9c45dce2..fb4d3220eb8d 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -684,7 +684,7 @@ ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	return mpage_writepages(mapping, wbc, ext2_get_block);
 }
 
-struct address_space_operations ext2_aops = {
+const struct address_space_operations ext2_aops = {
 	.readpage		= ext2_readpage,
 	.readpages		= ext2_readpages,
 	.writepage		= ext2_writepage,
@@ -697,12 +697,12 @@ struct address_space_operations ext2_aops = {
 	.migratepage		= buffer_migrate_page,
 };
 
-struct address_space_operations ext2_aops_xip = {
+const struct address_space_operations ext2_aops_xip = {
 	.bmap			= ext2_bmap,
 	.get_xip_page		= ext2_get_xip_page,
 };
 
-struct address_space_operations ext2_nobh_aops = {
+const struct address_space_operations ext2_nobh_aops = {
 	.readpage		= ext2_readpage,
 	.readpages		= ext2_readpages,
 	.writepage		= ext2_nobh_writepage,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 0321e1b9034a..f804d5e9d60c 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1698,7 +1698,7 @@ static int ext3_journalled_set_page_dirty(struct page *page)
 	return __set_page_dirty_nobuffers(page);
 }
 
-static struct address_space_operations ext3_ordered_aops = {
+static const struct address_space_operations ext3_ordered_aops = {
 	.readpage	= ext3_readpage,
 	.readpages	= ext3_readpages,
 	.writepage	= ext3_ordered_writepage,
@@ -1712,7 +1712,7 @@ static struct address_space_operations ext3_ordered_aops = {
 	.migratepage	= buffer_migrate_page,
 };
 
-static struct address_space_operations ext3_writeback_aops = {
+static const struct address_space_operations ext3_writeback_aops = {
 	.readpage	= ext3_readpage,
 	.readpages	= ext3_readpages,
 	.writepage	= ext3_writeback_writepage,
@@ -1726,7 +1726,7 @@ static struct address_space_operations ext3_writeback_aops = {
 	.migratepage	= buffer_migrate_page,
 };
 
-static struct address_space_operations ext3_journalled_aops = {
+static const struct address_space_operations ext3_journalled_aops = {
 	.readpage	= ext3_readpage,
 	.readpages	= ext3_readpages,
 	.writepage	= ext3_journalled_writepage,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 7c35d582ec10..31b7174176ba 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -196,7 +196,7 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping, block, fat_get_block);
 }
 
-static struct address_space_operations fat_aops = {
+static const struct address_space_operations fat_aops = {
 	.readpage	= fat_readpage,
 	.readpages	= fat_readpages,
 	.writepage	= fat_writepage,
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index 6f5df1700e95..4e25f3fbed86 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -56,7 +56,7 @@ struct inode_operations vxfs_immed_symlink_iops = {
 /*
  * Adress space operations for immed files and directories.
  */
-struct address_space_operations vxfs_immed_aops = {
+const struct address_space_operations vxfs_immed_aops = {
 	.readpage =		vxfs_immed_readpage,
 };
 
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index f544aae9169f..ca6a39714771 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -41,8 +41,8 @@
 #include "vxfs_extern.h"
 
 
-extern struct address_space_operations vxfs_aops;
-extern struct address_space_operations vxfs_immed_aops;
+extern const struct address_space_operations vxfs_aops;
+extern const struct address_space_operations vxfs_immed_aops;
 
 extern struct inode_operations vxfs_immed_symlink_iops;
 
@@ -295,7 +295,7 @@ vxfs_read_inode(struct inode *ip)
 {
 	struct super_block		*sbp = ip->i_sb;
 	struct vxfs_inode_info		*vip;
-	struct address_space_operations	*aops;
+	const struct address_space_operations	*aops;
 	ino_t				ino = ip->i_ino;
 
 	if (!(vip = __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_ilist)))
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index c1be118fc067..decac62efe57 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -42,7 +42,7 @@
 static int		vxfs_readpage(struct file *, struct page *);
 static sector_t		vxfs_bmap(struct address_space *, sector_t);
 
-struct address_space_operations vxfs_aops = {
+const struct address_space_operations vxfs_aops = {
 	.readpage =		vxfs_readpage,
 	.bmap =			vxfs_bmap,
 	.sync_page =		block_sync_page,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 28aa81eae2cc..63614ed16336 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -770,7 +770,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
 	/* no mmap and sendfile */
 };
 
-static struct address_space_operations fuse_file_aops  = {
+static const struct address_space_operations fuse_file_aops  = {
 	.readpage	= fuse_readpage,
 	.prepare_write	= fuse_prepare_write,
 	.commit_write	= fuse_commit_write,
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 3ed8663a8db1..735332dfd1b8 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -182,8 +182,8 @@ extern void hfs_file_truncate(struct inode *);
 extern int hfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
 /* inode.c */
-extern struct address_space_operations hfs_aops;
-extern struct address_space_operations hfs_btree_aops;
+extern const struct address_space_operations hfs_aops;
+extern const struct address_space_operations hfs_btree_aops;
 
 extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int);
 extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 2d4ced22201b..315cf44a90b2 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -114,7 +114,7 @@ static int hfs_writepages(struct address_space *mapping,
 	return mpage_writepages(mapping, wbc, hfs_get_block);
 }
 
-struct address_space_operations hfs_btree_aops = {
+const struct address_space_operations hfs_btree_aops = {
 	.readpage	= hfs_readpage,
 	.writepage	= hfs_writepage,
 	.sync_page	= block_sync_page,
@@ -124,7 +124,7 @@ struct address_space_operations hfs_btree_aops = {
 	.releasepage	= hfs_releasepage,
 };
 
-struct address_space_operations hfs_aops = {
+const struct address_space_operations hfs_aops = {
 	.readpage	= hfs_readpage,
 	.writepage	= hfs_writepage,
 	.sync_page	= block_sync_page,
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 7ae393637a0c..8a1ca5ef7ada 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -323,8 +323,8 @@ int hfsplus_file_extend(struct inode *);
 void hfsplus_file_truncate(struct inode *);
 
 /* inode.c */
-extern struct address_space_operations hfsplus_aops;
-extern struct address_space_operations hfsplus_btree_aops;
+extern const struct address_space_operations hfsplus_aops;
+extern const struct address_space_operations hfsplus_btree_aops;
 
 void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *);
 void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index acf66dba3e01..924ecdef8091 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -109,7 +109,7 @@ static int hfsplus_writepages(struct address_space *mapping,
 	return mpage_writepages(mapping, wbc, hfsplus_get_block);
 }
 
-struct address_space_operations hfsplus_btree_aops = {
+const struct address_space_operations hfsplus_btree_aops = {
 	.readpage	= hfsplus_readpage,
 	.writepage	= hfsplus_writepage,
 	.sync_page	= block_sync_page,
@@ -119,7 +119,7 @@ struct address_space_operations hfsplus_btree_aops = {
 	.releasepage	= hfsplus_releasepage,
 };
 
-struct address_space_operations hfsplus_aops = {
+const struct address_space_operations hfsplus_aops = {
 	.readpage	= hfsplus_readpage,
 	.writepage	= hfsplus_writepage,
 	.sync_page	= block_sync_page,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 8e0d37743e7c..b82e3d9c8790 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -54,7 +54,7 @@ static int append = 0;
 
 static struct inode_operations hostfs_iops;
 static struct inode_operations hostfs_dir_iops;
-static struct address_space_operations hostfs_link_aops;
+static const struct address_space_operations hostfs_link_aops;
 
 #ifndef MODULE
 static int __init hostfs_args(char *options, int *add)
@@ -518,7 +518,7 @@ int hostfs_commit_write(struct file *file, struct page *page, unsigned from,
 	return(err);
 }
 
-static struct address_space_operations hostfs_aops = {
+static const struct address_space_operations hostfs_aops = {
 	.writepage 	= hostfs_writepage,
 	.readpage	= hostfs_readpage,
 	.set_page_dirty = __set_page_dirty_nobuffers,
@@ -935,7 +935,7 @@ int hostfs_link_readpage(struct file *file, struct page *page)
 	return(err);
 }
 
-static struct address_space_operations hostfs_link_aops = {
+static const struct address_space_operations hostfs_link_aops = {
 	.readpage	= hostfs_link_readpage,
 };
 
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index d3b9fffe45a1..d9eb19b7b8ae 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -99,7 +99,7 @@ static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,hpfs_get_block);
 }
-struct address_space_operations hpfs_aops = {
+const struct address_space_operations hpfs_aops = {
 	.readpage = hpfs_readpage,
 	.writepage = hpfs_writepage,
 	.sync_page = block_sync_page,
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 29b7a3e55173..f687d54ed442 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -268,7 +268,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, char *, char *, int);
 int hpfs_file_fsync(struct file *, struct dentry *, int);
 extern const struct file_operations hpfs_file_ops;
 extern struct inode_operations hpfs_file_iops;
-extern struct address_space_operations hpfs_aops;
+extern const struct address_space_operations hpfs_aops;
 
 /* inode.c */
 
@@ -304,7 +304,7 @@ void hpfs_decide_conv(struct inode *, unsigned char *, unsigned);
 /* namei.c */
 
 extern struct inode_operations hpfs_dir_iops;
-extern struct address_space_operations hpfs_symlink_aops;
+extern const struct address_space_operations hpfs_symlink_aops;
 
 static inline struct hpfs_inode_info *hpfs_i(struct inode *inode)
 {
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index a03abb12c610..59e7dc182a0c 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -538,7 +538,7 @@ fail:
 	return err;
 }
 
-struct address_space_operations hpfs_symlink_aops = {
+const struct address_space_operations hpfs_symlink_aops = {
 	.readpage	= hpfs_symlink_readpage
 };
 	
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e6410d8edd0e..6449cb697967 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -34,7 +34,7 @@
 #define HUGETLBFS_MAGIC	0x958458f6
 
 static struct super_operations hugetlbfs_ops;
-static struct address_space_operations hugetlbfs_aops;
+static const struct address_space_operations hugetlbfs_aops;
 const struct file_operations hugetlbfs_file_operations;
 static struct inode_operations hugetlbfs_dir_inode_operations;
 static struct inode_operations hugetlbfs_inode_operations;
@@ -547,7 +547,7 @@ static void hugetlbfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
 }
 
-static struct address_space_operations hugetlbfs_aops = {
+static const struct address_space_operations hugetlbfs_aops = {
 	.readpage	= hugetlbfs_readpage,
 	.prepare_write	= hugetlbfs_prepare_write,
 	.commit_write	= hugetlbfs_commit_write,
diff --git a/fs/inode.c b/fs/inode.c
index 3a2446a27d2c..f42961eb983b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -102,7 +102,7 @@ static kmem_cache_t * inode_cachep __read_mostly;
 
 static struct inode *alloc_inode(struct super_block *sb)
 {
-	static struct address_space_operations empty_aops;
+	static const struct address_space_operations empty_aops;
 	static struct inode_operations empty_iops;
 	static const struct file_operations empty_fops;
 	struct inode *inode;
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 4917315db732..3a39158cca96 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -312,7 +312,7 @@ eio:
 	return err;
 }
 
-struct address_space_operations zisofs_aops = {
+const struct address_space_operations zisofs_aops = {
 	.readpage = zisofs_readpage,
 	/* No sync_page operation supported? */
 	/* No bmap operation supported */
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 3f9c8ba1fa1f..bb11c7fb4019 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1054,7 +1054,7 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping,block,isofs_get_block);
 }
 
-static struct address_space_operations isofs_aops = {
+static const struct address_space_operations isofs_aops = {
 	.readpage = isofs_readpage,
 	.sync_page = block_sync_page,
 	.bmap = _isofs_bmap
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index b87ba066f5e7..e6308c8b5735 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -176,5 +176,5 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de,
 
 extern struct inode_operations isofs_dir_inode_operations;
 extern const struct file_operations isofs_dir_operations;
-extern struct address_space_operations isofs_symlink_aops;
+extern const struct address_space_operations isofs_symlink_aops;
 extern struct export_operations isofs_export_ops;
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 4326cb47f8fa..f3a1db3098de 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -754,6 +754,6 @@ error:
 	return -EIO;
 }
 
-struct address_space_operations isofs_symlink_aops = {
+const struct address_space_operations isofs_symlink_aops = {
 	.readpage = rock_ridge_symlink_readpage
 };
diff --git a/fs/isofs/zisofs.h b/fs/isofs/zisofs.h
index d78485d101c2..273795709155 100644
--- a/fs/isofs/zisofs.h
+++ b/fs/isofs/zisofs.h
@@ -15,7 +15,7 @@
  */
 
 #ifdef CONFIG_ZISOFS
-extern struct address_space_operations zisofs_aops;
+extern const struct address_space_operations zisofs_aops;
 extern int __init zisofs_init(void);
 extern void zisofs_cleanup(void);
 #endif
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 7f96b5cb6781..8c9b28dff119 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -34,6 +34,7 @@
 #include <linux/suspend.h>
 #include <linux/pagemap.h>
 #include <linux/kthread.h>
+#include <linux/poison.h>
 #include <linux/proc_fs.h>
 
 #include <asm/uaccess.h>
@@ -1675,7 +1676,7 @@ static void journal_free_journal_head(struct journal_head *jh)
 {
 #ifdef CONFIG_JBD_DEBUG
 	atomic_dec(&nr_journal_heads);
-	memset(jh, 0x5b, sizeof(*jh));
+	memset(jh, JBD_POISON_FREE, sizeof(*jh));
 #endif
 	kmem_cache_free(journal_head_cache, jh);
 }
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 9e46ea6da752..93068697a9bf 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -59,7 +59,7 @@ static const struct file_operations jffs_file_operations;
 static struct inode_operations jffs_file_inode_operations;
 static const struct file_operations jffs_dir_operations;
 static struct inode_operations jffs_dir_inode_operations;
-static struct address_space_operations jffs_address_operations;
+static const struct address_space_operations jffs_address_operations;
 
 kmem_cache_t     *node_cache = NULL;
 kmem_cache_t     *fm_cache = NULL;
@@ -1614,7 +1614,7 @@ jffs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 } /* jffs_ioctl()  */
 
 
-static struct address_space_operations jffs_address_operations = {
+static const struct address_space_operations jffs_address_operations = {
 	.readpage	= jffs_readpage,
 	.prepare_write	= jffs_prepare_write,
 	.commit_write	= jffs_commit_write,
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 320dd48b834e..9c2077e7e081 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -267,6 +267,8 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	}
 
 	rc = do_jffs2_setxattr(inode, xprefix, "", value, size, 0);
+	if (!value && rc == -ENODATA)
+		rc = 0;
 	if (value)
 		kfree(value);
 	if (!rc) {
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index b8886f048eaa..ad0121088dde 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -225,7 +225,6 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
 			   at the end of the linked list. Stash it and continue
 			   from the beginning of the list */
 			ic = (struct jffs2_inode_cache *)(*prev);
-			BUG_ON(ic->class != RAWNODE_CLASS_INODE_CACHE);
 			prev = &ic->nodes;
 			continue;
 		}
@@ -249,7 +248,8 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
 
 	/* PARANOIA */
 	if (!ic) {
-		printk(KERN_WARNING "inode_cache not found in remove_node_refs()!!\n");
+		JFFS2_WARNING("inode_cache/xattr_datum/xattr_ref"
+			      " not found in remove_node_refs()!!\n");
 		return;
 	}
 
@@ -274,8 +274,19 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
 		printk("\n");
 	});
 
-	if (ic->nodes == (void *)ic && ic->nlink == 0)
-		jffs2_del_ino_cache(c, ic);
+	switch (ic->class) {
+#ifdef CONFIG_JFFS2_FS_XATTR
+		case RAWNODE_CLASS_XATTR_DATUM:
+			jffs2_release_xattr_datum(c, (struct jffs2_xattr_datum *)ic);
+			break;
+		case RAWNODE_CLASS_XATTR_REF:
+			jffs2_release_xattr_ref(c, (struct jffs2_xattr_ref *)ic);
+			break;
+#endif
+		default:
+			if (ic->nodes == (void *)ic && ic->nlink == 0)
+				jffs2_del_ino_cache(c, ic);
+	}
 }
 
 void jffs2_free_jeb_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index bb8844f40e48..3ed6e3e120b6 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -62,7 +62,7 @@ struct inode_operations jffs2_file_inode_operations =
 	.removexattr =	jffs2_removexattr
 };
 
-struct address_space_operations jffs2_file_address_operations =
+const struct address_space_operations jffs2_file_address_operations =
 {
 	.readpage =	jffs2_readpage,
 	.prepare_write =jffs2_prepare_write,
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 2900ec3ec3af..97caa77d60cf 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -227,8 +227,6 @@ void jffs2_clear_inode (struct inode *inode)
 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
 
 	D1(printk(KERN_DEBUG "jffs2_clear_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode));
-
-	jffs2_xattr_delete_inode(c, f->inocache);
 	jffs2_do_clear_inode(c, f);
 }
 
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 477c526d638b..daff3341ff92 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -165,6 +165,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
 			D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
 				  ic->ino));
 			spin_unlock(&c->inocache_lock);
+			jffs2_xattr_delete_inode(c, ic);
 			continue;
 		}
 		switch(ic->state) {
@@ -275,13 +276,12 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
 	 * We can decide whether this node is inode or xattr by ic->class.     */
 	if (ic->class == RAWNODE_CLASS_XATTR_DATUM
 	    || ic->class == RAWNODE_CLASS_XATTR_REF) {
-		BUG_ON(raw->next_in_ino != (void *)ic);
 		spin_unlock(&c->erase_completion_lock);
 
 		if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
-			ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic);
+			ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
 		} else {
-			ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic);
+			ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
 		}
 		goto release_sem;
 	}
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 935fec1b1201..b98594992eed 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -119,8 +119,11 @@ struct jffs2_sb_info {
 #ifdef CONFIG_JFFS2_FS_XATTR
 #define XATTRINDEX_HASHSIZE	(57)
 	uint32_t highest_xid;
+	uint32_t highest_xseqno;
 	struct list_head xattrindex[XATTRINDEX_HASHSIZE];
 	struct list_head xattr_unchecked;
+	struct list_head xattr_dead_list;
+	struct jffs2_xattr_ref *xref_dead_list;
 	struct jffs2_xattr_ref *xref_temp;
 	struct rw_semaphore xattr_sem;
 	uint32_t xdatum_mem_usage;
diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c
index 4889d0700c0e..8310c95478e9 100644
--- a/fs/jffs2/malloc.c
+++ b/fs/jffs2/malloc.c
@@ -291,6 +291,7 @@ struct jffs2_xattr_datum *jffs2_alloc_xattr_datum(void)
 
 	memset(xd, 0, sizeof(struct jffs2_xattr_datum));
 	xd->class = RAWNODE_CLASS_XATTR_DATUM;
+	xd->node = (void *)xd;
 	INIT_LIST_HEAD(&xd->xindex);
 	return xd;
 }
@@ -309,6 +310,7 @@ struct jffs2_xattr_ref *jffs2_alloc_xattr_ref(void)
 
 	memset(ref, 0, sizeof(struct jffs2_xattr_ref));
 	ref->class = RAWNODE_CLASS_XATTR_REF;
+	ref->node = (void *)ref;
 	return ref;
 }
 
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 927dfe42ba76..7675b33396c7 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -906,6 +906,9 @@ void jffs2_del_ino_cache(struct jffs2_sb_info *c, struct jffs2_inode_cache *old)
 {
 	struct jffs2_inode_cache **prev;
 
+#ifdef CONFIG_JFFS2_FS_XATTR
+	BUG_ON(old->xref);
+#endif
 	dbg_inocache("del %p (ino #%u)\n", old, old->ino);
 	spin_lock(&c->inocache_lock);
 
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index ac0c350ed7d7..d88376992ed9 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -683,19 +683,26 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
 		spin_lock(&c->erase_completion_lock);
 
 		ic = jffs2_raw_ref_to_ic(ref);
-		/* It seems we should never call jffs2_mark_node_obsolete() for
-		   XATTR nodes.... yet. Make sure we notice if/when we change
-		   that :) */
-		BUG_ON(ic->class != RAWNODE_CLASS_INODE_CACHE);
 		for (p = &ic->nodes; (*p) != ref; p = &((*p)->next_in_ino))
 			;
 
 		*p = ref->next_in_ino;
 		ref->next_in_ino = NULL;
 
-		if (ic->nodes == (void *)ic && ic->nlink == 0)
-			jffs2_del_ino_cache(c, ic);
-
+		switch (ic->class) {
+#ifdef CONFIG_JFFS2_FS_XATTR
+			case RAWNODE_CLASS_XATTR_DATUM:
+				jffs2_release_xattr_datum(c, (struct jffs2_xattr_datum *)ic);
+				break;
+			case RAWNODE_CLASS_XATTR_REF:
+				jffs2_release_xattr_ref(c, (struct jffs2_xattr_ref *)ic);
+				break;
+#endif
+			default:
+				if (ic->nodes == (void *)ic && ic->nlink == 0)
+					jffs2_del_ino_cache(c, ic);
+				break;
+		}
 		spin_unlock(&c->erase_completion_lock);
 	}
 
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 6b5223565405..9f41fc01a371 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -158,7 +158,7 @@ extern struct inode_operations jffs2_dir_inode_operations;
 /* file.c */
 extern const struct file_operations jffs2_file_operations;
 extern struct inode_operations jffs2_file_inode_operations;
-extern struct address_space_operations jffs2_file_address_operations;
+extern const struct address_space_operations jffs2_file_address_operations;
 int jffs2_fsync(struct file *, struct dentry *, int);
 int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
 
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 5fec012b02ed..cc1899268c43 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -968,6 +968,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
 	struct jffs2_full_dirent *fd, *fds;
 	int deleted;
 
+	jffs2_xattr_delete_inode(c, f->inocache);
 	down(&f->sem);
 	deleted = f->inocache && !f->inocache->nlink;
 
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 61618080b86f..2bfdc33752d3 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -317,20 +317,23 @@ static int jffs2_scan_xattr_node(struct jffs2_sb_info *c, struct jffs2_erasebloc
 				 struct jffs2_summary *s)
 {
 	struct jffs2_xattr_datum *xd;
-	uint32_t totlen, crc;
+	uint32_t xid, version, totlen, crc;
 	int err;
 
 	crc = crc32(0, rx, sizeof(struct jffs2_raw_xattr) - 4);
 	if (crc != je32_to_cpu(rx->node_crc)) {
-		if (je32_to_cpu(rx->node_crc) != 0xffffffff)
-			JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
-				      ofs, je32_to_cpu(rx->node_crc), crc);
+		JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
+			      ofs, je32_to_cpu(rx->node_crc), crc);
 		if ((err = jffs2_scan_dirty_space(c, jeb, je32_to_cpu(rx->totlen))))
 			return err;
 		return 0;
 	}
 
-	totlen = PAD(sizeof(*rx) + rx->name_len + 1 + je16_to_cpu(rx->value_len));
+	xid = je32_to_cpu(rx->xid);
+	version = je32_to_cpu(rx->version);
+
+	totlen = PAD(sizeof(struct jffs2_raw_xattr)
+			+ rx->name_len + 1 + je16_to_cpu(rx->value_len));
 	if (totlen != je32_to_cpu(rx->totlen)) {
 		JFFS2_WARNING("node length mismatch at %#08x, read=%u, calc=%u\n",
 			      ofs, je32_to_cpu(rx->totlen), totlen);
@@ -339,22 +342,24 @@ static int jffs2_scan_xattr_node(struct jffs2_sb_info *c, struct jffs2_erasebloc
 		return 0;
 	}
 
-	xd = jffs2_setup_xattr_datum(c, je32_to_cpu(rx->xid), je32_to_cpu(rx->version));
-	if (IS_ERR(xd)) {
-		if (PTR_ERR(xd) == -EEXIST) {
-			if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rx->totlen)))))
-				return err;
-			return 0;
-		}
+	xd = jffs2_setup_xattr_datum(c, xid, version);
+	if (IS_ERR(xd))
 		return PTR_ERR(xd);
-	}
-	xd->xprefix = rx->xprefix;
-	xd->name_len = rx->name_len;
-	xd->value_len = je16_to_cpu(rx->value_len);
-	xd->data_crc = je32_to_cpu(rx->data_crc);
 
-	xd->node = jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, totlen, NULL);
-	/* FIXME */ xd->node->next_in_ino = (void *)xd;
+	if (xd->version > version) {
+		struct jffs2_raw_node_ref *raw
+			= jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, totlen, NULL);
+		raw->next_in_ino = xd->node->next_in_ino;
+		xd->node->next_in_ino = raw;
+	} else {
+		xd->version = version;
+		xd->xprefix = rx->xprefix;
+		xd->name_len = rx->name_len;
+		xd->value_len = je16_to_cpu(rx->value_len);
+		xd->data_crc = je32_to_cpu(rx->data_crc);
+
+		jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, totlen, (void *)xd);
+	}
 
 	if (jffs2_sum_active())
 		jffs2_sum_add_xattr_mem(s, rx, ofs - jeb->offset);
@@ -373,9 +378,8 @@ static int jffs2_scan_xref_node(struct jffs2_sb_info *c, struct jffs2_eraseblock
 
 	crc = crc32(0, rr, sizeof(*rr) - 4);
 	if (crc != je32_to_cpu(rr->node_crc)) {
-		if (je32_to_cpu(rr->node_crc) != 0xffffffff)
-			JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
-				      ofs, je32_to_cpu(rr->node_crc), crc);
+		JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
+			      ofs, je32_to_cpu(rr->node_crc), crc);
 		if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rr->totlen)))))
 			return err;
 		return 0;
@@ -395,6 +399,7 @@ static int jffs2_scan_xref_node(struct jffs2_sb_info *c, struct jffs2_eraseblock
 		return -ENOMEM;
 
 	/* BEFORE jffs2_build_xattr_subsystem() called, 
+	 * and AFTER xattr_ref is marked as a dead xref,
 	 * ref->xid is used to store 32bit xid, xd is not used
 	 * ref->ino is used to store 32bit inode-number, ic is not used
 	 * Thoes variables are declared as union, thus using those
@@ -404,11 +409,13 @@ static int jffs2_scan_xref_node(struct jffs2_sb_info *c, struct jffs2_eraseblock
 	 */
 	ref->ino = je32_to_cpu(rr->ino);
 	ref->xid = je32_to_cpu(rr->xid);
+	ref->xseqno = je32_to_cpu(rr->xseqno);
+	if (ref->xseqno > c->highest_xseqno)
+		c->highest_xseqno = (ref->xseqno & ~XREF_DELETE_MARKER);
 	ref->next = c->xref_temp;
 	c->xref_temp = ref;
 
-	ref->node = jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, PAD(je32_to_cpu(rr->totlen)), NULL);
-	/* FIXME */ ref->node->next_in_ino = (void *)ref;
+	jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, PAD(je32_to_cpu(rr->totlen)), (void *)ref);
 
 	if (jffs2_sum_active())
 		jffs2_sum_add_xref_mem(s, rr, ofs - jeb->offset);
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 0b02fc79e4d1..c19bd476e8ec 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -5,7 +5,7 @@
  *                     Zoltan Sogor <weth@inf.u-szeged.hu>,
  *                     Patrik Kluba <pajko@halom.u-szeged.hu>,
  *                     University of Szeged, Hungary
- *               2005  KaiGai Kohei <kaigai@ak.jp.nec.com>
+ *               2006  KaiGai Kohei <kaigai@ak.jp.nec.com>
  *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
@@ -43,7 +43,7 @@ int jffs2_sum_init(struct jffs2_sb_info *c)
 		return -ENOMEM;
 	}
 
-	dbg_summary("returned succesfully\n");
+	dbg_summary("returned successfully\n");
 
 	return 0;
 }
@@ -310,8 +310,6 @@ int jffs2_sum_add_kvec(struct jffs2_sb_info *c, const struct kvec *invecs,
 #ifdef CONFIG_JFFS2_FS_XATTR
 		case JFFS2_NODETYPE_XATTR: {
 			struct jffs2_sum_xattr_mem *temp;
-			if (je32_to_cpu(node->x.version) == 0xffffffff)
-				return 0;
 			temp = kmalloc(sizeof(struct jffs2_sum_xattr_mem), GFP_KERNEL);
 			if (!temp)
 				goto no_mem;
@@ -327,10 +325,6 @@ int jffs2_sum_add_kvec(struct jffs2_sb_info *c, const struct kvec *invecs,
 		}
 		case JFFS2_NODETYPE_XREF: {
 			struct jffs2_sum_xref_mem *temp;
-
-			if (je32_to_cpu(node->r.ino) == 0xffffffff
-			    && je32_to_cpu(node->r.xid) == 0xffffffff)
-				return 0;
 			temp = kmalloc(sizeof(struct jffs2_sum_xref_mem), GFP_KERNEL);
 			if (!temp)
 				goto no_mem;
@@ -483,22 +477,20 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras
 
 				xd = jffs2_setup_xattr_datum(c, je32_to_cpu(spx->xid),
 								je32_to_cpu(spx->version));
-				if (IS_ERR(xd)) {
-					if (PTR_ERR(xd) == -EEXIST) {
-						/* a newer version of xd exists */
-						if ((err = jffs2_scan_dirty_space(c, jeb, je32_to_cpu(spx->totlen))))
-							return err;
-						sp += JFFS2_SUMMARY_XATTR_SIZE;
-						break;
-					}
-					JFFS2_NOTICE("allocation of xattr_datum failed\n");
+				if (IS_ERR(xd))
 					return PTR_ERR(xd);
+				if (xd->version > je32_to_cpu(spx->version)) {
+					/* node is not the newest one */
+					struct jffs2_raw_node_ref *raw
+						= sum_link_node_ref(c, jeb, je32_to_cpu(spx->offset) | REF_UNCHECKED,
+								    PAD(je32_to_cpu(spx->totlen)), NULL);
+					raw->next_in_ino = xd->node->next_in_ino;
+					xd->node->next_in_ino = raw;
+				} else {
+					xd->version = je32_to_cpu(spx->version);
+					sum_link_node_ref(c, jeb, je32_to_cpu(spx->offset) | REF_UNCHECKED,
+							  PAD(je32_to_cpu(spx->totlen)), (void *)xd);
 				}
-
-				xd->node = sum_link_node_ref(c, jeb, je32_to_cpu(spx->offset) | REF_UNCHECKED,
-							     PAD(je32_to_cpu(spx->totlen)), NULL);
-				/* FIXME */ xd->node->next_in_ino = (void *)xd;
-
 				*pseudo_random += je32_to_cpu(spx->xid);
 				sp += JFFS2_SUMMARY_XATTR_SIZE;
 
@@ -519,14 +511,11 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras
 					JFFS2_NOTICE("allocation of xattr_datum failed\n");
 					return -ENOMEM;
 				}
-				ref->ino = 0xfffffffe;
-				ref->xid = 0xfffffffd;
 				ref->next = c->xref_temp;
 				c->xref_temp = ref;
 
-				ref->node = sum_link_node_ref(c, jeb, je32_to_cpu(spr->offset) | REF_UNCHECKED,
-							      PAD(sizeof(struct jffs2_raw_xref)), NULL);
-				/* FIXME */ ref->node->next_in_ino = (void *)ref;
+				sum_link_node_ref(c, jeb, je32_to_cpu(spr->offset) | REF_UNCHECKED,
+						  PAD(sizeof(struct jffs2_raw_xref)), (void *)ref);
 
 				*pseudo_random += ref->node->flash_offset;
 				sp += JFFS2_SUMMARY_XREF_SIZE;
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 2d82e250be34..18e66dbf23b4 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -23,18 +23,15 @@
  * xattr_datum_hashkey(xprefix, xname, xvalue, xsize)
  *   is used to calcurate xdatum hashkey. The reminder of hashkey into XATTRINDEX_HASHSIZE is
  *   the index of the xattr name/value pair cache (c->xattrindex).
+ * is_xattr_datum_unchecked(c, xd)
+ *   returns 1 if xdatum contains any unchecked raw node. If none of its raw nodes
+ *   is unchecked, it returns 0.
  * unload_xattr_datum(c, xd)
  *   is used to release xattr name/value pair and detach from c->xattrindex.
  * reclaim_xattr_datum(c)
  *   is used to reclaim xattr name/value pairs on the xattr name/value pair cache when
  *   memory usage by cache is over c->xdatum_mem_threshold. Currentry, this threshold 
  *   is hard coded as 32KiB.
- * delete_xattr_datum_node(c, xd)
- *   is used to delete a jffs2 node is dominated by xdatum. When EBS(Erase Block Summary) is
- *   enabled, it overwrites the obsolete node by myself.
- * delete_xattr_datum(c, xd)
- *   is used to delete jffs2_xattr_datum object. It must be called with 0-value of reference
- *   counter. (It means how many jffs2_xattr_ref object refers this xdatum.)
  * do_verify_xattr_datum(c, xd)
  *   is used to load the xdatum informations without name/value pair from the medium.
  *   It's necessary once, because those informations are not collected during mounting
@@ -53,8 +50,10 @@
  *   is used to write xdatum to medium. xd->version will be incremented.
  * create_xattr_datum(c, xprefix, xname, xvalue, xsize)
  *   is used to create new xdatum and write to medium.
+ * delete_xattr_datum(c, xd)
+ *   is used to delete an xdatum. It marks xd with JFFS2_XFLAGS_DEAD, and allows
+ *   GC to reclaim its physical nodes.
  * -------------------------------------------------- */
-
 static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *xvalue, int xsize)
 {
 	int name_len = strlen(xname);
@@ -62,6 +61,22 @@ static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *
 	return crc32(xprefix, xname, name_len) ^ crc32(xprefix, xvalue, xsize);
 }
 
+static int is_xattr_datum_unchecked(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
+{
+	/* Returns 1 if any raw node chained to xd is REF_UNCHECKED, else 0. */
+	struct jffs2_raw_node_ref *cur;
+	int found;
+
+	spin_lock(&c->erase_completion_lock);
+	/* Stop at the first unchecked node, or when the chain points back at xd. */
+	cur = xd->node;
+	while (cur != (void *)xd && ref_flags(cur) != REF_UNCHECKED)
+		cur = cur->next_in_ino;
+	found = (cur != (void *)xd);
+	spin_unlock(&c->erase_completion_lock);
+	return found;
+}
+
 static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
 {
 	/* must be called under down_write(xattr_sem) */
@@ -107,77 +122,33 @@ static void reclaim_xattr_datum(struct jffs2_sb_info *c)
 		     before, c->xdatum_mem_usage, before - c->xdatum_mem_usage);
 }
 
-static void delete_xattr_datum_node(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
-{
-	/* must be called under down_write(xattr_sem) */
-	struct jffs2_raw_xattr rx;
-	size_t length;
-	int rc;
-
-	if (!xd->node) {
-		JFFS2_WARNING("xdatum (xid=%u) is removed twice.\n", xd->xid);
-		return;
-	}
-	if (jffs2_sum_active()) {
-		memset(&rx, 0xff, sizeof(struct jffs2_raw_xattr));
-		rc = jffs2_flash_read(c, ref_offset(xd->node),
-				      sizeof(struct jffs2_unknown_node),
-				      &length, (char *)&rx);
-		if (rc || length != sizeof(struct jffs2_unknown_node)) {
-			JFFS2_ERROR("jffs2_flash_read()=%d, req=%zu, read=%zu at %#08x\n",
-				    rc, sizeof(struct jffs2_unknown_node),
-				    length, ref_offset(xd->node));
-		}
-		rc = jffs2_flash_write(c, ref_offset(xd->node), sizeof(rx),
-				       &length, (char *)&rx);
-		if (rc || length != sizeof(struct jffs2_raw_xattr)) {
-			JFFS2_ERROR("jffs2_flash_write()=%d, req=%zu, wrote=%zu ar %#08x\n",
-				    rc, sizeof(rx), length, ref_offset(xd->node));
-		}
-	}
-	spin_lock(&c->erase_completion_lock);
-	xd->node->next_in_ino = NULL;
-	spin_unlock(&c->erase_completion_lock);
-	jffs2_mark_node_obsolete(c, xd->node);
-	xd->node = NULL;
-}
-
-static void delete_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
-{
-	/* must be called under down_write(xattr_sem) */
-	BUG_ON(xd->refcnt);
-
-	unload_xattr_datum(c, xd);
-	if (xd->node) {
-		delete_xattr_datum_node(c, xd);
-		xd->node = NULL;
-	}
-	jffs2_free_xattr_datum(xd);
-}
-
 static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
 {
 	/* must be called under down_write(xattr_sem) */
 	struct jffs2_eraseblock *jeb;
+	struct jffs2_raw_node_ref *raw;
 	struct jffs2_raw_xattr rx;
 	size_t readlen;
-	uint32_t crc, totlen;
+	uint32_t crc, offset, totlen;
 	int rc;
 
-	BUG_ON(!xd->node);
-	BUG_ON(ref_flags(xd->node) != REF_UNCHECKED);
+	spin_lock(&c->erase_completion_lock);
+	offset = ref_offset(xd->node);
+	if (ref_flags(xd->node) == REF_PRISTINE)
+		goto complete;
+	spin_unlock(&c->erase_completion_lock);
 
-	rc = jffs2_flash_read(c, ref_offset(xd->node), sizeof(rx), &readlen, (char *)&rx);
+	rc = jffs2_flash_read(c, offset, sizeof(rx), &readlen, (char *)&rx);
 	if (rc || readlen != sizeof(rx)) {
 		JFFS2_WARNING("jffs2_flash_read()=%d, req=%zu, read=%zu at %#08x\n",
-			      rc, sizeof(rx), readlen, ref_offset(xd->node));
+			      rc, sizeof(rx), readlen, offset);
 		return rc ? rc : -EIO;
 	}
 	crc = crc32(0, &rx, sizeof(rx) - 4);
 	if (crc != je32_to_cpu(rx.node_crc)) {
-		if (je32_to_cpu(rx.node_crc) != 0xffffffff)
-			JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
-				    ref_offset(xd->node), je32_to_cpu(rx.hdr_crc), crc);
+		JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
+			    offset, je32_to_cpu(rx.hdr_crc), crc);
+		xd->flags |= JFFS2_XFLAGS_INVALID;
 		return EIO;
 	}
 	totlen = PAD(sizeof(rx) + rx.name_len + 1 + je16_to_cpu(rx.value_len));
@@ -188,11 +159,12 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
 	    || je32_to_cpu(rx.version) != xd->version) {
 		JFFS2_ERROR("inconsistent xdatum at %#08x, magic=%#04x/%#04x, "
 			    "nodetype=%#04x/%#04x, totlen=%u/%u, xid=%u/%u, version=%u/%u\n",
-			    ref_offset(xd->node), je16_to_cpu(rx.magic), JFFS2_MAGIC_BITMASK,
+			    offset, je16_to_cpu(rx.magic), JFFS2_MAGIC_BITMASK,
 			    je16_to_cpu(rx.nodetype), JFFS2_NODETYPE_XATTR,
 			    je32_to_cpu(rx.totlen), totlen,
 			    je32_to_cpu(rx.xid), xd->xid,
 			    je32_to_cpu(rx.version), xd->version);
+		xd->flags |= JFFS2_XFLAGS_INVALID;
 		return EIO;
 	}
 	xd->xprefix = rx.xprefix;
@@ -200,14 +172,17 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
 	xd->value_len = je16_to_cpu(rx.value_len);
 	xd->data_crc = je32_to_cpu(rx.data_crc);
 
-	/* This JFFS2_NODETYPE_XATTR node is checked */
-	jeb = &c->blocks[ref_offset(xd->node) / c->sector_size];
-	totlen = PAD(je32_to_cpu(rx.totlen));
-
 	spin_lock(&c->erase_completion_lock);
-	c->unchecked_size -= totlen; c->used_size += totlen;
-	jeb->unchecked_size -= totlen; jeb->used_size += totlen;
-	xd->node->flash_offset = ref_offset(xd->node) | REF_PRISTINE;
+ complete:
+	for (raw=xd->node; raw != (void *)xd; raw=raw->next_in_ino) {
+		jeb = &c->blocks[ref_offset(raw) / c->sector_size];
+		totlen = PAD(ref_totlen(c, jeb, raw));
+		if (ref_flags(raw) == REF_UNCHECKED) {
+			c->unchecked_size -= totlen; c->used_size += totlen;
+			jeb->unchecked_size -= totlen; jeb->used_size += totlen;
+		}
+		raw->flash_offset = ref_offset(raw) | ((xd->node==raw) ? REF_PRISTINE : REF_NORMAL);
+	}
 	spin_unlock(&c->erase_completion_lock);
 
 	/* unchecked xdatum is chained with c->xattr_unchecked */
@@ -227,7 +202,6 @@ static int do_load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum
 	uint32_t crc, length;
 	int i, ret, retry = 0;
 
-	BUG_ON(!xd->node);
 	BUG_ON(ref_flags(xd->node) != REF_PRISTINE);
 	BUG_ON(!list_empty(&xd->xindex));
  retry:
@@ -253,6 +227,7 @@ static int do_load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum
 			      " at %#08x, read: 0x%08x calculated: 0x%08x\n",
 			      ref_offset(xd->node), xd->data_crc, crc);
 		kfree(data);
+		xd->flags |= JFFS2_XFLAGS_INVALID;
 		return EIO;
 	}
 
@@ -286,16 +261,14 @@ static int load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x
 	 * rc > 0 : Unrecoverable error, this node should be deleted.
 	 */
 	int rc = 0;
-	BUG_ON(xd->xname);
-	if (!xd->node)
+
+	BUG_ON(xd->flags & JFFS2_XFLAGS_DEAD);
+	if (xd->xname)
+		return 0;
+	if (xd->flags & JFFS2_XFLAGS_INVALID)
 		return EIO;
-	if (unlikely(ref_flags(xd->node) != REF_PRISTINE)) {
+	if (unlikely(is_xattr_datum_unchecked(c, xd)))
 		rc = do_verify_xattr_datum(c, xd);
-		if (rc > 0) {
-			list_del_init(&xd->xindex);
-			delete_xattr_datum_node(c, xd);
-		}
-	}
 	if (!rc)
 		rc = do_load_xattr_datum(c, xd);
 	return rc;
@@ -304,7 +277,6 @@ static int load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x
 static int save_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
 {
 	/* must be called under down_write(xattr_sem) */
-	struct jffs2_raw_node_ref *raw;
 	struct jffs2_raw_xattr rx;
 	struct kvec vecs[2];
 	size_t length;
@@ -312,14 +284,16 @@ static int save_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x
 	uint32_t phys_ofs = write_ofs(c);
 
 	BUG_ON(!xd->xname);
+	BUG_ON(xd->flags & (JFFS2_XFLAGS_DEAD|JFFS2_XFLAGS_INVALID));
 
 	vecs[0].iov_base = &rx;
-	vecs[0].iov_len = PAD(sizeof(rx));
+	vecs[0].iov_len = sizeof(rx);
 	vecs[1].iov_base = xd->xname;
 	vecs[1].iov_len = xd->name_len + 1 + xd->value_len;
 	totlen = vecs[0].iov_len + vecs[1].iov_len;
 
 	/* Setup raw-xattr */
+	memset(&rx, 0, sizeof(rx));
 	rx.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 	rx.nodetype = cpu_to_je16(JFFS2_NODETYPE_XATTR);
 	rx.totlen = cpu_to_je32(PAD(totlen));
@@ -343,14 +317,8 @@ static int save_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x
 
 		return rc;
 	}
-
 	/* success */
-	raw = jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(totlen), NULL);
-	/* FIXME */ raw->next_in_ino = (void *)xd;
-
-	if (xd->node)
-		delete_xattr_datum_node(c, xd);
-	xd->node = raw;
+	jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(totlen), (void *)xd);
 
 	dbg_xattr("success on saving xdatum (xid=%u, version=%u, xprefix=%u, xname='%s')\n",
 		  xd->xid, xd->version, xd->xprefix, xd->xname);
@@ -377,7 +345,7 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c,
 		    && xd->value_len==xsize
 		    && !strcmp(xd->xname, xname)
 		    && !memcmp(xd->xvalue, xvalue, xsize)) {
-			xd->refcnt++;
+			atomic_inc(&xd->refcnt);
 			return xd;
 		}
 	}
@@ -397,7 +365,7 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c,
 	strcpy(data, xname);
 	memcpy(data + name_len + 1, xvalue, xsize);
 
-	xd->refcnt = 1;
+	atomic_set(&xd->refcnt, 1);
 	xd->xid = ++c->highest_xid;
 	xd->flags |= JFFS2_XFLAGS_HOT;
 	xd->xprefix = xprefix;
@@ -426,20 +394,36 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c,
 	return xd;
 }
 
+static void delete_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
+{
+	/* must be called under down_write(xattr_sem) */
+	BUG_ON(atomic_read(&xd->refcnt));
+
+	unload_xattr_datum(c, xd);
+	xd->flags |= JFFS2_XFLAGS_DEAD;
+	spin_lock(&c->erase_completion_lock);
+	if (xd->node == (void *)xd) {
+		BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID));
+		jffs2_free_xattr_datum(xd);
+	} else {
+		list_add(&xd->xindex, &c->xattr_dead_list);
+	}
+	spin_unlock(&c->erase_completion_lock);
+	dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xd->xid, xd->version);
+}
+
 /* -------- xref related functions ------------------
  * verify_xattr_ref(c, ref)
  *   is used to load xref information from medium. Because summary data does not
  *   contain xid/ino, it's necessary to verify once while mounting process.
- * delete_xattr_ref_node(c, ref)
- *   is used to delete a jffs2 node is dominated by xref. When EBS is enabled,
- *   it overwrites the obsolete node by myself. 
- * delete_xattr_ref(c, ref)
- *   is used to delete jffs2_xattr_ref object. If the reference counter of xdatum
- *   is refered by this xref become 0, delete_xattr_datum() is called later.
  * save_xattr_ref(c, ref)
- *   is used to write xref to medium.
+ *   is used to write xref to medium. If the delete marker is set, it writes
+ *   a delete marker of xref into medium.
  * create_xattr_ref(c, ic, xd)
  *   is used to create a new xref and write to medium.
+ * delete_xattr_ref(c, ref)
+ *   is used to delete jffs2_xattr_ref. It marks xref XREF_DELETE_MARKER,
+ *   and allows GC to reclaim those physical nodes.
  * jffs2_xattr_delete_inode(c, ic)
  *   is called to remove xrefs related to obsolete inode when inode is unlinked.
  * jffs2_xattr_free_inode(c, ic)
@@ -450,25 +434,29 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c,
 static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
 {
 	struct jffs2_eraseblock *jeb;
+	struct jffs2_raw_node_ref *raw;
 	struct jffs2_raw_xref rr;
 	size_t readlen;
-	uint32_t crc, totlen;
+	uint32_t crc, offset, totlen;
 	int rc;
 
-	BUG_ON(ref_flags(ref->node) != REF_UNCHECKED);
+	spin_lock(&c->erase_completion_lock);
+	if (ref_flags(ref->node) != REF_UNCHECKED)
+		goto complete;
+	offset = ref_offset(ref->node);
+	spin_unlock(&c->erase_completion_lock);
 
-	rc = jffs2_flash_read(c, ref_offset(ref->node), sizeof(rr), &readlen, (char *)&rr);
+	rc = jffs2_flash_read(c, offset, sizeof(rr), &readlen, (char *)&rr);
 	if (rc || sizeof(rr) != readlen) {
 		JFFS2_WARNING("jffs2_flash_read()=%d, req=%zu, read=%zu, at %#08x\n",
-			      rc, sizeof(rr), readlen, ref_offset(ref->node));
+			      rc, sizeof(rr), readlen, offset);
 		return rc ? rc : -EIO;
 	}
 	/* obsolete node */
 	crc = crc32(0, &rr, sizeof(rr) - 4);
 	if (crc != je32_to_cpu(rr.node_crc)) {
-		if (je32_to_cpu(rr.node_crc) != 0xffffffff)
-			JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
-				    ref_offset(ref->node), je32_to_cpu(rr.node_crc), crc);
+		JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
+			    offset, je32_to_cpu(rr.node_crc), crc);
 		return EIO;
 	}
 	if (je16_to_cpu(rr.magic) != JFFS2_MAGIC_BITMASK
@@ -476,22 +464,28 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref
 	    || je32_to_cpu(rr.totlen) != PAD(sizeof(rr))) {
 		JFFS2_ERROR("inconsistent xref at %#08x, magic=%#04x/%#04x, "
 			    "nodetype=%#04x/%#04x, totlen=%u/%zu\n",
-			    ref_offset(ref->node), je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK,
+			    offset, je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK,
 			    je16_to_cpu(rr.nodetype), JFFS2_NODETYPE_XREF,
 			    je32_to_cpu(rr.totlen), PAD(sizeof(rr)));
 		return EIO;
 	}
 	ref->ino = je32_to_cpu(rr.ino);
 	ref->xid = je32_to_cpu(rr.xid);
-
-	/* fixup superblock/eraseblock info */
-	jeb = &c->blocks[ref_offset(ref->node) / c->sector_size];
-	totlen = PAD(sizeof(rr));
+	ref->xseqno = je32_to_cpu(rr.xseqno);
+	if (ref->xseqno > c->highest_xseqno)
+		c->highest_xseqno = (ref->xseqno & ~XREF_DELETE_MARKER);
 
 	spin_lock(&c->erase_completion_lock);
-	c->unchecked_size -= totlen; c->used_size += totlen;
-	jeb->unchecked_size -= totlen; jeb->used_size += totlen;
-	ref->node->flash_offset = ref_offset(ref->node) | REF_PRISTINE;
+ complete:
+	for (raw=ref->node; raw != (void *)ref; raw=raw->next_in_ino) {
+		jeb = &c->blocks[ref_offset(raw) / c->sector_size];
+		totlen = PAD(ref_totlen(c, jeb, raw));
+		if (ref_flags(raw) == REF_UNCHECKED) {
+			c->unchecked_size -= totlen; c->used_size += totlen;
+			jeb->unchecked_size -= totlen; jeb->used_size += totlen;
+		}
+		raw->flash_offset = ref_offset(raw) | ((ref->node==raw) ? REF_PRISTINE : REF_NORMAL);
+	}
 	spin_unlock(&c->erase_completion_lock);
 
 	dbg_xattr("success on verifying xref (ino=%u, xid=%u) at %#08x\n",
@@ -499,58 +493,12 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref
 	return 0;
 }
 
-static void delete_xattr_ref_node(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
-{
-	struct jffs2_raw_xref rr;
-	size_t length;
-	int rc;
-
-	if (jffs2_sum_active()) {
-		memset(&rr, 0xff, sizeof(rr));
-		rc = jffs2_flash_read(c, ref_offset(ref->node),
-				      sizeof(struct jffs2_unknown_node),
-				      &length, (char *)&rr);
-		if (rc || length != sizeof(struct jffs2_unknown_node)) {
-			JFFS2_ERROR("jffs2_flash_read()=%d, req=%zu, read=%zu at %#08x\n",
-				    rc, sizeof(struct jffs2_unknown_node),
-				    length, ref_offset(ref->node));
-		}
-		rc = jffs2_flash_write(c, ref_offset(ref->node), sizeof(rr),
-				       &length, (char *)&rr);
-		if (rc || length != sizeof(struct jffs2_raw_xref)) {
-			JFFS2_ERROR("jffs2_flash_write()=%d, req=%zu, wrote=%zu at %#08x\n",
-				    rc, sizeof(rr), length, ref_offset(ref->node));
-		}
-	}
-	spin_lock(&c->erase_completion_lock);
-	ref->node->next_in_ino = NULL;
-	spin_unlock(&c->erase_completion_lock);
-	jffs2_mark_node_obsolete(c, ref->node);
-	ref->node = NULL;
-}
-
-static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
-{
-	/* must be called under down_write(xattr_sem) */
-	struct jffs2_xattr_datum *xd;
-
-	BUG_ON(!ref->node);
-	delete_xattr_ref_node(c, ref);
-
-	xd = ref->xd;
-	xd->refcnt--;
-	if (!xd->refcnt)
-		delete_xattr_datum(c, xd);
-	jffs2_free_xattr_ref(ref);
-}
-
 static int save_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
 {
 	/* must be called under down_write(xattr_sem) */
-	struct jffs2_raw_node_ref *raw;
 	struct jffs2_raw_xref rr;
 	size_t length;
-	uint32_t phys_ofs = write_ofs(c);
+	uint32_t xseqno, phys_ofs = write_ofs(c);
 	int ret;
 
 	rr.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -558,8 +506,16 @@ static int save_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
 	rr.totlen = cpu_to_je32(PAD(sizeof(rr)));
 	rr.hdr_crc = cpu_to_je32(crc32(0, &rr, sizeof(struct jffs2_unknown_node) - 4));
 
-	rr.ino = cpu_to_je32(ref->ic->ino);
-	rr.xid = cpu_to_je32(ref->xd->xid);
+	xseqno = (c->highest_xseqno += 2);
+	if (is_xattr_ref_dead(ref)) {
+		xseqno |= XREF_DELETE_MARKER;
+		rr.ino = cpu_to_je32(ref->ino);
+		rr.xid = cpu_to_je32(ref->xid);
+	} else {
+		rr.ino = cpu_to_je32(ref->ic->ino);
+		rr.xid = cpu_to_je32(ref->xd->xid);
+	}
+	rr.xseqno = cpu_to_je32(xseqno);
 	rr.node_crc = cpu_to_je32(crc32(0, &rr, sizeof(rr) - 4));
 
 	ret = jffs2_flash_write(c, phys_ofs, sizeof(rr), &length, (char *)&rr);
@@ -572,12 +528,9 @@ static int save_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
 
 		return ret;
 	}
-
-	raw = jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(sizeof(rr)), NULL);
-	/* FIXME */ raw->next_in_ino = (void *)ref;
-	if (ref->node)
-		delete_xattr_ref_node(c, ref);
-	ref->node = raw;
+	/* success */
+	ref->xseqno = xseqno;
+	jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(sizeof(rr)), (void *)ref);
 
 	dbg_xattr("success on saving xref (ino=%u, xid=%u)\n", ref->ic->ino, ref->xd->xid);
 
@@ -610,6 +563,27 @@ static struct jffs2_xattr_ref *create_xattr_ref(struct jffs2_sb_info *c, struct
 	return ref; /* success */
 }
 
+static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
+{
+	/* must be called under down_write(xattr_sem) */
+	struct jffs2_xattr_datum *xd;
+
+	xd = ref->xd;
+	ref->xseqno |= XREF_DELETE_MARKER;
+	ref->ino = ref->ic->ino;
+	ref->xid = ref->xd->xid;
+	spin_lock(&c->erase_completion_lock);
+	ref->next = c->xref_dead_list;
+	c->xref_dead_list = ref;
+	spin_unlock(&c->erase_completion_lock);
+
+	dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) was removed.\n",
+		  ref->ino, ref->xid, ref->xseqno);
+
+	if (atomic_dec_and_test(&xd->refcnt))
+		delete_xattr_datum(c, xd);
+}
+
 void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
 {
 	/* It's called from jffs2_clear_inode() on inode removing.
@@ -638,8 +612,7 @@ void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *i
 	for (ref = ic->xref; ref; ref = _ref) {
 		_ref = ref->next;
 		xd = ref->xd;
-		xd->refcnt--;
-		if (!xd->refcnt) {
+		if (atomic_dec_and_test(&xd->refcnt)) {
 			unload_xattr_datum(c, xd);
 			jffs2_free_xattr_datum(xd);
 		}
@@ -655,7 +628,7 @@ static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cac
 	 * duplicate name/value pairs. If duplicate name/value pair would be found,
 	 * one will be removed.
 	 */
-	struct jffs2_xattr_ref *ref, *cmp, **pref;
+	struct jffs2_xattr_ref *ref, *cmp, **pref, **pcmp;
 	int rc = 0;
 
 	if (likely(ic->flags & INO_FLAGS_XATTR_CHECKED))
@@ -673,13 +646,13 @@ static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cac
 			} else if (unlikely(rc < 0))
 				goto out;
 		}
-		for (cmp=ref->next, pref=&ref->next; cmp; pref=&cmp->next, cmp=cmp->next) {
+		for (cmp=ref->next, pcmp=&ref->next; cmp; pcmp=&cmp->next, cmp=cmp->next) {
 			if (!cmp->xd->xname) {
 				ref->xd->flags |= JFFS2_XFLAGS_BIND;
 				rc = load_xattr_datum(c, cmp->xd);
 				ref->xd->flags &= ~JFFS2_XFLAGS_BIND;
 				if (unlikely(rc > 0)) {
-					*pref = cmp->next;
+					*pcmp = cmp->next;
 					delete_xattr_ref(c, cmp);
 					goto retry;
 				} else if (unlikely(rc < 0))
@@ -687,8 +660,13 @@ static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cac
 			}
 			if (ref->xd->xprefix == cmp->xd->xprefix
 			    && !strcmp(ref->xd->xname, cmp->xd->xname)) {
-				*pref = cmp->next;
-				delete_xattr_ref(c, cmp);
+				if (ref->xseqno > cmp->xseqno) {
+					*pcmp = cmp->next;
+					delete_xattr_ref(c, cmp);
+				} else {
+					*pref = ref->next;
+					delete_xattr_ref(c, ref);
+				}
 				goto retry;
 			}
 		}
@@ -719,9 +697,13 @@ void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c)
 	for (i=0; i < XATTRINDEX_HASHSIZE; i++)
 		INIT_LIST_HEAD(&c->xattrindex[i]);
 	INIT_LIST_HEAD(&c->xattr_unchecked);
+	INIT_LIST_HEAD(&c->xattr_dead_list);
+	c->xref_dead_list = NULL;
 	c->xref_temp = NULL;
 
 	init_rwsem(&c->xattr_sem);
+	c->highest_xid = 0;
+	c->highest_xseqno = 0;
 	c->xdatum_mem_usage = 0;
 	c->xdatum_mem_threshold = 32 * 1024;	/* Default 32KB */
 }
@@ -751,7 +733,11 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c)
 		_ref = ref->next;
 		jffs2_free_xattr_ref(ref);
 	}
-	c->xref_temp = NULL;
+
+	for (ref=c->xref_dead_list; ref; ref = _ref) {
+		_ref = ref->next;
+		jffs2_free_xattr_ref(ref);
+	}
 
 	for (i=0; i < XATTRINDEX_HASHSIZE; i++) {
 		list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) {
@@ -761,100 +747,143 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c)
 			jffs2_free_xattr_datum(xd);
 		}
 	}
+
+	list_for_each_entry_safe(xd, _xd, &c->xattr_dead_list, xindex) {
+		list_del(&xd->xindex);
+		jffs2_free_xattr_datum(xd);
+	}
 }
 
+#define XREF_TMPHASH_SIZE	(128)
 void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
 {
 	struct jffs2_xattr_ref *ref, *_ref;
+	struct jffs2_xattr_ref *xref_tmphash[XREF_TMPHASH_SIZE];
 	struct jffs2_xattr_datum *xd, *_xd;
 	struct jffs2_inode_cache *ic;
-	int i, xdatum_count =0, xdatum_unchecked_count = 0, xref_count = 0;
+	struct jffs2_raw_node_ref *raw;
+	int i, xdatum_count = 0, xdatum_unchecked_count = 0, xref_count = 0;
+	int xdatum_orphan_count = 0, xref_orphan_count = 0, xref_dead_count = 0;
 
 	BUG_ON(!(c->flags & JFFS2_SB_FLAG_BUILDING));
 
-	/* Phase.1 */
+	/* Phase.1 : Merge same xref */
+	for (i=0; i < XREF_TMPHASH_SIZE; i++)
+		xref_tmphash[i] = NULL;
 	for (ref=c->xref_temp; ref; ref=_ref) {
+		struct jffs2_xattr_ref *tmp;
+
 		_ref = ref->next;
-		/* checking REF_UNCHECKED nodes */
 		if (ref_flags(ref->node) != REF_PRISTINE) {
 			if (verify_xattr_ref(c, ref)) {
-				delete_xattr_ref_node(c, ref);
+				BUG_ON(ref->node->next_in_ino != (void *)ref);
+				ref->node->next_in_ino = NULL;
+				jffs2_mark_node_obsolete(c, ref->node);
 				jffs2_free_xattr_ref(ref);
 				continue;
 			}
 		}
-		/* At this point, ref->xid and ref->ino contain XID and inode number.
-		   ref->xd and ref->ic are not valid yet. */
-		xd = jffs2_find_xattr_datum(c, ref->xid);
-		ic = jffs2_get_ino_cache(c, ref->ino);
-		if (!xd || !ic) {
-			if (ref_flags(ref->node) != REF_UNCHECKED)
-				JFFS2_WARNING("xref(ino=%u, xid=%u) is orphan. \n",
-					      ref->ino, ref->xid);
-			delete_xattr_ref_node(c, ref);
+
+		i = (ref->ino ^ ref->xid) % XREF_TMPHASH_SIZE;
+		for (tmp=xref_tmphash[i]; tmp; tmp=tmp->next) {
+			if (tmp->ino == ref->ino && tmp->xid == ref->xid)
+				break;
+		}
+		if (tmp) {
+			raw = ref->node;
+			if (ref->xseqno > tmp->xseqno) {
+				tmp->xseqno = ref->xseqno;
+				raw->next_in_ino = tmp->node;
+				tmp->node = raw;
+			} else {
+				raw->next_in_ino = tmp->node->next_in_ino;
+				tmp->node->next_in_ino = raw;
+			}
 			jffs2_free_xattr_ref(ref);
 			continue;
+		} else {
+			ref->next = xref_tmphash[i];
+			xref_tmphash[i] = ref;
 		}
-		ref->xd = xd;
-		ref->ic = ic;
-		xd->refcnt++;
-		ref->next = ic->xref;
-		ic->xref = ref;
-		xref_count++;
 	}
 	c->xref_temp = NULL;
-	/* After this, ref->xid/ino are NEVER used. */
 
-	/* Phase.2 */
+	/* Phase.2 : Bind xref with inode_cache and xattr_datum */
+	for (i=0; i < XREF_TMPHASH_SIZE; i++) {
+		for (ref=xref_tmphash[i]; ref; ref=_ref) {
+			xref_count++;
+			_ref = ref->next;
+			if (is_xattr_ref_dead(ref)) {
+				ref->next = c->xref_dead_list;
+				c->xref_dead_list = ref;
+				xref_dead_count++;
+				continue;
+			}
+			/* At this point, ref->xid and ref->ino contain XID and inode number.
+			   ref->xd and ref->ic are not valid yet. */
+			xd = jffs2_find_xattr_datum(c, ref->xid);
+			ic = jffs2_get_ino_cache(c, ref->ino);
+			if (!xd || !ic) {
+				dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n",
+					  ref->ino, ref->xid, ref->xseqno);
+				ref->xseqno |= XREF_DELETE_MARKER;
+				ref->next = c->xref_dead_list;
+				c->xref_dead_list = ref;
+				xref_orphan_count++;
+				continue;
+			}
+			ref->xd = xd;
+			ref->ic = ic;
+			atomic_inc(&xd->refcnt);
+			ref->next = ic->xref;
+			ic->xref = ref;
+		}
+	}
+
+	/* Phase.3 : Link unchecked xdatum to xattr_unchecked list */
 	for (i=0; i < XATTRINDEX_HASHSIZE; i++) {
 		list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) {
+			xdatum_count++;
 			list_del_init(&xd->xindex);
-			if (!xd->refcnt) {
-				if (ref_flags(xd->node) != REF_UNCHECKED)
-					JFFS2_WARNING("orphan xdatum(xid=%u, version=%u) at %#08x\n",
-						      xd->xid, xd->version, ref_offset(xd->node));
-				delete_xattr_datum(c, xd);
+			if (!atomic_read(&xd->refcnt)) {
+				dbg_xattr("xdatum(xid=%u, version=%u) is orphan.\n",
+					  xd->xid, xd->version);
+				xd->flags |= JFFS2_XFLAGS_DEAD;
+				list_add(&xd->xindex, &c->xattr_unchecked);
+				xdatum_orphan_count++;
 				continue;
 			}
-			if (ref_flags(xd->node) != REF_PRISTINE) {
-				dbg_xattr("unchecked xdatum(xid=%u) at %#08x\n",
-					  xd->xid, ref_offset(xd->node));
+			if (is_xattr_datum_unchecked(c, xd)) {
+				dbg_xattr("unchecked xdatum(xid=%u, version=%u)\n",
+					  xd->xid, xd->version);
 				list_add(&xd->xindex, &c->xattr_unchecked);
 				xdatum_unchecked_count++;
 			}
-			xdatum_count++;
 		}
 	}
 	/* build complete */
-	JFFS2_NOTICE("complete building xattr subsystem, %u of xdatum (%u unchecked) and "
-		     "%u of xref found.\n", xdatum_count, xdatum_unchecked_count, xref_count);
+	JFFS2_NOTICE("complete building xattr subsystem, %u of xdatum"
+		     " (%u unchecked, %u orphan) and "
+		     "%u of xref (%u dead, %u orphan) found.\n",
+		     xdatum_count, xdatum_unchecked_count, xdatum_orphan_count,
+		     xref_count, xref_dead_count, xref_orphan_count);
 }
 
 struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
 						  uint32_t xid, uint32_t version)
 {
-	struct jffs2_xattr_datum *xd, *_xd;
+	struct jffs2_xattr_datum *xd;
 
-	_xd = jffs2_find_xattr_datum(c, xid);
-	if (_xd) {
-		dbg_xattr("duplicate xdatum (xid=%u, version=%u/%u) at %#08x\n",
-			  xid, version, _xd->version, ref_offset(_xd->node));
-		if (version < _xd->version)
-			return ERR_PTR(-EEXIST);
-	}
-	xd = jffs2_alloc_xattr_datum();
-	if (!xd)
-		return ERR_PTR(-ENOMEM);
-	xd->xid = xid;
-	xd->version = version;
-	if (xd->xid > c->highest_xid)
-		c->highest_xid = xd->xid;
-	list_add_tail(&xd->xindex, &c->xattrindex[xid % XATTRINDEX_HASHSIZE]);
-
-	if (_xd) {
-		list_del_init(&_xd->xindex);
-		delete_xattr_datum_node(c, _xd);
-		jffs2_free_xattr_datum(_xd);
+	xd = jffs2_find_xattr_datum(c, xid);
+	if (!xd) {
+		xd = jffs2_alloc_xattr_datum();
+		if (!xd)
+			return ERR_PTR(-ENOMEM);
+		xd->xid = xid;
+		xd->version = version;
+		if (xd->xid > c->highest_xid)
+			c->highest_xid = xd->xid;
+		list_add_tail(&xd->xindex, &c->xattrindex[xid % XATTRINDEX_HASHSIZE]);
 	}
 	return xd;
 }
@@ -1080,9 +1109,23 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
 				goto out;
 			}
 			if (!buffer) {
-				*pref = ref->next;
-				delete_xattr_ref(c, ref);
-				rc = 0;
+				ref->ino = ic->ino;
+				ref->xid = xd->xid;
+				ref->xseqno |= XREF_DELETE_MARKER;
+				rc = save_xattr_ref(c, ref);
+				if (!rc) {
+					*pref = ref->next;
+					spin_lock(&c->erase_completion_lock);
+					ref->next = c->xref_dead_list;
+					c->xref_dead_list = ref;
+					spin_unlock(&c->erase_completion_lock);
+					if (atomic_dec_and_test(&xd->refcnt))
+						delete_xattr_datum(c, xd);
+				} else {
+					ref->ic = ic;
+					ref->xd = xd;
+					ref->xseqno &= ~XREF_DELETE_MARKER;
+				}
 				goto out;
 			}
 			goto found;
@@ -1094,7 +1137,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
 		goto out;
 	}
 	if (!buffer) {
-		rc = -EINVAL;
+		rc = -ENODATA;
 		goto out;
 	}
  found:
@@ -1110,16 +1153,14 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
 	request = PAD(sizeof(struct jffs2_raw_xref));
 	rc = jffs2_reserve_space(c, request, &length,
 				 ALLOC_NORMAL, JFFS2_SUMMARY_XREF_SIZE);
+	down_write(&c->xattr_sem);
 	if (rc) {
 		JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request);
-		down_write(&c->xattr_sem);
-		xd->refcnt--;
-		if (!xd->refcnt)
+		if (atomic_dec_and_test(&xd->refcnt))
 			delete_xattr_datum(c, xd);
 		up_write(&c->xattr_sem);
 		return rc;
 	}
-	down_write(&c->xattr_sem);
 	if (ref)
 		*pref = ref->next;
 	newref = create_xattr_ref(c, ic, xd);
@@ -1129,8 +1170,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
 			ic->xref = ref;
 		}
 		rc = PTR_ERR(newref);
-		xd->refcnt--;
-		if (!xd->refcnt)
+		if (atomic_dec_and_test(&xd->refcnt))
 			delete_xattr_datum(c, xd);
 	} else if (ref) {
 		delete_xattr_ref(c, ref);
@@ -1142,38 +1182,40 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
 }
 
 /* -------- garbage collector functions -------------
- * jffs2_garbage_collect_xattr_datum(c, xd)
+ * jffs2_garbage_collect_xattr_datum(c, xd, raw)
  *   is used to move xdatum into new node.
- * jffs2_garbage_collect_xattr_ref(c, ref)
+ * jffs2_garbage_collect_xattr_ref(c, ref, raw)
  *   is used to move xref into new node.
  * jffs2_verify_xattr(c)
  *   is used to call do_verify_xattr_datum() before garbage collecting.
+ * jffs2_release_xattr_datum(c, xd)
+ *   is used to release an in-memory object of xdatum.
+ * jffs2_release_xattr_ref(c, ref)
+ *   is used to release an in-memory object of xref.
  * -------------------------------------------------- */
-int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
+int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd,
+				      struct jffs2_raw_node_ref *raw)
 {
 	uint32_t totlen, length, old_ofs;
-	int rc = -EINVAL;
+	int rc = 0;
 
 	down_write(&c->xattr_sem);
-	BUG_ON(!xd->node);
-
-	old_ofs = ref_offset(xd->node);
-	totlen = ref_totlen(c, c->gcblock, xd->node);
-	if (totlen < sizeof(struct jffs2_raw_xattr))
+	if (xd->node != raw)
+		goto out;
+	if (xd->flags & (JFFS2_XFLAGS_DEAD|JFFS2_XFLAGS_INVALID))
 		goto out;
 
-	if (!xd->xname) {
-		rc = load_xattr_datum(c, xd);
-		if (unlikely(rc > 0)) {
-			delete_xattr_datum_node(c, xd);
-			rc = 0;
-			goto out;
-		} else if (unlikely(rc < 0))
-			goto out;
+	rc = load_xattr_datum(c, xd);
+	if (unlikely(rc)) {
+		rc = (rc > 0) ? 0 : rc;
+		goto out;
 	}
+	old_ofs = ref_offset(xd->node);
+	totlen = PAD(sizeof(struct jffs2_raw_xattr)
+			+ xd->name_len + 1 + xd->value_len);
 	rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XATTR_SIZE);
-	if (rc || length < totlen) {
-		JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, totlen);
+	if (rc) {
+		JFFS2_WARNING("jffs2_reserve_space_gc()=%d, request=%u\n", rc, totlen);
 		rc = rc ? rc : -EBADFD;
 		goto out;
 	}
@@ -1182,27 +1224,32 @@ int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xatt
 		dbg_xattr("xdatum (xid=%u, version=%u) GC'ed from %#08x to %08x\n",
 			  xd->xid, xd->version, old_ofs, ref_offset(xd->node));
  out:
+	if (!rc)
+		jffs2_mark_node_obsolete(c, raw);
 	up_write(&c->xattr_sem);
 	return rc;
 }
 
-
-int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
+int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref,
+				    struct jffs2_raw_node_ref *raw)
 {
 	uint32_t totlen, length, old_ofs;
-	int rc = -EINVAL;
+	int rc = 0;
 
 	down_write(&c->xattr_sem);
 	BUG_ON(!ref->node);
 
+	if (ref->node != raw)
+		goto out;
+	if (is_xattr_ref_dead(ref) && (raw->next_in_ino == (void *)ref))
+		goto out;
+
 	old_ofs = ref_offset(ref->node);
 	totlen = ref_totlen(c, c->gcblock, ref->node);
-	if (totlen != sizeof(struct jffs2_raw_xref))
-		goto out;
 
 	rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE);
-	if (rc || length < totlen) {
-		JFFS2_WARNING("%s: jffs2_reserve_space() = %d, request = %u\n",
+	if (rc) {
+		JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n",
 			      __FUNCTION__, rc, totlen);
 		rc = rc ? rc : -EBADFD;
 		goto out;
@@ -1212,6 +1259,8 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_
 		dbg_xattr("xref (ino=%u, xid=%u) GC'ed from %#08x to %08x\n",
 			  ref->ic->ino, ref->xd->xid, old_ofs, ref_offset(ref->node));
  out:
+	if (!rc)
+		jffs2_mark_node_obsolete(c, raw);
 	up_write(&c->xattr_sem);
 	return rc;
 }
@@ -1219,20 +1268,59 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_
 int jffs2_verify_xattr(struct jffs2_sb_info *c)
 {
 	struct jffs2_xattr_datum *xd, *_xd;
+	struct jffs2_eraseblock *jeb;
+	struct jffs2_raw_node_ref *raw;
+	uint32_t totlen;
 	int rc;
 
 	down_write(&c->xattr_sem);
 	list_for_each_entry_safe(xd, _xd, &c->xattr_unchecked, xindex) {
 		rc = do_verify_xattr_datum(c, xd);
-		if (rc == 0) {
-			list_del_init(&xd->xindex);
-			break;
-		} else if (rc > 0) {
-			list_del_init(&xd->xindex);
-			delete_xattr_datum_node(c, xd);
+		if (rc < 0)
+			continue;
+		list_del_init(&xd->xindex);
+		spin_lock(&c->erase_completion_lock);
+		for (raw=xd->node; raw != (void *)xd; raw=raw->next_in_ino) {
+			if (ref_flags(raw) != REF_UNCHECKED)
+				continue;
+			jeb = &c->blocks[ref_offset(raw) / c->sector_size];
+			totlen = PAD(ref_totlen(c, jeb, raw));
+			c->unchecked_size -= totlen; c->used_size += totlen;
+			jeb->unchecked_size -= totlen; jeb->used_size += totlen;
+			raw->flash_offset = ref_offset(raw)
+				| ((xd->node == (void *)raw) ? REF_PRISTINE : REF_NORMAL);
 		}
+		if (xd->flags & JFFS2_XFLAGS_DEAD)
+			list_add(&xd->xindex, &c->xattr_dead_list);
+		spin_unlock(&c->erase_completion_lock);
 	}
 	up_write(&c->xattr_sem);
-
 	return list_empty(&c->xattr_unchecked) ? 1 : 0;
 }
+
+void jffs2_release_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
+{
+	/* must be called under spin_lock(&c->erase_completion_lock) */
+	if (atomic_read(&xd->refcnt) || xd->node != (void *)xd)
+		return;
+
+	list_del(&xd->xindex);
+	jffs2_free_xattr_datum(xd);
+}
+
+void jffs2_release_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref)
+{
+	/* must be called under spin_lock(&c->erase_completion_lock) */
+	struct jffs2_xattr_ref *tmp, **ptmp;
+
+	if (ref->node != (void *)ref)
+		return;
+
+	for (tmp=c->xref_dead_list, ptmp=&c->xref_dead_list; tmp; ptmp=&tmp->next, tmp=tmp->next) {
+		if (ref == tmp) {
+			*ptmp = tmp->next;
+			break;
+		}
+	}
+	jffs2_free_xattr_ref(ref);
+}
diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h
index 2c199856c582..06a5c69dcf8b 100644
--- a/fs/jffs2/xattr.h
+++ b/fs/jffs2/xattr.h
@@ -16,6 +16,8 @@
 
 #define JFFS2_XFLAGS_HOT	(0x01)	/* This datum is HOT */
 #define JFFS2_XFLAGS_BIND	(0x02)	/* This datum is not reclaimed */
+#define JFFS2_XFLAGS_DEAD	(0x40)	/* This datum is already dead */
+#define JFFS2_XFLAGS_INVALID	(0x80)	/* This datum contains crc error */
 
 struct jffs2_xattr_datum
 {
@@ -23,10 +25,10 @@ struct jffs2_xattr_datum
 	struct jffs2_raw_node_ref *node;
 	uint8_t class;
 	uint8_t flags;
-	uint16_t xprefix;			/* see JFFS2_XATTR_PREFIX_* */
+	uint16_t xprefix;		/* see JFFS2_XATTR_PREFIX_* */
 
 	struct list_head xindex;	/* chained from c->xattrindex[n] */
-	uint32_t refcnt;		/* # of xattr_ref refers this */
+	atomic_t refcnt;		/* # of xattr_ref refers this */
 	uint32_t xid;
 	uint32_t version;
 
@@ -47,6 +49,7 @@ struct jffs2_xattr_ref
 	uint8_t flags;		/* Currently unused */
 	u16 unused;
 
+	uint32_t xseqno;
 	union {
 		struct jffs2_inode_cache *ic;	/* reference to jffs2_inode_cache */
 		uint32_t ino;			/* only used in scanning/building  */
@@ -58,6 +61,12 @@ struct jffs2_xattr_ref
 	struct jffs2_xattr_ref *next;		/* chained from ic->xref_list */
 };
 
+#define XREF_DELETE_MARKER	(0x00000001)
+static inline int is_xattr_ref_dead(struct jffs2_xattr_ref *ref)
+{
+	return ((ref->xseqno & XREF_DELETE_MARKER) != 0);
+}
+
 #ifdef CONFIG_JFFS2_FS_XATTR
 
 extern void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c);
@@ -70,9 +79,13 @@ extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c
 extern void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic);
 extern void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic);
 
-extern int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd);
-extern int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref);
+extern int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd,
+					     struct jffs2_raw_node_ref *raw);
+extern int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref,
+					   struct jffs2_raw_node_ref *raw);
 extern int jffs2_verify_xattr(struct jffs2_sb_info *c);
+extern void jffs2_release_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd);
+extern void jffs2_release_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref);
 
 extern int do_jffs2_getxattr(struct inode *inode, int xprefix, const char *xname,
 			     char *buffer, size_t size);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 04eb78f1252e..43e3f566aad6 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -305,7 +305,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
 				offset, nr_segs, jfs_get_block, NULL);
 }
 
-struct address_space_operations jfs_aops = {
+const struct address_space_operations jfs_aops = {
 	.readpage	= jfs_readpage,
 	.readpages	= jfs_readpages,
 	.writepage	= jfs_writepage,
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 5549378358bf..4d52593a5fc6 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -126,7 +126,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
 
 	/* allocate the disk blocks for the extent.  initially, extBalloc()
 	 * will try to allocate disk blocks for the requested size (xlen). 
-	 * if this fails (xlen contigious free blocks not avaliable), it'll
+	 * if this fails (xlen contiguous free blocks not available), it'll
 	 * try to allocate a smaller number of blocks (producing a smaller
 	 * extent), with this smaller number of blocks consisting of the
 	 * requested number of blocks rounded down to the next smaller
@@ -493,7 +493,7 @@ int extFill(struct inode *ip, xad_t * xp)
  *
  *		initially, we will try to allocate disk blocks for the
  *		requested size (nblocks).  if this fails (nblocks 
- *		contigious free blocks not avaliable), we'll try to allocate
+ *		contiguous free blocks not available), we'll try to allocate
  *		a smaller number of blocks (producing a smaller extent), with
  *		this smaller number of blocks consisting of the requested
  *		number of blocks rounded down to the next smaller power of 2
@@ -529,7 +529,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
 
 	/* get the number of blocks to initially attempt to allocate.
 	 * we'll first try the number of blocks requested unless this
-	 * number is greater than the maximum number of contigious free
+	 * number is greater than the maximum number of contiguous free
 	 * blocks in the map. in that case, we'll start off with the 
 	 * maximum free.
 	 */
@@ -586,7 +586,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
  *		in place.  if this fails, we'll try to move the extent
  *		to a new set of blocks. if moving the extent, we initially
  *		will try to allocate disk blocks for the requested size
- *		(nnew).  if this fails 	(nnew contigious free blocks not
+ *		(nnew).  if this fails 	(nnew contiguous free blocks not
  *		avaliable), we'll try  to allocate a smaller number of
  *		blocks (producing a smaller extent), with this smaller
  *		number of blocks consisting of the requested number of
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index c30072674464..b5c7da6190dc 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -33,7 +33,7 @@ extern void jfs_free_zero_link(struct inode *);
 extern struct dentry *jfs_get_parent(struct dentry *dentry);
 extern void jfs_set_inode_flags(struct inode *);
 
-extern struct address_space_operations jfs_aops;
+extern const struct address_space_operations jfs_aops;
 extern struct inode_operations jfs_dir_inode_operations;
 extern const struct file_operations jfs_dir_operations;
 extern struct inode_operations jfs_file_inode_operations;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 7f6e88039700..e1e0a6e6ebdf 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -577,7 +577,7 @@ static void metapage_invalidatepage(struct page *page, unsigned long offset)
 	metapage_releasepage(page, 0);
 }
 
-struct address_space_operations jfs_metapage_aops = {
+const struct address_space_operations jfs_metapage_aops = {
 	.readpage	= metapage_readpage,
 	.writepage	= metapage_writepage,
 	.sync_page	= block_sync_page,
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index f0b7d3282b07..d17a3290f5aa 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -139,7 +139,7 @@ static inline void metapage_homeok(struct metapage *mp)
 	put_metapage(mp);
 }
 
-extern struct address_space_operations jfs_metapage_aops;
+extern const struct address_space_operations jfs_metapage_aops;
 
 /*
  * This routines invalidate all pages for an extent.
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index a6fb509b7341..9ea91c5eeb7b 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -335,7 +335,7 @@ static sector_t minix_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,minix_get_block);
 }
-static struct address_space_operations minix_aops = {
+static const struct address_space_operations minix_aops = {
 	.readpage = minix_readpage,
 	.writepage = minix_writepage,
 	.sync_page = block_sync_page,
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 90d2ea28f333..6c51c1198464 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -105,7 +105,7 @@ static struct super_operations ncp_sops =
 
 extern struct dentry_operations ncp_root_dentry_operations;
 #if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
-extern struct address_space_operations ncp_symlink_aops;
+extern const struct address_space_operations ncp_symlink_aops;
 extern int ncp_symlink(struct inode*, struct dentry*, const char*);
 #endif
 
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c
index e935f1b34bc2..f76b1392a012 100644
--- a/fs/ncpfs/symlink.c
+++ b/fs/ncpfs/symlink.c
@@ -99,7 +99,7 @@ fail:
 /*
  * symlinks can't do much...
  */
-struct address_space_operations ncp_symlink_aops = {
+const struct address_space_operations ncp_symlink_aops = {
 	.readpage	= ncp_symlink_readpage,
 };
 	
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 402005c35ab3..8ca9707be6c9 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -909,7 +909,7 @@ int __init nfs_init_directcache(void)
  * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures
  *
  */
-void __exit nfs_destroy_directcache(void)
+void nfs_destroy_directcache(void)
 {
 	if (kmem_cache_destroy(nfs_direct_cachep))
 		printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index add289138836..cc2b874ad5a4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -315,7 +315,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
 	return !nfs_wb_page(page->mapping->host, page);
 }
 
-struct address_space_operations nfs_file_aops = {
+const struct address_space_operations nfs_file_aops = {
 	.readpage = nfs_readpage,
 	.readpages = nfs_readpages,
 	.set_page_dirty = __set_page_dirty_nobuffers,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 51bc88b662fe..c5b916605fb0 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1132,7 +1132,7 @@ static int __init nfs_init_inodecache(void)
 	return 0;
 }
 
-static void __exit nfs_destroy_inodecache(void)
+static void nfs_destroy_inodecache(void)
 {
 	if (kmem_cache_destroy(nfs_inode_cachep))
 		printk(KERN_INFO "nfs_inode_cache: not all structures were freed\n");
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index bd2815e2dec1..4fe51c1292bb 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -31,15 +31,15 @@ extern struct svc_version nfs4_callback_version1;
 
 /* pagelist.c */
 extern int __init nfs_init_nfspagecache(void);
-extern void __exit nfs_destroy_nfspagecache(void);
+extern void nfs_destroy_nfspagecache(void);
 extern int __init nfs_init_readpagecache(void);
-extern void __exit nfs_destroy_readpagecache(void);
+extern void nfs_destroy_readpagecache(void);
 extern int __init nfs_init_writepagecache(void);
-extern void __exit nfs_destroy_writepagecache(void);
+extern void nfs_destroy_writepagecache(void);
 
 #ifdef CONFIG_NFS_DIRECTIO
 extern int __init nfs_init_directcache(void);
-extern void __exit nfs_destroy_directcache(void);
+extern void nfs_destroy_directcache(void);
 #else
 #define nfs_init_directcache() (0)
 #define nfs_destroy_directcache() do {} while(0)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ef9429643ebc..d89f6fb3b3a3 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -390,7 +390,7 @@ int __init nfs_init_nfspagecache(void)
 	return 0;
 }
 
-void __exit nfs_destroy_nfspagecache(void)
+void nfs_destroy_nfspagecache(void)
 {
 	if (kmem_cache_destroy(nfs_page_cachep))
 		printk(KERN_INFO "nfs_page: not all structures were freed\n");
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 41c2ffee24f5..32cf3773af0c 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -711,7 +711,7 @@ int __init nfs_init_readpagecache(void)
 	return 0;
 }
 
-void __exit nfs_destroy_readpagecache(void)
+void nfs_destroy_readpagecache(void)
 {
 	mempool_destroy(nfs_rdata_mempool);
 	if (kmem_cache_destroy(nfs_rdata_cachep))
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b383fdd3a15c..8fccb9cb173b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1551,7 +1551,7 @@ int __init nfs_init_writepagecache(void)
 	return 0;
 }
 
-void __exit nfs_destroy_writepagecache(void)
+void nfs_destroy_writepagecache(void)
 {
 	mempool_destroy(nfs_commit_mempool);
 	mempool_destroy(nfs_wdata_mempool);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1630b5670dc2..7c7d01672d35 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -123,7 +123,7 @@ static void release_stateid(struct nfs4_stateid *stp, int flags);
  */
 
 /* recall_lock protects the del_recall_lru */
-static spinlock_t recall_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(recall_lock);
 static struct list_head del_recall_lru;
 
 static void
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 580412d330cb..bc579bfdfbd8 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1544,7 +1544,7 @@ err_out:
 /**
  * ntfs_aops - general address space operations for inodes and attributes
  */
-struct address_space_operations ntfs_aops = {
+const struct address_space_operations ntfs_aops = {
 	.readpage	= ntfs_readpage,	/* Fill page with data. */
 	.sync_page	= block_sync_page,	/* Currently, just unplugs the
 						   disk request queue. */
@@ -1560,7 +1560,7 @@ struct address_space_operations ntfs_aops = {
  * ntfs_mst_aops - general address space operations for mst protecteed inodes
  *		   and attributes
  */
-struct address_space_operations ntfs_mst_aops = {
+const struct address_space_operations ntfs_mst_aops = {
 	.readpage	= ntfs_readpage,	/* Fill page with data. */
 	.sync_page	= block_sync_page,	/* Currently, just unplugs the
 						   disk request queue. */
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index bf7b3d7c0930..ddd3d503097c 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -57,8 +57,8 @@ extern struct kmem_cache *ntfs_attr_ctx_cache;
 extern struct kmem_cache *ntfs_index_ctx_cache;
 
 /* The various operations structs defined throughout the driver files. */
-extern struct address_space_operations ntfs_aops;
-extern struct address_space_operations ntfs_mst_aops;
+extern const struct address_space_operations ntfs_aops;
+extern const struct address_space_operations ntfs_mst_aops;
 
 extern const struct  file_operations ntfs_file_ops;
 extern struct inode_operations ntfs_file_inode_ops;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 47152bf9a7f2..cca71317b6d6 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -666,7 +666,7 @@ out:
 	return ret;
 }
 
-struct address_space_operations ocfs2_aops = {
+const struct address_space_operations ocfs2_aops = {
 	.readpage	= ocfs2_readpage,
 	.writepage	= ocfs2_writepage,
 	.prepare_write	= ocfs2_prepare_write,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 21f38accd039..1d26cfcd9f84 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -54,7 +54,7 @@ static DECLARE_RWSEM(o2hb_callback_sem);
  * multiple hb threads are watching multiple regions.  A node is live
  * whenever any of the threads sees activity from the node in its region.
  */
-static spinlock_t o2hb_live_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(o2hb_live_lock);
 static struct list_head o2hb_live_slots[O2NM_MAX_NODES];
 static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
 static LIST_HEAD(o2hb_node_events);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 0f60cc0d3985..1591eb37a723 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -108,7 +108,7 @@
 	    ##args);							\
 } while (0)
 
-static rwlock_t o2net_handler_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(o2net_handler_lock);
 static struct rb_root o2net_handler_tree = RB_ROOT;
 
 static struct o2net_node o2net_nodes[O2NM_MAX_NODES];
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 87ee29cad50b..42775e2bbe2c 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -197,12 +197,14 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 				  lock->ml.node == dlm->node_num ? "master" :
 				  "remote");
 			memcpy(lksb->lvb, res->lvb, DLM_LVB_LEN);
-		} else if (lksb->flags & DLM_LKSB_PUT_LVB) {
-			mlog(0, "setting lvb from lockres for %s node\n",
-				  lock->ml.node == dlm->node_num ? "master" :
-				  "remote");
-			memcpy(res->lvb, lksb->lvb, DLM_LVB_LEN);
 		}
+		/* Do nothing for lvb put requests - they should be done in
+ 		 * place when the lock is downconverted - otherwise we risk
+ 		 * racing gets and puts which could result in old lvb data
+ 		 * being propagated. We leave the put flag set and clear it
+ 		 * here. In the future we might want to clear it at the time
+ 		 * the put is actually done.
+		 */
 		spin_unlock(&res->spinlock);
 	}
 
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 88cc43df18f1..9bdc9cf65991 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -37,7 +37,17 @@
 #define DLM_THREAD_SHUFFLE_INTERVAL    5     // flush everything every 5 passes
 #define DLM_THREAD_MS                  200   // flush at least every 200 ms
 
-#define DLM_HASH_BUCKETS     (PAGE_SIZE / sizeof(struct hlist_head))
+#define DLM_HASH_SIZE_DEFAULT	(1 << 14)
+#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE
+# define DLM_HASH_PAGES		1
+#else
+# define DLM_HASH_PAGES		(DLM_HASH_SIZE_DEFAULT / PAGE_SIZE)
+#endif
+#define DLM_BUCKETS_PER_PAGE	(PAGE_SIZE / sizeof(struct hlist_head))
+#define DLM_HASH_BUCKETS	(DLM_HASH_PAGES * DLM_BUCKETS_PER_PAGE)
+
+/* Intended to make it easier for us to switch out hash functions */
+#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l)
 
 enum dlm_ast_type {
 	DLM_AST = 0,
@@ -61,7 +71,8 @@ static inline int dlm_is_recovery_lock(const char *lock_name, int name_len)
 	return 0;
 }
 
-#define DLM_RECO_STATE_ACTIVE  0x0001
+#define DLM_RECO_STATE_ACTIVE    0x0001
+#define DLM_RECO_STATE_FINALIZE  0x0002
 
 struct dlm_recovery_ctxt
 {
@@ -85,7 +96,7 @@ enum dlm_ctxt_state {
 struct dlm_ctxt
 {
 	struct list_head list;
-	struct hlist_head *lockres_hash;
+	struct hlist_head **lockres_hash;
 	struct list_head dirty_list;
 	struct list_head purge_list;
 	struct list_head pending_asts;
@@ -120,6 +131,7 @@ struct dlm_ctxt
 	struct o2hb_callback_func dlm_hb_down;
 	struct task_struct *dlm_thread_task;
 	struct task_struct *dlm_reco_thread_task;
+	struct workqueue_struct *dlm_worker;
 	wait_queue_head_t dlm_thread_wq;
 	wait_queue_head_t dlm_reco_thread_wq;
 	wait_queue_head_t ast_wq;
@@ -132,6 +144,11 @@ struct dlm_ctxt
 	struct list_head	dlm_eviction_callbacks;
 };
 
+static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned i)
+{
+	return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE);
+}
+
 /* these keventd work queue items are for less-frequently
  * called functions that cannot be directly called from the
  * net message handlers for some reason, usually because
@@ -216,20 +233,29 @@ struct dlm_lock_resource
 	/* WARNING: Please see the comment in dlm_init_lockres before
 	 * adding fields here. */
 	struct hlist_node hash_node;
+	struct qstr lockname;
 	struct kref      refs;
 
-	/* please keep these next 3 in this order
-	 * some funcs want to iterate over all lists */
+	/*
+	 * Please keep granted, converting, and blocked in this order,
+	 * as some funcs want to iterate over all lists.
+	 *
+	 * All four lists are protected by the hash's reference.
+	 */
 	struct list_head granted;
 	struct list_head converting;
 	struct list_head blocked;
+	struct list_head purge;
 
+	/*
+	 * These two lists require you to hold an additional reference
+	 * while they are on the list.
+	 */
 	struct list_head dirty;
 	struct list_head recovering; // dlm_recovery_ctxt.resources list
 
 	/* unused lock resources have their last_used stamped and are
 	 * put on a list for the dlm thread to run. */
-	struct list_head purge;
 	unsigned long    last_used;
 
 	unsigned migration_pending:1;
@@ -238,7 +264,6 @@ struct dlm_lock_resource
 	wait_queue_head_t wq;
 	u8  owner;              //node which owns the lock resource, or unknown
 	u16 state;
-	struct qstr lockname;
 	char lvb[DLM_LVB_LEN];
 };
 
@@ -300,6 +325,15 @@ enum dlm_lockres_list {
 	DLM_BLOCKED_LIST
 };
 
+static inline int dlm_lvb_is_empty(char *lvb)
+{
+	int i;
+	for (i=0; i<DLM_LVB_LEN; i++)
+		if (lvb[i])
+			return 0;
+	return 1;
+}
+
 static inline struct list_head *
 dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx)
 {
@@ -609,7 +643,8 @@ struct dlm_finalize_reco
 {
 	u8 node_idx;
 	u8 dead_node;
-	__be16 pad1;
+	u8 flags;
+	u8 pad1;
 	__be32 pad2;
 };
 
@@ -676,6 +711,7 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
 void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
 int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
 int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
+int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout);
 
 void dlm_put(struct dlm_ctxt *dlm);
 struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
@@ -687,14 +723,20 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 			    struct dlm_lock_resource *res);
 void dlm_purge_lockres(struct dlm_ctxt *dlm,
 		       struct dlm_lock_resource *lockres);
-void dlm_lockres_get(struct dlm_lock_resource *res);
+static inline void dlm_lockres_get(struct dlm_lock_resource *res)
+{
+	/* This is called on every lookup, so it might be worth
+	 * inlining. */
+	kref_get(&res->refs);
+}
 void dlm_lockres_put(struct dlm_lock_resource *res);
 void __dlm_unhash_lockres(struct dlm_lock_resource *res);
 void __dlm_insert_lockres(struct dlm_ctxt *dlm,
 			  struct dlm_lock_resource *res);
 struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
 						const char *name,
-						unsigned int len);
+						unsigned int len,
+						unsigned int hash);
 struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
 					      const char *name,
 					      unsigned int len);
@@ -819,6 +861,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm,
 			   u8 dead_node);
 int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
 
+int __dlm_lockres_unused(struct dlm_lock_resource *res);
 
 static inline const char * dlm_lock_mode_name(int mode)
 {
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 70888b31e751..c764dc8e40a2 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -214,6 +214,9 @@ grant:
 	if (lock->ml.node == dlm->node_num)
 		mlog(0, "doing in-place convert for nonlocal lock\n");
 	lock->ml.type = type;
+	if (lock->lksb->flags & DLM_LKSB_PUT_LVB)
+		memcpy(res->lvb, lock->lksb->lvb, DLM_LVB_LEN);
+
 	status = DLM_NORMAL;
 	*call_ast = 1;
 	goto unlock_exit;
@@ -461,6 +464,12 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
 	}
 
 	spin_lock(&res->spinlock);
+	status = __dlm_lockres_state_to_status(res);
+	if (status != DLM_NORMAL) {
+		spin_unlock(&res->spinlock);
+		dlm_error(status);
+		goto leave;
+	}
 	list_for_each(iter, &res->granted) {
 		lock = list_entry(iter, struct dlm_lock, list);
 		if (lock->ml.cookie == cnv->cookie &&
@@ -470,6 +479,21 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
 		}
 		lock = NULL;
 	}
+	if (!lock) {
+		__dlm_print_one_lock_resource(res);
+		list_for_each(iter, &res->granted) {
+			lock = list_entry(iter, struct dlm_lock, list);
+			if (lock->ml.node == cnv->node_idx) {
+				mlog(ML_ERROR, "There is something here "
+				     "for node %u, lock->ml.cookie=%llu, "
+				     "cnv->cookie=%llu\n", cnv->node_idx,
+				     (unsigned long long)lock->ml.cookie,
+				     (unsigned long long)cnv->cookie);
+				break;
+			}
+		}
+		lock = NULL;
+	}
 	spin_unlock(&res->spinlock);
 	if (!lock) {
 		status = DLM_IVLOCKID;
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index c7eae5d3324e..3f6c8d88f7af 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -37,10 +37,8 @@
 
 #include "dlmapi.h"
 #include "dlmcommon.h"
-#include "dlmdebug.h"
 
 #include "dlmdomain.h"
-#include "dlmdebug.h"
 
 #define MLOG_MASK_PREFIX ML_DLM
 #include "cluster/masklog.h"
@@ -120,6 +118,7 @@ void dlm_print_one_lock(struct dlm_lock *lockid)
 }
 EXPORT_SYMBOL_GPL(dlm_print_one_lock);
 
+#if 0
 void dlm_dump_lock_resources(struct dlm_ctxt *dlm)
 {
 	struct dlm_lock_resource *res;
@@ -136,12 +135,13 @@ void dlm_dump_lock_resources(struct dlm_ctxt *dlm)
 
 	spin_lock(&dlm->spinlock);
 	for (i=0; i<DLM_HASH_BUCKETS; i++) {
-		bucket = &(dlm->lockres_hash[i]);
+		bucket = dlm_lockres_hash(dlm, i);
 		hlist_for_each_entry(res, iter, bucket, hash_node)
 			dlm_print_one_lock_resource(res);
 	}
 	spin_unlock(&dlm->spinlock);
 }
+#endif  /*  0  */
 
 static const char *dlm_errnames[] = {
 	[DLM_NORMAL] =			"DLM_NORMAL",
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h
deleted file mode 100644
index 6858510c3ccd..000000000000
--- a/fs/ocfs2/dlm/dlmdebug.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmdebug.h
- *
- * Copyright (C) 2004 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- */
-
-#ifndef DLMDEBUG_H
-#define DLMDEBUG_H
-
-void dlm_dump_lock_resources(struct dlm_ctxt *dlm);
-
-#endif
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 8f3a9e3106fd..b8c23f7ba67e 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -41,7 +41,6 @@
 #include "dlmapi.h"
 #include "dlmcommon.h"
 
-#include "dlmdebug.h"
 #include "dlmdomain.h"
 
 #include "dlmver.h"
@@ -49,6 +48,33 @@
 #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
 #include "cluster/masklog.h"
 
+static void dlm_free_pagevec(void **vec, int pages)
+{
+	while (pages--)
+		free_page((unsigned long)vec[pages]);
+	kfree(vec);
+}
+
+static void **dlm_alloc_pagevec(int pages)
+{
+	void **vec = kmalloc(pages * sizeof(void *), GFP_KERNEL);
+	int i;
+
+	if (!vec)
+		return NULL;
+
+	for (i = 0; i < pages; i++)
+		if (!(vec[i] = (void *)__get_free_page(GFP_KERNEL)))
+			goto out_free;
+
+	mlog(0, "Allocated DLM hash pagevec; %d pages (%lu expected), %lu buckets per page\n",
+	     pages, (unsigned long)DLM_HASH_PAGES, (unsigned long)DLM_BUCKETS_PER_PAGE);
+	return vec;
+out_free:	/* undo the i pages obtained so far, then the vector itself */
+	dlm_free_pagevec(vec, i);
+	return NULL;
+}
+
 /*
  *
  * spinlock lock ordering: if multiple locks are needed, obey this ordering:
@@ -62,7 +88,7 @@
  *
  */
 
-spinlock_t dlm_domain_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(dlm_domain_lock);
 LIST_HEAD(dlm_domains);
 static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
 
@@ -90,8 +116,7 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm,
 	assert_spin_locked(&dlm->spinlock);
 
 	q = &res->lockname;
-	q->hash = full_name_hash(q->name, q->len);
-	bucket = &(dlm->lockres_hash[q->hash % DLM_HASH_BUCKETS]);
+	bucket = dlm_lockres_hash(dlm, q->hash);
 
 	/* get a reference for our hashtable */
 	dlm_lockres_get(res);
@@ -100,34 +125,32 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm,
 }
 
 struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
-					 const char *name,
-					 unsigned int len)
+						const char *name,
+						unsigned int len,
+						unsigned int hash)
 {
-	unsigned int hash;
-	struct hlist_node *iter;
-	struct dlm_lock_resource *tmpres=NULL;
 	struct hlist_head *bucket;
+	struct hlist_node *list;
 
 	mlog_entry("%.*s\n", len, name);
 
 	assert_spin_locked(&dlm->spinlock);
 
-	hash = full_name_hash(name, len);
-
-	bucket = &(dlm->lockres_hash[hash % DLM_HASH_BUCKETS]);
-
-	/* check for pre-existing lock */
-	hlist_for_each(iter, bucket) {
-		tmpres = hlist_entry(iter, struct dlm_lock_resource, hash_node);
-		if (tmpres->lockname.len == len &&
-		    memcmp(tmpres->lockname.name, name, len) == 0) {
-			dlm_lockres_get(tmpres);
-			break;
-		}
+	bucket = dlm_lockres_hash(dlm, hash);
 
-		tmpres = NULL;
+	hlist_for_each(list, bucket) {
+		struct dlm_lock_resource *res = hlist_entry(list,
+			struct dlm_lock_resource, hash_node);
+		if (res->lockname.name[0] != name[0])
+			continue;
+		if (unlikely(res->lockname.len != len))
+			continue;
+		if (memcmp(res->lockname.name + 1, name + 1, len - 1))
+			continue;
+		dlm_lockres_get(res);
+		return res;
 	}
-	return tmpres;
+	return NULL;
 }
 
 struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
@@ -135,9 +158,10 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
 				    unsigned int len)
 {
 	struct dlm_lock_resource *res;
+	unsigned int hash = dlm_lockid_hash(name, len);
 
 	spin_lock(&dlm->spinlock);
-	res = __dlm_lookup_lockres(dlm, name, len);
+	res = __dlm_lookup_lockres(dlm, name, len, hash);
 	spin_unlock(&dlm->spinlock);
 	return res;
 }
@@ -194,7 +218,7 @@ static int dlm_wait_on_domain_helper(const char *domain)
 static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
 {
 	if (dlm->lockres_hash)
-		free_page((unsigned long) dlm->lockres_hash);
+		dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
 
 	if (dlm->name)
 		kfree(dlm->name);
@@ -278,11 +302,21 @@ int dlm_domain_fully_joined(struct dlm_ctxt *dlm)
 	return ret;
 }
 
+static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm)
+{
+	if (dlm->dlm_worker) {
+		flush_workqueue(dlm->dlm_worker);
+		destroy_workqueue(dlm->dlm_worker);
+		dlm->dlm_worker = NULL;
+	}
+}
+
 static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm)
 {
 	dlm_unregister_domain_handlers(dlm);
 	dlm_complete_thread(dlm);
 	dlm_complete_recovery_thread(dlm);
+	dlm_destroy_dlm_worker(dlm);
 
 	/* We've left the domain. Now we can take ourselves out of the
 	 * list and allow the kref stuff to help us free the
@@ -304,8 +338,8 @@ static void dlm_migrate_all_locks(struct dlm_ctxt *dlm)
 restart:
 	spin_lock(&dlm->spinlock);
 	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
-		while (!hlist_empty(&dlm->lockres_hash[i])) {
-			res = hlist_entry(dlm->lockres_hash[i].first,
+		while (!hlist_empty(dlm_lockres_hash(dlm, i))) {
+			res = hlist_entry(dlm_lockres_hash(dlm, i)->first,
 					  struct dlm_lock_resource, hash_node);
 			/* need reference when manually grabbing lockres */
 			dlm_lockres_get(res);
@@ -1126,6 +1160,13 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
 		goto bail;
 	}
 
+	dlm->dlm_worker = create_singlethread_workqueue("dlm_wq");
+	if (!dlm->dlm_worker) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto bail;
+	}
+
 	do {
 		unsigned int backoff;
 		status = dlm_try_to_join_domain(dlm);
@@ -1166,6 +1207,7 @@ bail:
 		dlm_unregister_domain_handlers(dlm);
 		dlm_complete_thread(dlm);
 		dlm_complete_recovery_thread(dlm);
+		dlm_destroy_dlm_worker(dlm);
 	}
 
 	return status;
@@ -1191,7 +1233,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 		goto leave;
 	}
 
-	dlm->lockres_hash = (struct hlist_head *) __get_free_page(GFP_KERNEL);
+	dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES);
 	if (!dlm->lockres_hash) {
 		mlog_errno(-ENOMEM);
 		kfree(dlm->name);
@@ -1200,8 +1242,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 		goto leave;
 	}
 
-	for (i=0; i<DLM_HASH_BUCKETS; i++)
-		INIT_HLIST_HEAD(&dlm->lockres_hash[i]);
+	for (i = 0; i < DLM_HASH_BUCKETS; i++)
+		INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i));
 
 	strcpy(dlm->name, domain);
 	dlm->key = key;
@@ -1231,6 +1273,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 
 	dlm->dlm_thread_task = NULL;
 	dlm->dlm_reco_thread_task = NULL;
+	dlm->dlm_worker = NULL;
 	init_waitqueue_head(&dlm->dlm_thread_wq);
 	init_waitqueue_head(&dlm->dlm_reco_thread_wq);
 	init_waitqueue_head(&dlm->reco.event);
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 7273d9fa6bab..033ad1701232 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -116,7 +116,7 @@ static int dlmfs_file_open(struct inode *inode,
 	 * doesn't make sense for LVB writes. */
 	file->f_flags &= ~O_APPEND;
 
-	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
+	fp = kmalloc(sizeof(*fp), GFP_NOFS);
 	if (!fp) {
 		status = -ENOMEM;
 		goto bail;
@@ -196,7 +196,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
 	else
 		readlen = count - *ppos;
 
-	lvb_buf = kmalloc(readlen, GFP_KERNEL);
+	lvb_buf = kmalloc(readlen, GFP_NOFS);
 	if (!lvb_buf)
 		return -ENOMEM;
 
@@ -240,7 +240,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
 	else
 		writelen = count - *ppos;
 
-	lvb_buf = kmalloc(writelen, GFP_KERNEL);
+	lvb_buf = kmalloc(writelen, GFP_NOFS);
 	if (!lvb_buf)
 		return -ENOMEM;
 
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 55cda25ae11b..5ca57ec650c7 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -53,7 +53,7 @@
 #define MLOG_MASK_PREFIX ML_DLM
 #include "cluster/masklog.h"
 
-static spinlock_t dlm_cookie_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(dlm_cookie_lock);
 static u64 dlm_next_cookie = 1;
 
 static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
@@ -201,6 +201,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
 				      struct dlm_lock *lock, int flags)
 {
 	enum dlm_status status = DLM_DENIED;
+	int lockres_changed = 1;
 
 	mlog_entry("type=%d\n", lock->ml.type);
 	mlog(0, "lockres %.*s, flags = 0x%x\n", res->lockname.len,
@@ -226,8 +227,25 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
 	res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
 	lock->lock_pending = 0;
 	if (status != DLM_NORMAL) {
-		if (status != DLM_NOTQUEUED)
+		if (status == DLM_RECOVERING &&
+		    dlm_is_recovery_lock(res->lockname.name,
+					 res->lockname.len)) {
+			/* recovery lock was mastered by dead node.
+			 * we need to have calc_usage shoot down this
+			 * lockres and completely remaster it. */
+			mlog(0, "%s: recovery lock was owned by "
+			     "dead node %u, remaster it now.\n",
+			     dlm->name, res->owner);
+		} else if (status != DLM_NOTQUEUED) {
+			/*
+			 * DO NOT call calc_usage, as this would unhash
+			 * the remote lockres before we ever get to use
+			 * it.  treat as if we never made any change to
+			 * the lockres.
+			 */
+			lockres_changed = 0;
 			dlm_error(status);
+		}
 		dlm_revert_pending_lock(res, lock);
 		dlm_lock_put(lock);
 	} else if (dlm_is_recovery_lock(res->lockname.name, 
@@ -243,7 +261,8 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
 	}
 	spin_unlock(&res->spinlock);
 
-	dlm_lockres_calc_usage(dlm, res);
+	if (lockres_changed)
+		dlm_lockres_calc_usage(dlm, res);
 
 	wake_up(&res->wq);
 	return status;
@@ -280,6 +299,14 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
 	if (tmpret >= 0) {
 		// successfully sent and received
 		ret = status;  // this is already a dlm_status
+		if (ret == DLM_REJECTED) {
+			mlog(ML_ERROR, "%s:%.*s: BUG.  this is a stale lockres "
+			     "no longer owned by %u.  that node is coming back "
+			     "up currently.\n", dlm->name, create.namelen,
+			     create.name, res->owner);
+			dlm_print_one_lock_resource(res);
+			BUG();
+		}
 	} else {
 		mlog_errno(tmpret);
 		if (dlm_is_host_down(tmpret)) {
@@ -381,13 +408,13 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
 	struct dlm_lock *lock;
 	int kernel_allocated = 0;
 
-	lock = kcalloc(1, sizeof(*lock), GFP_KERNEL);
+	lock = kcalloc(1, sizeof(*lock), GFP_NOFS);
 	if (!lock)
 		return NULL;
 
 	if (!lksb) {
 		/* zero memory only if kernel-allocated */
-		lksb = kcalloc(1, sizeof(*lksb), GFP_KERNEL);
+		lksb = kcalloc(1, sizeof(*lksb), GFP_NOFS);
 		if (!lksb) {
 			kfree(lock);
 			return NULL;
@@ -428,11 +455,16 @@ int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data)
 	if (!dlm_grab(dlm))
 		return DLM_REJECTED;
 
-	mlog_bug_on_msg(!dlm_domain_fully_joined(dlm),
-			"Domain %s not fully joined!\n", dlm->name);
-
 	name = create->name;
 	namelen = create->namelen;
+	status = DLM_REJECTED;
+	if (!dlm_domain_fully_joined(dlm)) {
+		mlog(ML_ERROR, "Domain %s not fully joined, but node %u is "
+		     "sending a create_lock message for lock %.*s!\n",
+		     dlm->name, create->node_idx, namelen, name);
+		dlm_error(status);
+		goto leave;
+	}
 
 	status = DLM_IVBUFLEN;
 	if (namelen > DLM_LOCKID_NAME_MAX) {
@@ -668,18 +700,22 @@ retry_lock:
 			msleep(100);
 			/* no waiting for dlm_reco_thread */
 			if (recovery) {
-				if (status == DLM_RECOVERING) {
-					mlog(0, "%s: got RECOVERING "
-					     "for $REOCVERY lock, master "
-					     "was %u\n", dlm->name, 
-					     res->owner);
-					dlm_wait_for_node_death(dlm, res->owner, 
-							DLM_NODE_DEATH_WAIT_MAX);
-				}
+				if (status != DLM_RECOVERING)
+					goto retry_lock;
+
+				mlog(0, "%s: got RECOVERING "
+				     "for $RECOVERY lock, master "
+				     "was %u\n", dlm->name,
+				     res->owner);
+				/* wait to see the node go down, then
+				 * drop down and allow the lockres to
+				 * get cleaned up.  need to remaster. */
+				dlm_wait_for_node_death(dlm, res->owner,
+						DLM_NODE_DEATH_WAIT_MAX);
 			} else {
 				dlm_wait_for_recovery(dlm);
+				goto retry_lock;
 			}
-			goto retry_lock;
 		}
 
 		if (status != DLM_NORMAL) {
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 940be4c13b1f..1b8346dd0572 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -47,7 +47,6 @@
 
 #include "dlmapi.h"
 #include "dlmcommon.h"
-#include "dlmdebug.h"
 #include "dlmdomain.h"
 
 #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER)
@@ -74,6 +73,7 @@ struct dlm_master_list_entry
 	wait_queue_head_t wq;
 	atomic_t woken;
 	struct kref mle_refs;
+	int inuse;
 	unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
 	unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
 	unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
@@ -127,18 +127,30 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
 	return 1;
 }
 
-#if 0
-/* Code here is included but defined out as it aids debugging */
+#define dlm_print_nodemap(m)  _dlm_print_nodemap(m,#m)
+static void _dlm_print_nodemap(unsigned long *map, const char *mapname)
+{
+	int i;
+	printk("%s=[ ", mapname);
+	for (i=0; i<O2NM_MAX_NODES; i++)
+		if (test_bit(i, map))
+			printk("%d ", i);
+	printk("]");
+}
 
-void dlm_print_one_mle(struct dlm_master_list_entry *mle)
+static void dlm_print_one_mle(struct dlm_master_list_entry *mle)
 {
-	int i = 0, refs;
+	int refs;
 	char *type;
 	char attached;
 	u8 master;
 	unsigned int namelen;
 	const char *name;
 	struct kref *k;
+	unsigned long *maybe = mle->maybe_map,
+		      *vote = mle->vote_map,
+		      *resp = mle->response_map,
+		      *node = mle->node_map;
 
 	k = &mle->mle_refs;
 	if (mle->type == DLM_MLE_BLOCK)
@@ -159,18 +171,29 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle)
 		name = mle->u.res->lockname.name;
 	}
 
-	mlog(ML_NOTICE, "  #%3d: %3s  %3d  %3u   %3u %c    (%d)%.*s\n",
-		  i, type, refs, master, mle->new_master, attached,
-		  namelen, namelen, name);
+	mlog(ML_NOTICE, "%.*s: %3s refs=%3d mas=%3u new=%3u evt=%c inuse=%d ",
+		  namelen, name, type, refs, master, mle->new_master, attached,
+		  mle->inuse);
+	dlm_print_nodemap(maybe);
+	printk(", ");
+	dlm_print_nodemap(vote);
+	printk(", ");
+	dlm_print_nodemap(resp);
+	printk(", ");
+	dlm_print_nodemap(node);
+	printk(", ");
+	printk("\n");
 }
 
+#if 0
+/* Code here is included but defined out as it aids debugging */
+
 static void dlm_dump_mles(struct dlm_ctxt *dlm)
 {
 	struct dlm_master_list_entry *mle;
 	struct list_head *iter;
 	
 	mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name);
-	mlog(ML_NOTICE, "  ####: type refs owner new events? lockname nodemap votemap respmap maybemap\n");
 	spin_lock(&dlm->master_lock);
 	list_for_each(iter, &dlm->master_list) {
 		mle = list_entry(iter, struct dlm_master_list_entry, list);
@@ -314,6 +337,31 @@ static inline void dlm_mle_detach_hb_events(struct dlm_ctxt *dlm,
 	spin_unlock(&dlm->spinlock);
 }
 
+/* Pin mle: inuse++ plus one kref.  Both dlm locks must already be held. */
+static void dlm_get_mle_inuse(struct dlm_master_list_entry *mle)
+{
+	struct dlm_ctxt *ctxt = mle->dlm;
+
+	assert_spin_locked(&ctxt->spinlock);
+	assert_spin_locked(&ctxt->master_lock);
+	kref_get(&mle->mle_refs);
+	mle->inuse++;
+}
+
+/* Undo dlm_get_mle_inuse(): takes dlm->spinlock then master_lock itself,
+ * decrements inuse and drops one reference through __dlm_put_mle(). */
+static void dlm_put_mle_inuse(struct dlm_master_list_entry *mle)
+{
+	struct dlm_ctxt *ctxt = mle->dlm;
+
+	spin_lock(&ctxt->spinlock);
+	spin_lock(&ctxt->master_lock);
+	mle->inuse--;
+	__dlm_put_mle(mle);
+	spin_unlock(&ctxt->master_lock);
+	spin_unlock(&ctxt->spinlock);
+}
+
 /* remove from list and free */
 static void __dlm_put_mle(struct dlm_master_list_entry *mle)
 {
@@ -322,9 +370,14 @@ static void __dlm_put_mle(struct dlm_master_list_entry *mle)
 
 	assert_spin_locked(&dlm->spinlock);
 	assert_spin_locked(&dlm->master_lock);
-	BUG_ON(!atomic_read(&mle->mle_refs.refcount));
-
-	kref_put(&mle->mle_refs, dlm_mle_release);
+	if (!atomic_read(&mle->mle_refs.refcount)) {
+		/* this may or may not crash, but who cares.
+		 * it's a BUG. */
+		mlog(ML_ERROR, "bad mle: %p\n", mle);
+		dlm_print_one_mle(mle);
+		BUG();
+	} else
+		kref_put(&mle->mle_refs, dlm_mle_release);
 }
 
 
@@ -367,6 +420,7 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle,
 	memset(mle->response_map, 0, sizeof(mle->response_map));
 	mle->master = O2NM_MAX_NODES;
 	mle->new_master = O2NM_MAX_NODES;
+	mle->inuse = 0;
 
 	if (mle->type == DLM_MLE_MASTER) {
 		BUG_ON(!res);
@@ -564,6 +618,28 @@ static void dlm_lockres_release(struct kref *kref)
 	mlog(0, "destroying lockres %.*s\n", res->lockname.len,
 	     res->lockname.name);
 
+	if (!hlist_unhashed(&res->hash_node) ||
+	    !list_empty(&res->granted) ||
+	    !list_empty(&res->converting) ||
+	    !list_empty(&res->blocked) ||
+	    !list_empty(&res->dirty) ||
+	    !list_empty(&res->recovering) ||
+	    !list_empty(&res->purge)) {
+		mlog(ML_ERROR,
+		     "Going to BUG for resource %.*s."
+		     "  We're on a list! [%c%c%c%c%c%c%c]\n",
+		     res->lockname.len, res->lockname.name,
+		     !hlist_unhashed(&res->hash_node) ? 'H' : ' ',
+		     !list_empty(&res->granted) ? 'G' : ' ',
+		     !list_empty(&res->converting) ? 'C' : ' ',
+		     !list_empty(&res->blocked) ? 'B' : ' ',
+		     !list_empty(&res->dirty) ? 'D' : ' ',
+		     !list_empty(&res->recovering) ? 'R' : ' ',
+		     !list_empty(&res->purge) ? 'P' : ' ');
+
+		dlm_print_one_lock_resource(res);
+	}
+
 	/* By the time we're ready to blow this guy away, we shouldn't
 	 * be on any lists. */
 	BUG_ON(!hlist_unhashed(&res->hash_node));
@@ -579,11 +655,6 @@ static void dlm_lockres_release(struct kref *kref)
 	kfree(res);
 }
 
-void dlm_lockres_get(struct dlm_lock_resource *res)
-{
-	kref_get(&res->refs);
-}
-
 void dlm_lockres_put(struct dlm_lock_resource *res)
 {
 	kref_put(&res->refs, dlm_lockres_release);
@@ -603,7 +674,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
 	memcpy(qname, name, namelen);
 
 	res->lockname.len = namelen;
-	res->lockname.hash = full_name_hash(name, namelen);
+	res->lockname.hash = dlm_lockid_hash(name, namelen);
 
 	init_waitqueue_head(&res->wq);
 	spin_lock_init(&res->spinlock);
@@ -637,11 +708,11 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
 {
 	struct dlm_lock_resource *res;
 
-	res = kmalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL);
+	res = kmalloc(sizeof(struct dlm_lock_resource), GFP_NOFS);
 	if (!res)
 		return NULL;
 
-	res->lockname.name = kmalloc(namelen, GFP_KERNEL);
+	res->lockname.name = kmalloc(namelen, GFP_NOFS);
 	if (!res->lockname.name) {
 		kfree(res);
 		return NULL;
@@ -677,19 +748,20 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
 	int blocked = 0;
 	int ret, nodenum;
 	struct dlm_node_iter iter;
-	unsigned int namelen;
+	unsigned int namelen, hash;
 	int tries = 0;
 	int bit, wait_on_recovery = 0;
 
 	BUG_ON(!lockid);
 
 	namelen = strlen(lockid);
+	hash = dlm_lockid_hash(lockid, namelen);
 
 	mlog(0, "get lockres %s (len %d)\n", lockid, namelen);
 
 lookup:
 	spin_lock(&dlm->spinlock);
-	tmpres = __dlm_lookup_lockres(dlm, lockid, namelen);
+	tmpres = __dlm_lookup_lockres(dlm, lockid, namelen, hash);
 	if (tmpres) {
 		spin_unlock(&dlm->spinlock);
 		mlog(0, "found in hash!\n");
@@ -704,7 +776,7 @@ lookup:
 		mlog(0, "allocating a new resource\n");
 		/* nothing found and we need to allocate one. */
 		alloc_mle = (struct dlm_master_list_entry *)
-			kmem_cache_alloc(dlm_mle_cache, GFP_KERNEL);
+			kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
 		if (!alloc_mle)
 			goto leave;
 		res = dlm_new_lockres(dlm, lockid, namelen);
@@ -790,10 +862,11 @@ lookup:
 	 * if so, the creator of the BLOCK may try to put the last
 	 * ref at this time in the assert master handler, so we
 	 * need an extra one to keep from a bad ptr deref. */
-	dlm_get_mle(mle);
+	dlm_get_mle_inuse(mle);
 	spin_unlock(&dlm->master_lock);
 	spin_unlock(&dlm->spinlock);
 
+redo_request:
 	while (wait_on_recovery) {
 		/* any cluster changes that occurred after dropping the
 		 * dlm spinlock would be detectable be a change on the mle,
@@ -812,7 +885,7 @@ lookup:
 		} 
 
 		dlm_kick_recovery_thread(dlm);
-		msleep(100);
+		msleep(1000);
 		dlm_wait_for_recovery(dlm);
 
 		spin_lock(&dlm->spinlock);
@@ -825,13 +898,15 @@ lookup:
 		} else
 			wait_on_recovery = 0;
 		spin_unlock(&dlm->spinlock);
+
+		if (wait_on_recovery)
+			dlm_wait_for_node_recovery(dlm, bit, 10000);
 	}
 
 	/* must wait for lock to be mastered elsewhere */
 	if (blocked)
 		goto wait;
 
-redo_request:
 	ret = -EINVAL;
 	dlm_node_iter_init(mle->vote_map, &iter);
 	while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
@@ -856,6 +931,7 @@ wait:
 	/* keep going until the response map includes all nodes */
 	ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked);
 	if (ret < 0) {
+		wait_on_recovery = 1;
 		mlog(0, "%s:%.*s: node map changed, redo the "
 		     "master request now, blocked=%d\n",
 		     dlm->name, res->lockname.len,
@@ -866,7 +942,7 @@ wait:
 			     dlm->name, res->lockname.len, 
 			     res->lockname.name, blocked);
 			dlm_print_one_lock_resource(res);
-			/* dlm_print_one_mle(mle); */
+			dlm_print_one_mle(mle);
 			tries = 0;
 		}
 		goto redo_request;
@@ -880,7 +956,7 @@ wait:
 	dlm_mle_detach_hb_events(dlm, mle);
 	dlm_put_mle(mle);
 	/* put the extra ref */
-	dlm_put_mle(mle);
+	dlm_put_mle_inuse(mle);
 
 wake_waiters:
 	spin_lock(&res->spinlock);
@@ -921,12 +997,14 @@ recheck:
 		spin_unlock(&res->spinlock);
 		/* this will cause the master to re-assert across
 		 * the whole cluster, freeing up mles */
-		ret = dlm_do_master_request(mle, res->owner);
-		if (ret < 0) {
-			/* give recovery a chance to run */
-			mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret);
-			msleep(500);
-			goto recheck;
+		if (res->owner != dlm->node_num) {
+			ret = dlm_do_master_request(mle, res->owner);
+			if (ret < 0) {
+				/* give recovery a chance to run */
+				mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret);
+				msleep(500);
+				goto recheck;
+			}
 		}
 		ret = 0;
 		goto leave;
@@ -962,6 +1040,12 @@ recheck:
 		     "rechecking now\n", dlm->name, res->lockname.len,
 		     res->lockname.name);
 		goto recheck;
+	} else {
+		if (!voting_done) {
+			mlog(0, "map not changed and voting not done "
+			     "for %s:%.*s\n", dlm->name, res->lockname.len,
+			     res->lockname.name);
+		}
 	}
 
 	if (m != O2NM_MAX_NODES) {
@@ -1129,18 +1213,6 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
 			set_bit(node, mle->vote_map);
 		} else {
 			mlog(ML_ERROR, "node down! %d\n", node);
-
-			/* if the node wasn't involved in mastery skip it,
-			 * but clear it out from the maps so that it will
-			 * not affect mastery of this lockres */
-			clear_bit(node, mle->response_map);
-			clear_bit(node, mle->vote_map);
-			if (!test_bit(node, mle->maybe_map))
-				goto next;
-
-			/* if we're already blocked on lock mastery, and the
-			 * dead node wasn't the expected master, or there is
-			 * another node in the maybe_map, keep waiting */
 			if (blocked) {
 				int lowest = find_next_bit(mle->maybe_map,
 						       O2NM_MAX_NODES, 0);
@@ -1148,54 +1220,53 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
 				/* act like it was never there */
 				clear_bit(node, mle->maybe_map);
 
-			       	if (node != lowest)
-					goto next;
-
-				mlog(ML_ERROR, "expected master %u died while "
-				     "this node was blocked waiting on it!\n",
-				     node);
-				lowest = find_next_bit(mle->maybe_map,
-						       O2NM_MAX_NODES,
-						       lowest+1);
-				if (lowest < O2NM_MAX_NODES) {
-					mlog(0, "still blocked. waiting "
-					     "on %u now\n", lowest);
-					goto next;
+			       	if (node == lowest) {
+					mlog(0, "expected master %u died"
+					    " while this node was blocked "
+					    "waiting on it!\n", node);
+					lowest = find_next_bit(mle->maybe_map,
+						       	O2NM_MAX_NODES,
+						       	lowest+1);
+					if (lowest < O2NM_MAX_NODES) {
+						mlog(0, "%s:%.*s:still "
+						     "blocked. waiting on %u "
+						     "now\n", dlm->name,
+						     res->lockname.len,
+						     res->lockname.name,
+						     lowest);
+					} else {
+						/* mle is an MLE_BLOCK, but
+						 * there is now nothing left to
+						 * block on.  we need to return
+						 * all the way back out and try
+						 * again with an MLE_MASTER.
+						 * dlm_do_local_recovery_cleanup
+						 * has already run, so the mle
+						 * refcount is ok */
+						mlog(0, "%s:%.*s: no "
+						     "longer blocking. try to "
+						     "master this here\n",
+						     dlm->name,
+						     res->lockname.len,
+						     res->lockname.name);
+						mle->type = DLM_MLE_MASTER;
+						mle->u.res = res;
+					}
 				}
-
-				/* mle is an MLE_BLOCK, but there is now
-				 * nothing left to block on.  we need to return
-				 * all the way back out and try again with
-				 * an MLE_MASTER. dlm_do_local_recovery_cleanup
-				 * has already run, so the mle refcount is ok */
-				mlog(0, "no longer blocking. we can "
-				     "try to master this here\n");
-				mle->type = DLM_MLE_MASTER;
-				memset(mle->maybe_map, 0,
-				       sizeof(mle->maybe_map));
-				memset(mle->response_map, 0,
-				       sizeof(mle->maybe_map));
-				memcpy(mle->vote_map, mle->node_map,
-				       sizeof(mle->node_map));
-				mle->u.res = res;
-				set_bit(dlm->node_num, mle->maybe_map);
-
-				ret = -EAGAIN;
-				goto next;
 			}
 
-			clear_bit(node, mle->maybe_map);
-			if (node > dlm->node_num)
-				goto next;
-
-			mlog(0, "dead node in map!\n");
-			/* yuck. go back and re-contact all nodes
-			 * in the vote_map, removing this node. */
-			memset(mle->response_map, 0,
-			       sizeof(mle->response_map));
+			/* now blank out everything, as if we had never
+			 * contacted anyone */
+			memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
+			memset(mle->response_map, 0, sizeof(mle->response_map));
+			/* reset the vote_map to the current node_map */
+			memcpy(mle->vote_map, mle->node_map,
+			       sizeof(mle->node_map));
+			/* put myself into the maybe map */
+			if (mle->type != DLM_MLE_BLOCK)
+				set_bit(dlm->node_num, mle->maybe_map);
 		}
 		ret = -EAGAIN;
-next:
 		node = dlm_bitmap_diff_iter_next(&bdi, &sc);
 	}
 	return ret;
@@ -1316,7 +1387,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data)
 	struct dlm_master_request *request = (struct dlm_master_request *) msg->buf;
 	struct dlm_master_list_entry *mle = NULL, *tmpmle = NULL;
 	char *name;
-	unsigned int namelen;
+	unsigned int namelen, hash;
 	int found, ret;
 	int set_maybe;
 	int dispatch_assert = 0;
@@ -1331,6 +1402,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data)
 
 	name = request->name;
 	namelen = request->namelen;
+	hash = dlm_lockid_hash(name, namelen);
 
 	if (namelen > DLM_LOCKID_NAME_MAX) {
 		response = DLM_IVBUFLEN;
@@ -1339,7 +1411,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data)
 
 way_up_top:
 	spin_lock(&dlm->spinlock);
-	res = __dlm_lookup_lockres(dlm, name, namelen);
+	res = __dlm_lookup_lockres(dlm, name, namelen, hash);
 	if (res) {
 		spin_unlock(&dlm->spinlock);
 
@@ -1459,21 +1531,18 @@ way_up_top:
 			spin_unlock(&dlm->spinlock);
 
 			mle = (struct dlm_master_list_entry *)
-				kmem_cache_alloc(dlm_mle_cache, GFP_KERNEL);
+				kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
 			if (!mle) {
 				response = DLM_MASTER_RESP_ERROR;
 				mlog_errno(-ENOMEM);
 				goto send_response;
 			}
-			spin_lock(&dlm->spinlock);
-			dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL,
-					 name, namelen);
-			spin_unlock(&dlm->spinlock);
 			goto way_up_top;
 		}
 
 		// mlog(0, "this is second time thru, already allocated, "
 		// "add the block.\n");
+		dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL, name, namelen);
 		set_bit(request->node_idx, mle->maybe_map);
 		list_add(&mle->list, &dlm->master_list);
 		response = DLM_MASTER_RESP_NO;
@@ -1556,6 +1625,8 @@ again:
 	dlm_node_iter_init(nodemap, &iter);
 	while ((to = dlm_node_iter_next(&iter)) >= 0) {
 		int r = 0;
+		struct dlm_master_list_entry *mle = NULL;
+
 		mlog(0, "sending assert master to %d (%.*s)\n", to,
 		     namelen, lockname);
 		memset(&assert, 0, sizeof(assert));
@@ -1567,20 +1638,28 @@ again:
 		tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key,
 					    &assert, sizeof(assert), to, &r);
 		if (tmpret < 0) {
-			mlog(ML_ERROR, "assert_master returned %d!\n", tmpret);
+			mlog(0, "assert_master returned %d!\n", tmpret);
 			if (!dlm_is_host_down(tmpret)) {
-				mlog(ML_ERROR, "unhandled error!\n");
+				mlog(ML_ERROR, "unhandled error=%d!\n", tmpret);
 				BUG();
 			}
 			/* a node died.  finish out the rest of the nodes. */
-			mlog(ML_ERROR, "link to %d went down!\n", to);
+			mlog(0, "link to %d went down!\n", to);
 			/* any nonzero status return will do */
 			ret = tmpret;
 		} else if (r < 0) {
 			/* ok, something horribly messed.  kill thyself. */
 			mlog(ML_ERROR,"during assert master of %.*s to %u, "
 			     "got %d.\n", namelen, lockname, to, r);
-			dlm_dump_lock_resources(dlm);
+			spin_lock(&dlm->spinlock);
+			spin_lock(&dlm->master_lock);
+			if (dlm_find_mle(dlm, &mle, (char *)lockname,
+					 namelen)) {
+				dlm_print_one_mle(mle);
+				__dlm_put_mle(mle);
+			}
+			spin_unlock(&dlm->master_lock);
+			spin_unlock(&dlm->spinlock);
 			BUG();
 		} else if (r == EAGAIN) {
 			mlog(0, "%.*s: node %u create mles on other "
@@ -1612,7 +1691,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
 	struct dlm_assert_master *assert = (struct dlm_assert_master *)msg->buf;
 	struct dlm_lock_resource *res = NULL;
 	char *name;
-	unsigned int namelen;
+	unsigned int namelen, hash;
 	u32 flags;
 	int master_request = 0;
 	int ret = 0;
@@ -1622,6 +1701,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
 
 	name = assert->name;
 	namelen = assert->namelen;
+	hash = dlm_lockid_hash(name, namelen);
 	flags = be32_to_cpu(assert->flags);
 
 	if (namelen > DLM_LOCKID_NAME_MAX) {
@@ -1646,7 +1726,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
 		if (bit >= O2NM_MAX_NODES) {
 			/* not necessarily an error, though less likely.
 			 * could be master just re-asserting. */
-			mlog(ML_ERROR, "no bits set in the maybe_map, but %u "
+			mlog(0, "no bits set in the maybe_map, but %u "
 			     "is asserting! (%.*s)\n", assert->node_idx,
 			     namelen, name);
 		} else if (bit != assert->node_idx) {
@@ -1658,19 +1738,36 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
 				 * number winning the mastery will respond
 				 * YES to mastery requests, but this node
 				 * had no way of knowing.  let it pass. */
-				mlog(ML_ERROR, "%u is the lowest node, "
+				mlog(0, "%u is the lowest node, "
 				     "%u is asserting. (%.*s)  %u must "
 				     "have begun after %u won.\n", bit,
 				     assert->node_idx, namelen, name, bit,
 				     assert->node_idx);
 			}
 		}
+		if (mle->type == DLM_MLE_MIGRATION) {
+			if (flags & DLM_ASSERT_MASTER_MLE_CLEANUP) {
+				mlog(0, "%s:%.*s: got cleanup assert"
+				     " from %u for migration\n",
+				     dlm->name, namelen, name,
+				     assert->node_idx);
+			} else if (!(flags & DLM_ASSERT_MASTER_FINISH_MIGRATION)) {
+				mlog(0, "%s:%.*s: got unrelated assert"
+				     " from %u for migration, ignoring\n",
+				     dlm->name, namelen, name,
+				     assert->node_idx);
+				__dlm_put_mle(mle);
+				spin_unlock(&dlm->master_lock);
+				spin_unlock(&dlm->spinlock);
+				goto done;
+			}	
+		}
 	}
 	spin_unlock(&dlm->master_lock);
 
 	/* ok everything checks out with the MLE
 	 * now check to see if there is a lockres */
-	res = __dlm_lookup_lockres(dlm, name, namelen);
+	res = __dlm_lookup_lockres(dlm, name, namelen, hash);
 	if (res) {
 		spin_lock(&res->spinlock);
 		if (res->state & DLM_LOCK_RES_RECOVERING)  {
@@ -1679,7 +1776,8 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
 			goto kill;
 		}
 		if (!mle) {
-			if (res->owner != assert->node_idx) {
+			if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN &&
+			    res->owner != assert->node_idx) {
 				mlog(ML_ERROR, "assert_master from "
 					  "%u, but current owner is "
 					  "%u! (%.*s)\n",
@@ -1732,6 +1830,7 @@ ok:
 	if (mle) {
 		int extra_ref = 0;
 		int nn = -1;
+		int rr, err = 0;
 		
 		spin_lock(&mle->spinlock);
 		if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION)
@@ -1751,27 +1850,64 @@ ok:
 		wake_up(&mle->wq);
 		spin_unlock(&mle->spinlock);
 
-		if (mle->type == DLM_MLE_MIGRATION && res) {
-			mlog(0, "finishing off migration of lockres %.*s, "
-			     "from %u to %u\n",
-			       res->lockname.len, res->lockname.name,
-			       dlm->node_num, mle->new_master);
+		if (res) {
 			spin_lock(&res->spinlock);
-			res->state &= ~DLM_LOCK_RES_MIGRATING;
-			dlm_change_lockres_owner(dlm, res, mle->new_master);
-			BUG_ON(res->state & DLM_LOCK_RES_DIRTY);
+			if (mle->type == DLM_MLE_MIGRATION) {
+				mlog(0, "finishing off migration of lockres %.*s, "
+			     		"from %u to %u\n",
+			       		res->lockname.len, res->lockname.name,
+			       		dlm->node_num, mle->new_master);
+				res->state &= ~DLM_LOCK_RES_MIGRATING;
+				dlm_change_lockres_owner(dlm, res, mle->new_master);
+				BUG_ON(res->state & DLM_LOCK_RES_DIRTY);
+			} else {
+				dlm_change_lockres_owner(dlm, res, mle->master);
+			}
 			spin_unlock(&res->spinlock);
 		}
-		/* master is known, detach if not already detached */
-		dlm_mle_detach_hb_events(dlm, mle);
-		dlm_put_mle(mle);
-		
+
+		/* master is known, detach if not already detached.
+		 * ensures that only one assert_master call will happen
+		 * on this mle. */
+		spin_lock(&dlm->spinlock);
+		spin_lock(&dlm->master_lock);
+
+		rr = atomic_read(&mle->mle_refs.refcount);
+		if (mle->inuse > 0) {
+			if (extra_ref && rr < 3)
+				err = 1;
+			else if (!extra_ref && rr < 2)
+				err = 1;
+		} else {
+			if (extra_ref && rr < 2)
+				err = 1;
+			else if (!extra_ref && rr < 1)
+				err = 1;
+		}
+		if (err) {
+			mlog(ML_ERROR, "%s:%.*s: got assert master from %u "
+			     "that will mess up this node, refs=%d, extra=%d, "
+			     "inuse=%d\n", dlm->name, namelen, name,
+			     assert->node_idx, rr, extra_ref, mle->inuse);
+			dlm_print_one_mle(mle);
+		}
+		list_del_init(&mle->list);
+		__dlm_mle_detach_hb_events(dlm, mle);
+		__dlm_put_mle(mle);
 		if (extra_ref) {
 			/* the assert master message now balances the extra
 		 	 * ref given by the master / migration request message.
 		 	 * if this is the last put, it will be removed
 		 	 * from the list. */
-			dlm_put_mle(mle);
+			__dlm_put_mle(mle);
+		}
+		spin_unlock(&dlm->master_lock);
+		spin_unlock(&dlm->spinlock);
+	} else if (res) {
+		if (res->owner != assert->node_idx) {
+			mlog(0, "assert_master from %u, but current "
+			     "owner is %u (%.*s), no mle\n", assert->node_idx,
+			     res->owner, namelen, name);
 		}
 	}
 
@@ -1788,12 +1924,12 @@ done:
 
 kill:
 	/* kill the caller! */
+	mlog(ML_ERROR, "Bad message received from another node.  Dumping state "
+	     "and killing the other node now!  This node is OK and can continue.\n");
+	__dlm_print_one_lock_resource(res);
 	spin_unlock(&res->spinlock);
 	spin_unlock(&dlm->spinlock);
 	dlm_lockres_put(res);
-	mlog(ML_ERROR, "Bad message received from another node.  Dumping state "
-	     "and killing the other node now!  This node is OK and can continue.\n");
-	dlm_dump_lock_resources(dlm);
 	dlm_put(dlm);
 	return -EINVAL;
 }
@@ -1803,7 +1939,7 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
 			       int ignore_higher, u8 request_from, u32 flags)
 {
 	struct dlm_work_item *item;
-	item = kcalloc(1, sizeof(*item), GFP_KERNEL);
+	item = kcalloc(1, sizeof(*item), GFP_NOFS);
 	if (!item)
 		return -ENOMEM;
 
@@ -1825,7 +1961,7 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
 	list_add_tail(&item->list, &dlm->work_list);
 	spin_unlock(&dlm->work_lock);
 
-	schedule_work(&dlm->dispatched_work);
+	queue_work(dlm->dlm_worker, &dlm->dispatched_work);
 	return 0;
 }
 
@@ -1866,6 +2002,23 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
 		}
 	}
 
+	/*
+	 * If we're migrating this lock to someone else, we are no
+	 * longer allowed to assert our own mastery.  OTOH, we need to
+	 * prevent migration from starting while we're still asserting
+	 * our dominance.  The reserved ast delays migration.
+	 */
+	spin_lock(&res->spinlock);
+	if (res->state & DLM_LOCK_RES_MIGRATING) {
+		mlog(0, "Someone asked us to assert mastery, but we're "
+		     "in the middle of migration.  Skipping assert, "
+		     "the new master will handle that.\n");
+		spin_unlock(&res->spinlock);
+		goto put;
+	} else
+		__dlm_lockres_reserve_ast(res);
+	spin_unlock(&res->spinlock);
+
 	/* this call now finishes out the nodemap
 	 * even if one or more nodes die */
 	mlog(0, "worker about to master %.*s here, this=%u\n",
@@ -1875,9 +2028,14 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
 				   nodemap, flags);
 	if (ret < 0) {
 		/* no need to restart, we are done */
-		mlog_errno(ret);
+		if (!dlm_is_host_down(ret))
+			mlog_errno(ret);
 	}
 
+	/* Ok, we've asserted ourselves.  Let's let migration start. */
+	dlm_lockres_release_ast(dlm, res);
+
+put:
 	dlm_lockres_put(res);
 
 	mlog(0, "finished with dlm_assert_master_worker\n");
@@ -1916,6 +2074,7 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
 				BUG();
 			/* host is down, so answer for that node would be
 			 * DLM_LOCK_RES_OWNER_UNKNOWN.  continue. */
+			ret = 0;
 		}
 
 		if (master != DLM_LOCK_RES_OWNER_UNKNOWN) {
@@ -2016,14 +2175,14 @@ int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 	 */
 
 	ret = -ENOMEM;
-	mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_KERNEL);
+	mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS);
 	if (!mres) {
 		mlog_errno(ret);
 		goto leave;
 	}
 
 	mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache,
-								GFP_KERNEL);
+								GFP_NOFS);
 	if (!mle) {
 		mlog_errno(ret);
 		goto leave;
@@ -2117,7 +2276,7 @@ fail:
 	 * take both dlm->spinlock and dlm->master_lock */
 	spin_lock(&dlm->spinlock);
 	spin_lock(&dlm->master_lock);
-	dlm_get_mle(mle);
+	dlm_get_mle_inuse(mle);
 	spin_unlock(&dlm->master_lock);
 	spin_unlock(&dlm->spinlock);
 
@@ -2134,7 +2293,10 @@ fail:
 		/* migration failed, detach and clean up mle */
 		dlm_mle_detach_hb_events(dlm, mle);
 		dlm_put_mle(mle);
-		dlm_put_mle(mle);
+		dlm_put_mle_inuse(mle);
+		spin_lock(&res->spinlock);
+		res->state &= ~DLM_LOCK_RES_MIGRATING;
+		spin_unlock(&res->spinlock);
 		goto leave;
 	}
 
@@ -2164,8 +2326,8 @@ fail:
 			/* avoid hang during shutdown when migrating lockres 
 			 * to a node which also goes down */
 			if (dlm_is_node_dead(dlm, target)) {
-				mlog(0, "%s:%.*s: expected migration target %u "
-				     "is no longer up.  restarting.\n",
+				mlog(0, "%s:%.*s: expected migration "
+				     "target %u is no longer up, restarting\n",
 				     dlm->name, res->lockname.len,
 				     res->lockname.name, target);
 				ret = -ERESTARTSYS;
@@ -2175,7 +2337,10 @@ fail:
 			/* migration failed, detach and clean up mle */
 			dlm_mle_detach_hb_events(dlm, mle);
 			dlm_put_mle(mle);
-			dlm_put_mle(mle);
+			dlm_put_mle_inuse(mle);
+			spin_lock(&res->spinlock);
+			res->state &= ~DLM_LOCK_RES_MIGRATING;
+			spin_unlock(&res->spinlock);
 			goto leave;
 		}
 		/* TODO: if node died: stop, clean up, return error */
@@ -2191,7 +2356,7 @@ fail:
 
 	/* master is known, detach if not already detached */
 	dlm_mle_detach_hb_events(dlm, mle);
-	dlm_put_mle(mle);
+	dlm_put_mle_inuse(mle);
 	ret = 0;
 
 	dlm_lockres_calc_usage(dlm, res);
@@ -2462,7 +2627,7 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data)
 	struct dlm_migrate_request *migrate = (struct dlm_migrate_request *) msg->buf;
 	struct dlm_master_list_entry *mle = NULL, *oldmle = NULL;
 	const char *name;
-	unsigned int namelen;
+	unsigned int namelen, hash;
 	int ret = 0;
 
 	if (!dlm_grab(dlm))
@@ -2470,10 +2635,11 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data)
 
 	name = migrate->name;
 	namelen = migrate->namelen;
+	hash = dlm_lockid_hash(name, namelen);
 
 	/* preallocate.. if this fails, abort */
 	mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache,
-							 GFP_KERNEL);
+							 GFP_NOFS);
 
 	if (!mle) {
 		ret = -ENOMEM;
@@ -2482,7 +2648,7 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data)
 
 	/* check for pre-existing lock */
 	spin_lock(&dlm->spinlock);
-	res = __dlm_lookup_lockres(dlm, name, namelen);
+	res = __dlm_lookup_lockres(dlm, name, namelen, hash);
 	spin_lock(&dlm->master_lock);
 
 	if (res) {
@@ -2580,6 +2746,9 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
 			/* remove it from the list so that only one
 			 * mle will be found */
 			list_del_init(&tmp->list);
+			/* tmp is the entry unlinked above, so it is the
+			 * one whose hb events must be detached */
+			__dlm_mle_detach_hb_events(dlm, tmp);
 		}
 		spin_unlock(&tmp->spinlock);
 	}
@@ -2601,6 +2768,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
 	struct list_head *iter, *iter2;
 	struct dlm_master_list_entry *mle;
 	struct dlm_lock_resource *res;
+	unsigned int hash;
 
 	mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node);
 top:
@@ -2640,7 +2808,7 @@ top:
 				 * may result in the mle being unlinked and
 				 * freed, but there may still be a process
 				 * waiting in the dlmlock path which is fine. */
-				mlog(ML_ERROR, "node %u was expected master\n",
+				mlog(0, "node %u was expected master\n",
 				     dead_node);
 				atomic_set(&mle->woken, 1);
 				spin_unlock(&mle->spinlock);
@@ -2673,19 +2841,21 @@ top:
 
 		/* remove from the list early.  NOTE: unlinking
 		 * list_head while in list_for_each_safe */
+		__dlm_mle_detach_hb_events(dlm, mle);
 		spin_lock(&mle->spinlock);
 		list_del_init(&mle->list);
 		atomic_set(&mle->woken, 1);
 		spin_unlock(&mle->spinlock);
 		wake_up(&mle->wq);
 
-		mlog(0, "node %u died during migration from "
-		     "%u to %u!\n", dead_node,
+		mlog(0, "%s: node %u died during migration from "
+		     "%u to %u!\n", dlm->name, dead_node,
 		     mle->master, mle->new_master);
 		/* if there is a lockres associated with this
 	 	 * mle, find it and set its owner to UNKNOWN */
+		hash = dlm_lockid_hash(mle->u.name.name, mle->u.name.len);
 		res = __dlm_lookup_lockres(dlm, mle->u.name.name,
-					mle->u.name.len);
+					   mle->u.name.len, hash);
 		if (res) {
 			/* unfortunately if we hit this rare case, our
 		 	 * lock ordering is messed.  we need to drop
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 9962190e7416..29b2845f370d 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -98,8 +98,8 @@ static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data);
 
 static u64 dlm_get_next_mig_cookie(void);
 
-static spinlock_t dlm_reco_state_lock = SPIN_LOCK_UNLOCKED;
-static spinlock_t dlm_mig_cookie_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(dlm_reco_state_lock);
+static DEFINE_SPINLOCK(dlm_mig_cookie_lock);
 static u64 dlm_mig_cookie = 1;
 
 static u64 dlm_get_next_mig_cookie(void)
@@ -115,12 +115,40 @@ static u64 dlm_get_next_mig_cookie(void)
 	return c;
 }
 
+/* Set reco.dead_node, logging only on change.  dlm->spinlock must be held. */
+static inline void dlm_set_reco_dead_node(struct dlm_ctxt *dlm, u8 dead_node)
+{
+	assert_spin_locked(&dlm->spinlock);
+	if (dead_node != dlm->reco.dead_node)
+		mlog(0, "%s: changing dead_node from %u to %u\n",
+		     dlm->name, dlm->reco.dead_node, dead_node);
+	dlm->reco.dead_node = dead_node;
+}
+
+/* Set reco.new_master; always logs.  dlm->spinlock must be held. */
+static inline void dlm_set_reco_master(struct dlm_ctxt *dlm, u8 master)
+{
+	assert_spin_locked(&dlm->spinlock);
+	mlog(0, "%s: changing new_master from %u to %u\n",
+	     dlm->name, dlm->reco.new_master, master);
+	dlm->reco.new_master = master;
+}
+
+/* Clear the current dead node's bit in recovery_map, then reset both
+ * reco.dead_node and reco.new_master to O2NM_INVALID_NODE_NUM.
+ * dlm->spinlock must be held. */
+static inline void __dlm_reset_recovery(struct dlm_ctxt *dlm)
+{
+	assert_spin_locked(&dlm->spinlock);
+	clear_bit(dlm->reco.dead_node, dlm->recovery_map);
+	dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
+	dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM);
+}
+
 static inline void dlm_reset_recovery(struct dlm_ctxt *dlm)
 {
 	spin_lock(&dlm->spinlock);
-	clear_bit(dlm->reco.dead_node, dlm->recovery_map);
-	dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
-	dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
+	__dlm_reset_recovery(dlm);
 	spin_unlock(&dlm->spinlock);
 }
 
@@ -132,12 +157,21 @@ void dlm_dispatch_work(void *data)
 	struct list_head *iter, *iter2;
 	struct dlm_work_item *item;
 	dlm_workfunc_t *workfunc;
+	int tot=0;
+
+	if (!dlm_joined(dlm))
+		return;
 
 	spin_lock(&dlm->work_lock);
 	list_splice_init(&dlm->work_list, &tmp_list);
 	spin_unlock(&dlm->work_lock);
 
 	list_for_each_safe(iter, iter2, &tmp_list) {
+		tot++;
+	}
+	mlog(0, "%s: work thread has %d work items\n", dlm->name, tot);
+
+	list_for_each_safe(iter, iter2, &tmp_list) {
 		item = list_entry(iter, struct dlm_work_item, list);
 		workfunc = item->func;
 		list_del_init(&item->list);
@@ -220,6 +254,52 @@ void dlm_complete_recovery_thread(struct dlm_ctxt *dlm)
  *
  */
 
+static void dlm_print_reco_node_status(struct dlm_ctxt *dlm)
+{
+	struct dlm_reco_node_data *ndata;	/* cursor over dlm->reco.node_data */
+	struct dlm_lock_resource *res;		/* cursor over dlm->reco.resources */
+
+	mlog(ML_NOTICE, "%s(%d): recovery info, state=%s, dead=%u, master=%u\n",
+	     dlm->name, dlm->dlm_reco_thread_task->pid,
+	     dlm->reco.state & DLM_RECO_STATE_ACTIVE ? "ACTIVE" : "inactive",
+	     dlm->reco.dead_node, dlm->reco.new_master);
+	/* NOTE(review): lists below are walked with no lock taken here - verify callers */
+	list_for_each_entry(ndata, &dlm->reco.node_data, list) {
+		char *st = "unknown";	/* never printed: default: sets "bad" */
+		switch (ndata->state) {
+			case DLM_RECO_NODE_DATA_INIT:
+				st = "init";
+				break;
+			case DLM_RECO_NODE_DATA_REQUESTING:
+				st = "requesting";
+				break;
+			case DLM_RECO_NODE_DATA_DEAD:
+				st = "dead";
+				break;
+			case DLM_RECO_NODE_DATA_RECEIVING:
+				st = "receiving";
+				break;
+			case DLM_RECO_NODE_DATA_REQUESTED:
+				st = "requested";
+				break;
+			case DLM_RECO_NODE_DATA_DONE:
+				st = "done";
+				break;
+			case DLM_RECO_NODE_DATA_FINALIZE_SENT:
+				st = "finalize-sent";
+				break;
+			default:
+				st = "bad";	/* state value outside the known set */
+				break;
+		}
+		mlog(ML_NOTICE, "%s: reco state, node %u, state=%s\n",
+		     dlm->name, ndata->node_num, st);
+	}
+	list_for_each_entry(res, &dlm->reco.resources, recovering) {
+		mlog(ML_NOTICE, "%s: lockres %.*s on recovering list\n",
+		     dlm->name, res->lockname.len, res->lockname.name);
+	}
+}
 
 #define DLM_RECO_THREAD_TIMEOUT_MS (5 * 1000)
 
@@ -267,11 +347,23 @@ int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node)
 {
 	int dead;
 	spin_lock(&dlm->spinlock);
-	dead = test_bit(node, dlm->domain_map);
+	dead = !test_bit(node, dlm->domain_map);
 	spin_unlock(&dlm->spinlock);
 	return dead;
 }
 
+/* returns true if node is not set in the recovery map, i.e. this
+ * node does not (or no longer does) have it queued for recovery */
+static int dlm_is_node_recovered(struct dlm_ctxt *dlm, u8 node)
+{
+	int recovered;
+	spin_lock(&dlm->spinlock);	/* recovery_map is sampled under the lock */
+	recovered = !test_bit(node, dlm->recovery_map);
+	spin_unlock(&dlm->spinlock);
+	return recovered;
+}
+
+
 int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
 {
 	if (timeout) {
@@ -290,6 +382,24 @@ int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
 	return 0;
 }
 
+int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout)
+{
+	if (timeout) {	/* timeout is in milliseconds; 0 selects the unbounded wait */
+		mlog(0, "%s: waiting %dms for notification of "
+		     "recovery of node %u\n", dlm->name, timeout, node);
+		wait_event_timeout(dlm->dlm_reco_thread_wq,
+			   dlm_is_node_recovered(dlm, node),
+			   msecs_to_jiffies(timeout));	/* result is discarded */
+	} else {
+		mlog(0, "%s: waiting indefinitely for notification "
+		     "of recovery of node %u\n", dlm->name, node);
+		wait_event(dlm->dlm_reco_thread_wq,
+			   dlm_is_node_recovered(dlm, node));
+	}
+	/* for now, return 0 even if the timed wait expired first */
+	return 0;
+}
+
 /* callers of the top-level api calls (dlmlock/dlmunlock) should
  * block on the dlm->reco.event when recovery is in progress.
  * the dlm recovery thread will set this state when it begins
@@ -308,6 +418,13 @@ static int dlm_in_recovery(struct dlm_ctxt *dlm)
 
 void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
 {
+	if (dlm_in_recovery(dlm)) {	/* trace only; the wait below is unconditional */
+		mlog(0, "%s: reco thread %d in recovery: "
+		     "state=%d, master=%u, dead=%u\n",
+		     dlm->name, dlm->dlm_reco_thread_task->pid,
+		     dlm->reco.state, dlm->reco.new_master,
+		     dlm->reco.dead_node);	/* NOTE(review): reco fields read unlocked here */
+	}
 	wait_event(dlm->reco.event, !dlm_in_recovery(dlm));
 }
 
@@ -341,7 +458,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
 		mlog(0, "new master %u died while recovering %u!\n",
 		     dlm->reco.new_master, dlm->reco.dead_node);
 		/* unset the new_master, leave dead_node */
-		dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
+		dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM);
 	}
 
 	/* select a target to recover */
@@ -350,14 +467,14 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
 
 		bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0);
 		if (bit >= O2NM_MAX_NODES || bit < 0)
-			dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
+			dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
 		else
-			dlm->reco.dead_node = bit;
+			dlm_set_reco_dead_node(dlm, bit);
 	} else if (!test_bit(dlm->reco.dead_node, dlm->recovery_map)) {
 		/* BUG? */
 		mlog(ML_ERROR, "dead_node %u no longer in recovery map!\n",
 		     dlm->reco.dead_node);
-		dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
+		dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
 	}
 
 	if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
@@ -366,7 +483,8 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
 		/* return to main thread loop and sleep. */
 		return 0;
 	}
-	mlog(0, "recovery thread found node %u in the recovery map!\n",
+	mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
+	     dlm->name, dlm->dlm_reco_thread_task->pid,
 	     dlm->reco.dead_node);
 	spin_unlock(&dlm->spinlock);
 
@@ -389,8 +507,8 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
 		}
 		mlog(0, "another node will master this recovery session.\n");
 	}
-	mlog(0, "dlm=%s, new_master=%u, this node=%u, dead_node=%u\n",
-	     dlm->name, dlm->reco.new_master,
+	mlog(0, "dlm=%s (%d), new_master=%u, this node=%u, dead_node=%u\n",
+	     dlm->name, dlm->dlm_reco_thread_task->pid, dlm->reco.new_master,
 	     dlm->node_num, dlm->reco.dead_node);
 
 	/* it is safe to start everything back up here
@@ -402,11 +520,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
 	return 0;
 
 master_here:
-	mlog(0, "mastering recovery of %s:%u here(this=%u)!\n",
+	mlog(0, "(%d) mastering recovery of %s:%u here(this=%u)!\n",
+	     dlm->dlm_reco_thread_task->pid,
 	     dlm->name, dlm->reco.dead_node, dlm->node_num);
 
 	status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
 	if (status < 0) {
+		/* we should never hit this anymore */
 		mlog(ML_ERROR, "error %d remastering locks for node %u, "
 		     "retrying.\n", status, dlm->reco.dead_node);
 		/* yield a bit to allow any final network messages
@@ -433,9 +553,16 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
 	int destroy = 0;
 	int pass = 0;
 
-	status = dlm_init_recovery_area(dlm, dead_node);
-	if (status < 0)
-		goto leave;
+	do {
+		/* we have become recovery master.  there is no escaping
+		 * this, so just keep trying until we get it. */
+		status = dlm_init_recovery_area(dlm, dead_node);
+		if (status < 0) {
+			mlog(ML_ERROR, "%s: failed to alloc recovery area, "
+			     "retrying\n", dlm->name);
+			msleep(1000);
+		}
+	} while (status != 0);
 
 	/* safe to access the node data list without a lock, since this
 	 * process is the only one to change the list */
@@ -452,16 +579,36 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
 			continue;
 		}
 
-		status = dlm_request_all_locks(dlm, ndata->node_num, dead_node);
-		if (status < 0) {
-			mlog_errno(status);
-			if (dlm_is_host_down(status))
-				ndata->state = DLM_RECO_NODE_DATA_DEAD;
-			else {
-				destroy = 1;
-				goto leave;
+		do {
+			status = dlm_request_all_locks(dlm, ndata->node_num,
+						       dead_node);
+			if (status < 0) {
+				mlog_errno(status);
+				if (dlm_is_host_down(status)) {
+					/* node died, ignore it for recovery */
+					status = 0;
+					ndata->state = DLM_RECO_NODE_DATA_DEAD;
+					/* wait for the domain map to catch up
+					 * with the network state. */
+					wait_event_timeout(dlm->dlm_reco_thread_wq,
+							   dlm_is_node_dead(dlm,
+								ndata->node_num),
+							   msecs_to_jiffies(1000));
+					mlog(0, "waited 1 sec for %u, "
+					     "dead? %s\n", ndata->node_num,
+					     dlm_is_node_dead(dlm, ndata->node_num) ?
+					     "yes" : "no");
+				} else {
+					/* -ENOMEM on the other node */
+					mlog(0, "%s: node %u returned "
+					     "%d during recovery, retrying "
+					     "after a short wait\n",
+					     dlm->name, ndata->node_num,
+					     status);
+					msleep(100);
+				}
 			}
-		}
+		} while (status != 0);
 
 		switch (ndata->state) {
 			case DLM_RECO_NODE_DATA_INIT:
@@ -473,10 +620,9 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
 				mlog(0, "node %u died after requesting "
 				     "recovery info for node %u\n",
 				     ndata->node_num, dead_node);
-				// start all over
-				destroy = 1;
-				status = -EAGAIN;
-				goto leave;
+				/* fine.  don't need this node's info.
+				 * continue without it. */
+				break;
 			case DLM_RECO_NODE_DATA_REQUESTING:
 				ndata->state = DLM_RECO_NODE_DATA_REQUESTED;
 				mlog(0, "now receiving recovery data from "
@@ -520,35 +666,26 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
 					BUG();
 					break;
 				case DLM_RECO_NODE_DATA_DEAD:
-					mlog(ML_NOTICE, "node %u died after "
+					mlog(0, "node %u died after "
 					     "requesting recovery info for "
 					     "node %u\n", ndata->node_num,
 					     dead_node);
-					spin_unlock(&dlm_reco_state_lock);
-					// start all over
-					destroy = 1;
-					status = -EAGAIN;
-					/* instead of spinning like crazy here,
-					 * wait for the domain map to catch up
-					 * with the network state.  otherwise this
-					 * can be hit hundreds of times before
-					 * the node is really seen as dead. */
-					wait_event_timeout(dlm->dlm_reco_thread_wq,
-							   dlm_is_node_dead(dlm,
-								ndata->node_num),
-							   msecs_to_jiffies(1000));
-					mlog(0, "waited 1 sec for %u, "
-					     "dead? %s\n", ndata->node_num,
-					     dlm_is_node_dead(dlm, ndata->node_num) ?
-					     "yes" : "no");
-					goto leave;
+					break;
 				case DLM_RECO_NODE_DATA_RECEIVING:
 				case DLM_RECO_NODE_DATA_REQUESTED:
+					mlog(0, "%s: node %u still in state %s\n",
+					     dlm->name, ndata->node_num,
+					     ndata->state==DLM_RECO_NODE_DATA_RECEIVING ?
+					     "receiving" : "requested");
 					all_nodes_done = 0;
 					break;
 				case DLM_RECO_NODE_DATA_DONE:
+					mlog(0, "%s: node %u state is done\n",
+					     dlm->name, ndata->node_num);
 					break;
 				case DLM_RECO_NODE_DATA_FINALIZE_SENT:
+					mlog(0, "%s: node %u state is finalize\n",
+					     dlm->name, ndata->node_num);
 					break;
 			}
 		}
@@ -578,7 +715,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
 			     jiffies, dlm->reco.dead_node,
 			     dlm->node_num, dlm->reco.new_master);
 			destroy = 1;
-			status = ret;
+			status = 0;
 			/* rescan everything marked dirty along the way */
 			dlm_kick_thread(dlm, NULL);
 			break;
@@ -591,7 +728,6 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
 
 	}
 
-leave:
 	if (destroy)
 		dlm_destroy_recovery_area(dlm, dead_node);
 
@@ -617,7 +753,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
 		}
 		BUG_ON(num == dead_node);
 
-		ndata = kcalloc(1, sizeof(*ndata), GFP_KERNEL);
+		ndata = kcalloc(1, sizeof(*ndata), GFP_NOFS);
 		if (!ndata) {
 			dlm_destroy_recovery_area(dlm, dead_node);
 			return -ENOMEM;
@@ -691,16 +827,25 @@ int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data)
 	if (!dlm_grab(dlm))
 		return -EINVAL;
 
+	if (lr->dead_node != dlm->reco.dead_node) {
+		mlog(ML_ERROR, "%s: node %u sent dead_node=%u, but local "
+		     "dead_node is %u\n", dlm->name, lr->node_idx,
+		     lr->dead_node, dlm->reco.dead_node);
+		dlm_print_reco_node_status(dlm);
+		/* this is a hack: presumably -ENOMEM makes the sender retry */
+		dlm_put(dlm);
+		return -ENOMEM;
+	}
 	BUG_ON(lr->dead_node != dlm->reco.dead_node);
 
-	item = kcalloc(1, sizeof(*item), GFP_KERNEL);
+	item = kcalloc(1, sizeof(*item), GFP_NOFS);
 	if (!item) {
 		dlm_put(dlm);
 		return -ENOMEM;
 	}
 
 	/* this will get freed by dlm_request_all_locks_worker */
-	buf = (char *) __get_free_page(GFP_KERNEL);
+	buf = (char *) __get_free_page(GFP_NOFS);
 	if (!buf) {
 		kfree(item);
 		dlm_put(dlm);
@@ -715,7 +860,7 @@ int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data)
 	spin_lock(&dlm->work_lock);
 	list_add_tail(&item->list, &dlm->work_list);
 	spin_unlock(&dlm->work_lock);
-	schedule_work(&dlm->dispatched_work);
+	queue_work(dlm->dlm_worker, &dlm->dispatched_work);
 
 	dlm_put(dlm);
 	return 0;
@@ -730,32 +875,34 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
 	struct list_head *iter;
 	int ret;
 	u8 dead_node, reco_master;
+	int skip_all_done = 0;
 
 	dlm = item->dlm;
 	dead_node = item->u.ral.dead_node;
 	reco_master = item->u.ral.reco_master;
 	mres = (struct dlm_migratable_lockres *)data;
 
+	mlog(0, "%s: recovery worker started, dead=%u, master=%u\n",
+	     dlm->name, dead_node, reco_master);
+
 	if (dead_node != dlm->reco.dead_node ||
 	    reco_master != dlm->reco.new_master) {
-		/* show extra debug info if the recovery state is messed */
-		mlog(ML_ERROR, "%s: bad reco state: reco(dead=%u, master=%u), "
-		     "request(dead=%u, master=%u)\n",
-		     dlm->name, dlm->reco.dead_node, dlm->reco.new_master,
-		     dead_node, reco_master);
-		mlog(ML_ERROR, "%s: name=%.*s master=%u locks=%u/%u flags=%u "
-		     "entry[0]={c=%u:%llu,l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n",
-		     dlm->name, mres->lockname_len, mres->lockname, mres->master,
-		     mres->num_locks, mres->total_locks, mres->flags,
-		     dlm_get_lock_cookie_node(mres->ml[0].cookie),
-		     dlm_get_lock_cookie_seq(mres->ml[0].cookie),
-		     mres->ml[0].list, mres->ml[0].flags,
-		     mres->ml[0].type, mres->ml[0].convert_type,
-		     mres->ml[0].highest_blocked, mres->ml[0].node);
-		BUG();
+		/* worker could have been created before the recovery master
+		 * died.  if so, do not continue, but do not error. */
+		if (dlm->reco.new_master == O2NM_INVALID_NODE_NUM) {
+			mlog(ML_NOTICE, "%s: will not send recovery state, "
+			     "recovery master %u died, thread=(dead=%u,mas=%u)"
+			     " current=(dead=%u,mas=%u)\n", dlm->name,
+			     reco_master, dead_node, reco_master,
+			     dlm->reco.dead_node, dlm->reco.new_master);
+		} else {
+			mlog(ML_NOTICE, "%s: reco state invalid: reco(dead=%u, "
+			     "master=%u), request(dead=%u, master=%u)\n",
+			     dlm->name, dlm->reco.dead_node,
+			     dlm->reco.new_master, dead_node, reco_master);
+		}
+		goto leave;
 	}
-	BUG_ON(dead_node != dlm->reco.dead_node);
-	BUG_ON(reco_master != dlm->reco.new_master);
 
 	/* lock resources should have already been moved to the
  	 * dlm->reco.resources list.  now move items from that list
@@ -766,12 +913,20 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
 	dlm_move_reco_locks_to_list(dlm, &resources, dead_node);
 
 	/* now we can begin blasting lockreses without the dlm lock */
+
+	/* any errors returned will be due to the new_master dying,
+	 * the dlm_reco_thread should detect this */
 	list_for_each(iter, &resources) {
 		res = list_entry (iter, struct dlm_lock_resource, recovering);
 		ret = dlm_send_one_lockres(dlm, res, mres, reco_master,
 				   	DLM_MRES_RECOVERY);
-		if (ret < 0)
-			mlog_errno(ret);
+		if (ret < 0) {
+			mlog(ML_ERROR, "%s: node %u went down while sending "
+			     "recovery state for dead node %u, ret=%d\n", dlm->name,
+			     reco_master, dead_node, ret);
+			skip_all_done = 1;
+			break;
+		}
 	}
 
 	/* move the resources back to the list */
@@ -779,10 +934,15 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
 	list_splice_init(&resources, &dlm->reco.resources);
 	spin_unlock(&dlm->spinlock);
 
-	ret = dlm_send_all_done_msg(dlm, dead_node, reco_master);
-	if (ret < 0)
-		mlog_errno(ret);
-
+	if (!skip_all_done) {
+		ret = dlm_send_all_done_msg(dlm, dead_node, reco_master);
+		if (ret < 0) {
+			mlog(ML_ERROR, "%s: node %u went down while sending "
+			     "recovery all-done for dead node %u, ret=%d\n",
+			     dlm->name, reco_master, dead_node, ret);
+		}
+	}
+leave:
 	free_page((unsigned long)data);
 }
 
@@ -801,8 +961,14 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
 
 	ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
 				 sizeof(done_msg), send_to, &tmpret);
-	/* negative status is ignored by the caller */
-	if (ret >= 0)
+	if (ret < 0) {
+		if (!dlm_is_host_down(ret)) {
+			mlog_errno(ret);
+			mlog(ML_ERROR, "%s: unknown error sending data-done "
+			     "to %u\n", dlm->name, send_to);
+			BUG();
+		}
+	} else
 		ret = tmpret;
 	return ret;
 }
@@ -822,7 +988,11 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data)
 	mlog(0, "got DATA DONE: dead_node=%u, reco.dead_node=%u, "
 	     "node_idx=%u, this node=%u\n", done->dead_node,
 	     dlm->reco.dead_node, done->node_idx, dlm->node_num);
-	BUG_ON(done->dead_node != dlm->reco.dead_node);
+
+	mlog_bug_on_msg((done->dead_node != dlm->reco.dead_node),
+			"Got DATA DONE: dead_node=%u, reco.dead_node=%u, "
+			"node_idx=%u, this node=%u\n", done->dead_node,
+			dlm->reco.dead_node, done->node_idx, dlm->node_num);
 
 	spin_lock(&dlm_reco_state_lock);
 	list_for_each(iter, &dlm->reco.node_data) {
@@ -1021,8 +1191,9 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock,
 		    ml->type == LKM_PRMODE) {
 			/* if it is already set, this had better be a PR
 			 * and it has to match */
-			if (mres->lvb[0] && (ml->type == LKM_EXMODE ||
-			    memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) {
+			if (!dlm_lvb_is_empty(mres->lvb) &&
+			    (ml->type == LKM_EXMODE ||
+			     memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) {
 				mlog(ML_ERROR, "mismatched lvbs!\n");
 				__dlm_print_one_lock_resource(lock->lockres);
 				BUG();
@@ -1081,22 +1252,25 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 			 * we must send it immediately. */
 			ret = dlm_send_mig_lockres_msg(dlm, mres, send_to,
 						       res, total_locks);
-			if (ret < 0) {
-				// TODO
-				mlog(ML_ERROR, "dlm_send_mig_lockres_msg "
-				     "returned %d, TODO\n", ret);
-				BUG();
-			}
+			if (ret < 0)
+				goto error;
 		}
 	}
 	/* flush any remaining locks */
 	ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks);
-	if (ret < 0) {
-		// TODO
-		mlog(ML_ERROR, "dlm_send_mig_lockres_msg returned %d, "
-		     "TODO\n", ret);
+	if (ret < 0)
+		goto error;
+	return ret;
+
+error:
+	mlog(ML_ERROR, "%s: dlm_send_mig_lockres_msg returned %d\n",
+	     dlm->name, ret);
+	if (!dlm_is_host_down(ret))
 		BUG();
-	}
+	mlog(0, "%s: node %u went down while sending %s "
+	     "lockres %.*s\n", dlm->name, send_to,
+	     flags & DLM_MRES_RECOVERY ?  "recovery" : "migration",
+	     res->lockname.len, res->lockname.name);
 	return ret;
 }
 
@@ -1144,8 +1318,8 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
 		mlog(0, "all done flag.  all lockres data received!\n");
 
 	ret = -ENOMEM;
-	buf = kmalloc(be16_to_cpu(msg->data_len), GFP_KERNEL);
-	item = kcalloc(1, sizeof(*item), GFP_KERNEL);
+	buf = kmalloc(be16_to_cpu(msg->data_len), GFP_NOFS);
+	item = kcalloc(1, sizeof(*item), GFP_NOFS);
 	if (!buf || !item)
 		goto leave;
 
@@ -1236,7 +1410,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
 	spin_lock(&dlm->work_lock);
 	list_add_tail(&item->list, &dlm->work_list);
 	spin_unlock(&dlm->work_lock);
-	schedule_work(&dlm->dispatched_work);
+	queue_work(dlm->dlm_worker, &dlm->dispatched_work);
 
 leave:
 	dlm_put(dlm);
@@ -1404,6 +1578,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data)
 	struct dlm_ctxt *dlm = data;
 	struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf;
 	struct dlm_lock_resource *res = NULL;
+	unsigned int hash;
 	int master = DLM_LOCK_RES_OWNER_UNKNOWN;
 	u32 flags = DLM_ASSERT_MASTER_REQUERY;
 
@@ -1413,8 +1588,10 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data)
 		return master;
 	}
 
+	hash = dlm_lockid_hash(req->name, req->namelen);
+
 	spin_lock(&dlm->spinlock);
-	res = __dlm_lookup_lockres(dlm, req->name, req->namelen);
+	res = __dlm_lookup_lockres(dlm, req->name, req->namelen, hash);
 	if (res) {
 		spin_lock(&res->spinlock);
 		master = res->owner;
@@ -1481,7 +1658,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 	struct dlm_lock *newlock = NULL;
 	struct dlm_lockstatus *lksb = NULL;
 	int ret = 0;
-	int i;
+	int i, bad;
 	struct list_head *iter;
 	struct dlm_lock *lock = NULL;
 
@@ -1550,28 +1727,48 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 		}
 		lksb->flags |= (ml->flags &
 				(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB));
-			
-		if (mres->lvb[0]) {
+
+		if (ml->type == LKM_NLMODE)
+			goto skip_lvb;
+
+		if (!dlm_lvb_is_empty(mres->lvb)) {
 			if (lksb->flags & DLM_LKSB_PUT_LVB) {
 				/* other node was trying to update
 				 * lvb when node died.  recreate the
 				 * lksb with the updated lvb. */
 				memcpy(lksb->lvb, mres->lvb, DLM_LVB_LEN);
+				/* the lock resource lvb update must happen
+				 * NOW, before the spinlock is dropped.
+				 * we no longer wait for the AST to update
+				 * the lvb. */
+				memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
 			} else {
 				/* otherwise, the node is sending its 
 				 * most recent valid lvb info */
 				BUG_ON(ml->type != LKM_EXMODE &&
 				       ml->type != LKM_PRMODE);
-				if (res->lvb[0] && (ml->type == LKM_EXMODE ||
-				    memcmp(res->lvb, mres->lvb, DLM_LVB_LEN))) {
-					mlog(ML_ERROR, "received bad lvb!\n");
-					__dlm_print_one_lock_resource(res);
-					BUG();
+				if (!dlm_lvb_is_empty(res->lvb) &&
+ 				    (ml->type == LKM_EXMODE ||
+ 				     memcmp(res->lvb, mres->lvb, DLM_LVB_LEN))) {
+ 					int i;
+ 					mlog(ML_ERROR, "%s:%.*s: received bad "
+ 					     "lvb! type=%d\n", dlm->name,
+ 					     res->lockname.len,
+ 					     res->lockname.name, ml->type);
+ 					printk("lockres lvb=[");
+ 					for (i=0; i<DLM_LVB_LEN; i++)
+ 						printk("%02x", res->lvb[i]);
+ 					printk("]\nmigrated lvb=[");
+ 					for (i=0; i<DLM_LVB_LEN; i++)
+ 						printk("%02x", mres->lvb[i]);
+ 					printk("]\n");
+ 					dlm_print_one_lock_resource(res);
+ 					BUG();
 				}
 				memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
 			}
 		}
-
+skip_lvb:
 
 		/* NOTE:
 		 * wrt lock queue ordering and recovery:
@@ -1589,9 +1786,33 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 		 * relative to each other, but clearly *not*
 		 * preserved relative to locks from other nodes.
 		 */
+		bad = 0;
 		spin_lock(&res->spinlock);
-		dlm_lock_get(newlock);
-		list_add_tail(&newlock->list, queue);
+		list_for_each_entry(lock, queue, list) {
+			if (lock->ml.cookie == ml->cookie) {
+				u64 c = lock->ml.cookie;
+				mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already "
+				     "exists on this lockres!\n", dlm->name,
+				     res->lockname.len, res->lockname.name,
+				     dlm_get_lock_cookie_node(c),
+				     dlm_get_lock_cookie_seq(c));
+
+				mlog(ML_NOTICE, "sent lock: type=%d, conv=%d, "
+				     "node=%u, cookie=%u:%llu, queue=%d\n",
+	      			     ml->type, ml->convert_type, ml->node,
+				     dlm_get_lock_cookie_node(ml->cookie),
+				     dlm_get_lock_cookie_seq(ml->cookie),
+				     ml->list);
+
+				__dlm_print_one_lock_resource(res);
+				bad = 1;
+				break;
+			}
+		}
+		if (!bad) {
+			dlm_lock_get(newlock);
+			list_add_tail(&newlock->list, queue);
+		}
 		spin_unlock(&res->spinlock);
 	}
 	mlog(0, "done running all the locks\n");
@@ -1615,8 +1836,14 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
 	struct dlm_lock *lock;
 
 	res->state |= DLM_LOCK_RES_RECOVERING;
-	if (!list_empty(&res->recovering))
+	if (!list_empty(&res->recovering)) {
+		mlog(0,
+		     "Recovering res %s:%.*s, is already on recovery list!\n",
+		     dlm->name, res->lockname.len, res->lockname.name);
 		list_del_init(&res->recovering);
+	}
+	/* We need to hold a reference while on the recovery list */
+	dlm_lockres_get(res);
 	list_add_tail(&res->recovering, &dlm->reco.resources);
 
 	/* find any pending locks and put them back on proper list */
@@ -1705,9 +1932,11 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
 			spin_lock(&res->spinlock);
 			dlm_change_lockres_owner(dlm, res, new_master);
 			res->state &= ~DLM_LOCK_RES_RECOVERING;
-			__dlm_dirty_lockres(dlm, res);
+			if (!__dlm_lockres_unused(res))
+				__dlm_dirty_lockres(dlm, res);
 			spin_unlock(&res->spinlock);
 			wake_up(&res->wq);
+			dlm_lockres_put(res);
 		}
 	}
 
@@ -1716,7 +1945,7 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
 	 * the RECOVERING state and set the owner
 	 * if necessary */
 	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
-		bucket = &(dlm->lockres_hash[i]);
+		bucket = dlm_lockres_hash(dlm, i);
 		hlist_for_each_entry(res, hash_iter, bucket, hash_node) {
 			if (res->state & DLM_LOCK_RES_RECOVERING) {
 				if (res->owner == dead_node) {
@@ -1740,11 +1969,13 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
 					     dlm->name, res->lockname.len,
 					     res->lockname.name, res->owner);
 					list_del_init(&res->recovering);
+					dlm_lockres_put(res);
 				}
 				spin_lock(&res->spinlock);
 				dlm_change_lockres_owner(dlm, res, new_master);
 				res->state &= ~DLM_LOCK_RES_RECOVERING;
-				__dlm_dirty_lockres(dlm, res);
+				if (!__dlm_lockres_unused(res))
+					__dlm_dirty_lockres(dlm, res);
 				spin_unlock(&res->spinlock);
 				wake_up(&res->wq);
 			}
@@ -1881,7 +2112,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
 	 *    need to be fired as a result.
 	 */
 	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
-		bucket = &(dlm->lockres_hash[i]);
+		bucket = dlm_lockres_hash(dlm, i);
 		hlist_for_each_entry(res, iter, bucket, hash_node) {
  			/* always prune any $RECOVERY entries for dead nodes,
  			 * otherwise hangs can occur during later recovery */
@@ -1921,6 +2152,20 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx)
 {
 	assert_spin_locked(&dlm->spinlock);
 
+	if (dlm->reco.new_master == idx) {
+		mlog(0, "%s: recovery master %d just died\n",
+		     dlm->name, idx);
+		if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
+			/* finalize1 was reached, so it is safe to clear
+			 * the new_master and dead_node.  that recovery
+			 * is complete. */
+			mlog(0, "%s: dead master %d had reached "
+			     "finalize1 state, clearing\n", dlm->name, idx);
+			dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
+			__dlm_reset_recovery(dlm);
+		}
+	}
+
 	/* check to see if the node is already considered dead */
 	if (!test_bit(idx, dlm->live_nodes_map)) {
 		mlog(0, "for domain %s, node %d is already dead. "
@@ -2084,7 +2329,7 @@ again:
 
 			/* set the new_master to this node */
 			spin_lock(&dlm->spinlock);
-			dlm->reco.new_master = dlm->node_num;
+			dlm_set_reco_master(dlm, dlm->node_num);
 			spin_unlock(&dlm->spinlock);
 		}
 
@@ -2122,6 +2367,10 @@ again:
 		mlog(0, "%s: reco master %u is ready to recover %u\n",
 		     dlm->name, dlm->reco.new_master, dlm->reco.dead_node);
 		status = -EEXIST;
+	} else if (ret == DLM_RECOVERING) {
+		mlog(0, "dlm=%s dlmlock says master node died (this=%u)\n",
+		     dlm->name, dlm->node_num);
+		goto again;
 	} else {
 		struct dlm_lock_resource *res;
 
@@ -2153,7 +2402,7 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
 
 	mlog_entry("%u\n", dead_node);
 
-	mlog(0, "dead node is %u\n", dead_node);
+	mlog(0, "%s: dead node is %u\n", dlm->name, dead_node);
 
 	spin_lock(&dlm->spinlock);
 	dlm_node_iter_init(dlm->domain_map, &iter);
@@ -2211,6 +2460,14 @@ retry:
 			 * another ENOMEM */
 			msleep(100);
 			goto retry;
+		} else if (ret == EAGAIN) {
+			mlog(0, "%s: trying to start recovery of node "
+			     "%u, but node %u is waiting for last recovery "
+			     "to complete, backoff for a bit\n", dlm->name,
+			     dead_node, nodenum);
+			/* TODO Look into replacing msleep with cond_resched() */
+			msleep(100);
+			goto retry;
 		}
 	}
 
@@ -2226,8 +2483,20 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
 	if (!dlm_grab(dlm))
 		return 0;
 
-	mlog(0, "node %u wants to recover node %u\n",
-		  br->node_idx, br->dead_node);
+	spin_lock(&dlm->spinlock);
+	if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
+		mlog(0, "%s: node %u wants to recover node %u (%u:%u) "
+		     "but this node is in finalize state, waiting on finalize2\n",
+		     dlm->name, br->node_idx, br->dead_node,
+		     dlm->reco.dead_node, dlm->reco.new_master);
+		spin_unlock(&dlm->spinlock);
+		return EAGAIN;
+	}
+	spin_unlock(&dlm->spinlock);
+
+	mlog(0, "%s: node %u wants to recover node %u (%u:%u)\n",
+	     dlm->name, br->node_idx, br->dead_node,
+	     dlm->reco.dead_node, dlm->reco.new_master);
 
 	dlm_fire_domain_eviction_callbacks(dlm, br->dead_node);
 
@@ -2249,8 +2518,8 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
 		     "node %u changing it to %u\n", dlm->name, 
 		     dlm->reco.dead_node, br->node_idx, br->dead_node);
 	}
-	dlm->reco.new_master = br->node_idx;
-	dlm->reco.dead_node = br->dead_node;
+	dlm_set_reco_master(dlm, br->node_idx);
+	dlm_set_reco_dead_node(dlm, br->dead_node);
 	if (!test_bit(br->dead_node, dlm->recovery_map)) {
 		mlog(0, "recovery master %u sees %u as dead, but this "
 		     "node has not yet.  marking %u as dead\n",
@@ -2269,10 +2538,16 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
 	spin_unlock(&dlm->spinlock);
 
 	dlm_kick_recovery_thread(dlm);
+
+	mlog(0, "%s: recovery started by node %u, for %u (%u:%u)\n",
+	     dlm->name, br->node_idx, br->dead_node,
+	     dlm->reco.dead_node, dlm->reco.new_master);
+
 	dlm_put(dlm);
 	return 0;
 }
 
+#define DLM_FINALIZE_STAGE2  0x01
 static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
 {
 	int ret = 0;
@@ -2280,25 +2555,31 @@ static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
 	struct dlm_node_iter iter;
 	int nodenum;
 	int status;
+	int stage = 1;
 
-	mlog(0, "finishing recovery for node %s:%u\n",
-	     dlm->name, dlm->reco.dead_node);
+	mlog(0, "finishing recovery for node %s:%u, "
+	     "stage %d\n", dlm->name, dlm->reco.dead_node, stage);
 
 	spin_lock(&dlm->spinlock);
 	dlm_node_iter_init(dlm->domain_map, &iter);
 	spin_unlock(&dlm->spinlock);
 
+stage2:
 	memset(&fr, 0, sizeof(fr));
 	fr.node_idx = dlm->node_num;
 	fr.dead_node = dlm->reco.dead_node;
+	if (stage == 2)
+		fr.flags |= DLM_FINALIZE_STAGE2;
 
 	while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
 		if (nodenum == dlm->node_num)
 			continue;
 		ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key,
 					 &fr, sizeof(fr), nodenum, &status);
-		if (ret >= 0) {
+		if (ret >= 0)
 			ret = status;
+		if (ret < 0) {
+			mlog_errno(ret);
 			if (dlm_is_host_down(ret)) {
 				/* this has no effect on this recovery 
 				 * session, so set the status to zero to 
@@ -2306,13 +2587,17 @@ static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
 				mlog(ML_ERROR, "node %u went down after this "
 				     "node finished recovery.\n", nodenum);
 				ret = 0;
+				continue;
 			}
-		}
-		if (ret < 0) {
-			mlog_errno(ret);
 			break;
 		}
 	}
+	if (stage == 1) {
+		/* reset the node_iter back to the top and send finalize2 */
+		iter.curnode = -1;
+		stage = 2;
+		goto stage2;
+	}
 
 	return ret;
 }
@@ -2321,14 +2606,19 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data)
 {
 	struct dlm_ctxt *dlm = data;
 	struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf;
+	int stage = 1;
 
 	/* ok to return 0, domain has gone away */
 	if (!dlm_grab(dlm))
 		return 0;
 
-	mlog(0, "node %u finalizing recovery of node %u\n",
-	     fr->node_idx, fr->dead_node);
+	if (fr->flags & DLM_FINALIZE_STAGE2)
+		stage = 2;
 
+	mlog(0, "%s: node %u finalizing recovery stage%d of "
+	     "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage,
+	     fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master);
+ 
 	spin_lock(&dlm->spinlock);
 
 	if (dlm->reco.new_master != fr->node_idx) {
@@ -2344,13 +2634,41 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data)
 		BUG();
 	}
 
-	dlm_finish_local_lockres_recovery(dlm, fr->dead_node, fr->node_idx);
-
-	spin_unlock(&dlm->spinlock);
+	switch (stage) {
+		case 1:
+			dlm_finish_local_lockres_recovery(dlm, fr->dead_node, fr->node_idx);
+			if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
+				mlog(ML_ERROR, "%s: received finalize1 from "
+				     "new master %u for dead node %u, but "
+				     "this node has already received it!\n",
+				     dlm->name, fr->node_idx, fr->dead_node);
+				dlm_print_reco_node_status(dlm);
+				BUG();
+			}
+			dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
+			spin_unlock(&dlm->spinlock);
+			break;
+		case 2:
+			if (!(dlm->reco.state & DLM_RECO_STATE_FINALIZE)) {
+				mlog(ML_ERROR, "%s: received finalize2 from "
+				     "new master %u for dead node %u, but "
+				     "this node did not have finalize1!\n",
+				     dlm->name, fr->node_idx, fr->dead_node);
+				dlm_print_reco_node_status(dlm);
+				BUG();
+			}
+			dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
+			spin_unlock(&dlm->spinlock);
+			dlm_reset_recovery(dlm);
+			dlm_kick_recovery_thread(dlm);
+			break;
+		default:
+			BUG();
+	}
 
-	dlm_reset_recovery(dlm);
+	mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n",
+	     dlm->name, fr->node_idx, dlm->reco.dead_node, dlm->reco.new_master);
 
-	dlm_kick_recovery_thread(dlm);
 	dlm_put(dlm);
 	return 0;
 }
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 44d3b57ae8a8..0c822f3ffb05 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -39,6 +39,7 @@
 #include <linux/inet.h>
 #include <linux/timer.h>
 #include <linux/kthread.h>
+#include <linux/delay.h>
 
 
 #include "cluster/heartbeat.h"
@@ -53,6 +54,8 @@
 #include "cluster/masklog.h"
 
 static int dlm_thread(void *data);
+static void dlm_purge_lockres_now(struct dlm_ctxt *dlm,
+				  struct dlm_lock_resource *lockres);
 
 static void dlm_flush_asts(struct dlm_ctxt *dlm);
 
@@ -80,7 +83,7 @@ repeat:
 }
 
 
-static int __dlm_lockres_unused(struct dlm_lock_resource *res)
+int __dlm_lockres_unused(struct dlm_lock_resource *res)
 {
 	if (list_empty(&res->granted) &&
 	    list_empty(&res->converting) &&
@@ -103,6 +106,20 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 	assert_spin_locked(&res->spinlock);
 
 	if (__dlm_lockres_unused(res)){
+		/* For now, just keep any resource we master */
+		if (res->owner == dlm->node_num)
+		{
+			if (!list_empty(&res->purge)) {
+				mlog(0, "we master %s:%.*s, but it is on "
+				     "the purge list.  Removing\n",
+				     dlm->name, res->lockname.len,
+				     res->lockname.name);
+				list_del_init(&res->purge);
+				dlm->purge_count--;
+			}
+			return;
+		}
+
 		if (list_empty(&res->purge)) {
 			mlog(0, "putting lockres %.*s from purge list\n",
 			     res->lockname.len, res->lockname.name);
@@ -110,10 +127,23 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 			res->last_used = jiffies;
 			list_add_tail(&res->purge, &dlm->purge_list);
 			dlm->purge_count++;
+
+			/* if this node is not the owner, there is
+			 * no way to keep track of who the owner could be.
+			 * unhash it to avoid serious problems. */
+			if (res->owner != dlm->node_num) {
+				mlog(0, "%s:%.*s: doing immediate "
+				     "purge of lockres owned by %u\n",
+				     dlm->name, res->lockname.len,
+				     res->lockname.name, res->owner);
+
+				dlm_purge_lockres_now(dlm, res);
+			}
 		}
 	} else if (!list_empty(&res->purge)) {
-		mlog(0, "removing lockres %.*s from purge list\n",
-		     res->lockname.len, res->lockname.name);
+		mlog(0, "removing lockres %.*s from purge list, "
+		     "owner=%u\n", res->lockname.len, res->lockname.name,
+		     res->owner);
 
 		list_del_init(&res->purge);
 		dlm->purge_count--;
@@ -165,6 +195,7 @@ again:
 	} else if (ret < 0) {
 		mlog(ML_NOTICE, "lockres %.*s: migrate failed, retrying\n",
 		     lockres->lockname.len, lockres->lockname.name);
+		msleep(100);
 		goto again;
 	}
 
@@ -178,6 +209,24 @@ finish:
 	__dlm_unhash_lockres(lockres);
 }
 
+/* make an unused lockres go away immediately.
+ * as soon as the dlm spinlock is dropped, this lockres
+ * will not be found. kfree still happens on last put. */
+static void dlm_purge_lockres_now(struct dlm_ctxt *dlm,
+				  struct dlm_lock_resource *lockres)
+{
+	assert_spin_locked(&dlm->spinlock);
+	assert_spin_locked(&lockres->spinlock);
+
+	BUG_ON(!__dlm_lockres_unused(lockres));
+
+	if (!list_empty(&lockres->purge)) {
+		list_del_init(&lockres->purge);
+		dlm->purge_count--;
+	}
+	__dlm_unhash_lockres(lockres);
+}
+
 static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 			       int purge_now)
 {
@@ -420,6 +469,8 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 	/* don't shuffle secondary queues */
 	if ((res->owner == dlm->node_num) &&
 	    !(res->state & DLM_LOCK_RES_DIRTY)) {
+		/* ref for dirty_list */
+		dlm_lockres_get(res);
 		list_add_tail(&res->dirty, &dlm->dirty_list);
 		res->state |= DLM_LOCK_RES_DIRTY;
 	}
@@ -604,6 +655,8 @@ static int dlm_thread(void *data)
 			list_del_init(&res->dirty);
 			spin_unlock(&res->spinlock);
 			spin_unlock(&dlm->spinlock);
+			/* Drop dirty_list ref */
+			dlm_lockres_put(res);
 
 		 	/* lockres can be re-dirtied/re-added to the
 			 * dirty_list in this gap, but that is ok */
@@ -640,8 +693,9 @@ static int dlm_thread(void *data)
 			 * spinlock and do NOT have the dlm lock.
 			 * safe to reserve/queue asts and run the lists. */
 
-			mlog(0, "calling dlm_shuffle_lists with dlm=%p, "
-			     "res=%p\n", dlm, res);
+			mlog(0, "calling dlm_shuffle_lists with dlm=%s, "
+			     "res=%.*s\n", dlm->name,
+			     res->lockname.len, res->lockname.name);
 
 			/* called while holding lockres lock */
 			dlm_shuffle_lists(dlm, res);
@@ -655,6 +709,8 @@ in_progress:
 			/* if the lock was in-progress, stick
 			 * it on the back of the list */
 			if (delay) {
+				/* ref for dirty_list */
+				dlm_lockres_get(res);
 				spin_lock(&res->spinlock);
 				list_add_tail(&res->dirty, &dlm->dirty_list);
 				res->state |= DLM_LOCK_RES_DIRTY;
@@ -675,7 +731,7 @@ in_progress:
 
 		/* yield and continue right away if there is more work to do */
 		if (!n) {
-			yield();
+			cond_resched();
 			continue;
 		}
 
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index ac89c509daf9..b0c3134f4f70 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -318,6 +318,16 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
 
 	mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
 
+	if (owner == dlm->node_num) {
+		/* Ended up trying to contact ourselves.  This means
+		 * that the lockres had been remote but became local
+		 * via a migration.  Just retry it, now as local. */
+		mlog(0, "%s:%.*s: this node became the master due to a "
+		     "migration, re-evaluate now\n", dlm->name,
+		     res->lockname.len, res->lockname.name);
+		return DLM_FORWARD;
+	}
+
 	memset(&unlock, 0, sizeof(unlock));
 	unlock.node_idx = dlm->node_num;
 	unlock.flags = cpu_to_be32(flags);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index 74ca4e5f9765..e641b084b343 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -672,7 +672,7 @@ struct dlm_ctxt *user_dlm_register_context(struct qstr *name)
 	u32 dlm_key;
 	char *domain;
 
-	domain = kmalloc(name->len + 1, GFP_KERNEL);
+	domain = kmalloc(name->len + 1, GFP_NOFS);
 	if (!domain) {
 		mlog_errno(-ENOMEM);
 		return ERR_PTR(-ENOMEM);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 64cd52860c87..4acd37286bdd 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -242,7 +242,7 @@ static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
 	mlog_exit_void();
 }
 
-static spinlock_t ocfs2_dlm_tracking_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
 
 static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
 				       struct ocfs2_dlm_debug *dlm_debug)
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 84c507961287..35140f6cf840 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -114,7 +114,7 @@ static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
 
 extern kmem_cache_t *ocfs2_inode_cache;
 
-extern struct address_space_operations ocfs2_aops;
+extern const struct address_space_operations ocfs2_aops;
 
 struct buffer_head *ocfs2_bread(struct inode *inode, int block,
 				int *err, int reada);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 3fe8781c22cb..910a601b2e98 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -49,7 +49,7 @@
 
 #include "buffer_head_io.h"
 
-spinlock_t trans_inc_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(trans_inc_lock);
 
 static int ocfs2_force_read_journal(struct inode *inode);
 static int ocfs2_recover_node(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index ee42765a8553..cf70fe2075b8 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -988,9 +988,7 @@ int ocfs2_request_mount_vote(struct ocfs2_super *osb)
 	}
 
 bail:
-	if (request)
-		kfree(request);
-
+	kfree(request);
 	return status;
 }
 
@@ -1021,9 +1019,7 @@ int ocfs2_request_umount_vote(struct ocfs2_super *osb)
 	}
 
 bail:
-	if (request)
-		kfree(request);
-
+	kfree(request);
 	return status;
 }
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 0137ec4c1368..0a163a4f7764 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -122,6 +122,11 @@ struct mem_size_stats
 	unsigned long private_dirty;
 };
 
+__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+
 static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
 {
 	struct proc_maps_private *priv = m->private;
@@ -158,22 +163,23 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
 		pad_len_spaces(m, len);
 		seq_path(m, file->f_vfsmnt, file->f_dentry, "\n");
 	} else {
-		if (mm) {
-			if (vma->vm_start <= mm->start_brk &&
+		const char *name = arch_vma_name(vma);
+		if (!name) {
+			if (mm) {
+				if (vma->vm_start <= mm->start_brk &&
 						vma->vm_end >= mm->brk) {
-				pad_len_spaces(m, len);
-				seq_puts(m, "[heap]");
-			} else {
-				if (vma->vm_start <= mm->start_stack &&
-					vma->vm_end >= mm->start_stack) {
-
-					pad_len_spaces(m, len);
-					seq_puts(m, "[stack]");
+					name = "[heap]";
+				} else if (vma->vm_start <= mm->start_stack &&
+					   vma->vm_end >= mm->start_stack) {
+					name = "[stack]";
 				}
+			} else {
+				name = "[vdso]";
 			}
-		} else {
+		}
+		if (name) {
 			pad_len_spaces(m, len);
-			seq_puts(m, "[vdso]");
+			seq_puts(m, name);
 		}
 	}
 	seq_putc(m, '\n');
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 2f24c46f72a1..8bc182a88748 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -450,7 +450,7 @@ static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,qnx4_get_block);
 }
-static struct address_space_operations qnx4_aops = {
+static const struct address_space_operations qnx4_aops = {
 	.readpage	= qnx4_readpage,
 	.writepage	= qnx4_writepage,
 	.sync_page	= block_sync_page,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 00a933eb820c..86f14cacf641 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -26,7 +26,7 @@
 
 #include <linux/fs.h>
 
-struct address_space_operations ramfs_aops = {
+const struct address_space_operations ramfs_aops = {
 	.readpage	= simple_readpage,
 	.prepare_write	= simple_prepare_write,
 	.commit_write	= simple_commit_write
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index f443a84b98a5..99fffc9e1bfd 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -27,7 +27,7 @@
 
 static int ramfs_nommu_setattr(struct dentry *, struct iattr *);
 
-struct address_space_operations ramfs_aops = {
+const struct address_space_operations ramfs_aops = {
 	.readpage		= simple_readpage,
 	.prepare_write		= simple_prepare_write,
 	.commit_write		= simple_commit_write
diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h
index 313237631b49..c2bb58e74653 100644
--- a/fs/ramfs/internal.h
+++ b/fs/ramfs/internal.h
@@ -10,6 +10,6 @@
  */
 
 
-extern struct address_space_operations ramfs_aops;
+extern const struct address_space_operations ramfs_aops;
 extern const struct file_operations ramfs_file_operations;
 extern struct inode_operations ramfs_file_inode_operations;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9857e50f85e7..a24858a632fa 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2996,7 +2996,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 	return error;
 }
 
-struct address_space_operations reiserfs_address_space_operations = {
+const struct address_space_operations reiserfs_address_space_operations = {
 	.writepage = reiserfs_writepage,
 	.readpage = reiserfs_readpage,
 	.readpages = reiserfs_readpages,
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 283fbc6b8eea..22eed61ebf69 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -459,7 +459,7 @@ err_out:
 
 /* Mapping from our types to the kernel */
 
-static struct address_space_operations romfs_aops = {
+static const struct address_space_operations romfs_aops = {
 	.readpage = romfs_readpage
 };
 
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index ed9a24d19d7d..dae67048baba 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -306,7 +306,7 @@ static int smb_commit_write(struct file *file, struct page *page,
 	return status;
 }
 
-struct address_space_operations smb_file_aops = {
+const struct address_space_operations smb_file_aops = {
 	.readpage = smb_readpage,
 	.writepage = smb_writepage,
 	.prepare_write = smb_prepare_write,
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h
index 972ed7dad388..34fb462b2379 100644
--- a/fs/smbfs/proto.h
+++ b/fs/smbfs/proto.h
@@ -63,7 +63,7 @@ extern int smb_revalidate_inode(struct dentry *dentry);
 extern int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
 extern int smb_notify_change(struct dentry *dentry, struct iattr *attr);
 /* file.c */
-extern struct address_space_operations smb_file_aops;
+extern const struct address_space_operations smb_file_aops;
 extern const struct file_operations smb_file_operations;
 extern struct inode_operations smb_file_inode_operations;
 /* ioctl.c */
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index f0b347bd12ca..5e0e31cc46f5 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -16,7 +16,7 @@
 
 extern struct super_block * sysfs_sb;
 
-static struct address_space_operations sysfs_aops = {
+static const struct address_space_operations sysfs_aops = {
 	.readpage	= simple_readpage,
 	.prepare_write	= simple_prepare_write,
 	.commit_write	= simple_commit_write
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 86f5f8d43d0f..f2bcccd1d6fc 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -465,7 +465,7 @@ static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,get_block);
 }
-struct address_space_operations sysv_aops = {
+const struct address_space_operations sysv_aops = {
 	.readpage = sysv_readpage,
 	.writepage = sysv_writepage,
 	.sync_page = block_sync_page,
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 393a480e4deb..9dcc82120935 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -161,7 +161,7 @@ extern struct inode_operations sysv_dir_inode_operations;
 extern struct inode_operations sysv_fast_symlink_inode_operations;
 extern const struct file_operations sysv_file_operations;
 extern const struct file_operations sysv_dir_operations;
-extern struct address_space_operations sysv_aops;
+extern const struct address_space_operations sysv_aops;
 extern struct super_operations sysv_sops;
 extern struct dentry_operations sysv_dentry_operations;
 
diff --git a/fs/udf/file.c b/fs/udf/file.c
index e34b00e303f1..a59e5f33daf6 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -95,7 +95,7 @@ static int udf_adinicb_commit_write(struct file *file, struct page *page, unsign
 	return 0;
 }
 
-struct address_space_operations udf_adinicb_aops = {
+const struct address_space_operations udf_adinicb_aops = {
 	.readpage		= udf_adinicb_readpage,
 	.writepage		= udf_adinicb_writepage,
 	.sync_page		= block_sync_page,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 2983afd5e7fd..605f5111b6d8 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -132,7 +132,7 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping,block,udf_get_block);
 }
 
-struct address_space_operations udf_aops = {
+const struct address_space_operations udf_aops = {
 	.readpage		= udf_readpage,
 	.writepage		= udf_writepage,
 	.sync_page		= block_sync_page,
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 674bb40edc83..ba068a786563 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -113,6 +113,6 @@ out:
 /*
  * symlinks can't do much...
  */
-struct address_space_operations udf_symlink_aops = {
+const struct address_space_operations udf_symlink_aops = {
 	.readpage		= udf_symlink_filler,
 };
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 023e19ba5a2e..2f992387cc9e 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -47,9 +47,9 @@ extern struct inode_operations udf_dir_inode_operations;
 extern const struct file_operations udf_dir_operations;
 extern struct inode_operations udf_file_inode_operations;
 extern const struct file_operations udf_file_operations;
-extern struct address_space_operations udf_aops;
-extern struct address_space_operations udf_adinicb_aops;
-extern struct address_space_operations udf_symlink_aops;
+extern const struct address_space_operations udf_aops;
+extern const struct address_space_operations udf_adinicb_aops;
+extern const struct address_space_operations udf_symlink_aops;
 
 struct udf_fileident_bh
 {
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index f2dbdf5a8769..488b5ff48afb 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -98,7 +98,9 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag)
 	u64 temp = 0L;
 
 	UFSD(": frag = %llu  depth = %d\n", (unsigned long long)frag, depth);
-	UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",uspi->s_fpbshift,uspi->s_apbmask,mask);
+	UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",
+		uspi->s_fpbshift, uspi->s_apbmask,
+		(unsigned long long)mask);
 
 	if (depth == 0)
 		return 0;
@@ -429,7 +431,7 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
 	
 	if (!create) {
 		phys64 = ufs_frag_map(inode, fragment);
-		UFSD("phys64 = %llu \n",phys64);
+		UFSD("phys64 = %llu\n", (unsigned long long)phys64);
 		if (phys64)
 			map_bh(bh_result, sb, phys64);
 		return 0;
@@ -574,7 +576,7 @@ static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,ufs_getfrag_block);
 }
-struct address_space_operations ufs_aops = {
+const struct address_space_operations ufs_aops = {
 	.readpage = ufs_readpage,
 	.writepage = ufs_writepage,
 	.sync_page = block_sync_page,
@@ -605,39 +607,12 @@ static void ufs_set_inode_ops(struct inode *inode)
 				   ufs_get_inode_dev(inode->i_sb, UFS_I(inode)));
 }
 
-void ufs_read_inode (struct inode * inode)
+static void ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
 {
 	struct ufs_inode_info *ufsi = UFS_I(inode);
-	struct super_block * sb;
-	struct ufs_sb_private_info * uspi;
-	struct ufs_inode * ufs_inode;	
-	struct ufs2_inode *ufs2_inode;
-	struct buffer_head * bh;
+	struct super_block *sb = inode->i_sb;
 	mode_t mode;
 	unsigned i;
-	unsigned flags;
-	
-	UFSD("ENTER, ino %lu\n", inode->i_ino);
-	
-	sb = inode->i_sb;
-	uspi = UFS_SB(sb)->s_uspi;
-	flags = UFS_SB(sb)->s_flags;
-
-	if (inode->i_ino < UFS_ROOTINO || 
-	    inode->i_ino > (uspi->s_ncg * uspi->s_ipg)) {
-		ufs_warning (sb, "ufs_read_inode", "bad inode number (%lu)\n", inode->i_ino);
-		goto bad_inode;
-	}
-	
-	bh = sb_bread(sb, uspi->s_sbbase + ufs_inotofsba(inode->i_ino));
-	if (!bh) {
-		ufs_warning (sb, "ufs_read_inode", "unable to read inode %lu\n", inode->i_ino);
-		goto bad_inode;
-	}
-	if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
-		goto ufs2_inode;
-
-	ufs_inode = (struct ufs_inode *) (bh->b_data + sizeof(struct ufs_inode) * ufs_inotofsbo(inode->i_ino));
 
 	/*
 	 * Copy data to the in-core inode.
@@ -661,14 +636,11 @@ void ufs_read_inode (struct inode * inode)
 	inode->i_atime.tv_nsec = 0;
 	inode->i_ctime.tv_nsec = 0;
 	inode->i_blocks = fs32_to_cpu(sb, ufs_inode->ui_blocks);
-	inode->i_blksize = PAGE_SIZE;   /* This is the optimal IO size (for stat) */
-	inode->i_version++;
 	ufsi->i_flags = fs32_to_cpu(sb, ufs_inode->ui_flags);
 	ufsi->i_gen = fs32_to_cpu(sb, ufs_inode->ui_gen);
 	ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow);
 	ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag);
-	ufsi->i_lastfrag = (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift;
-	ufsi->i_dir_start_lookup = 0;
+
 	
 	if (S_ISCHR(mode) || S_ISBLK(mode) || inode->i_blocks) {
 		for (i = 0; i < (UFS_NDADDR + UFS_NINDIR); i++)
@@ -677,24 +649,16 @@ void ufs_read_inode (struct inode * inode)
 		for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++)
 			ufsi->i_u1.i_symlink[i] = ufs_inode->ui_u2.ui_symlink[i];
 	}
-	ufsi->i_osync = 0;
-
-	ufs_set_inode_ops(inode);
-
-	brelse (bh);
-
-	UFSD("EXIT\n");
-	return;
+}
 
-bad_inode:
-	make_bad_inode(inode);
-	return;
+static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
+{
+	struct ufs_inode_info *ufsi = UFS_I(inode);
+	struct super_block *sb = inode->i_sb;
+	mode_t mode;
+	unsigned i;
 
-ufs2_inode :
 	UFSD("Reading ufs2 inode, ino %lu\n", inode->i_ino);
-
-	ufs2_inode = (struct ufs2_inode *)(bh->b_data + sizeof(struct ufs2_inode) * ufs_inotofsbo(inode->i_ino));
-
 	/*
 	 * Copy data to the in-core inode.
 	 */
@@ -717,26 +681,64 @@ ufs2_inode :
 	inode->i_atime.tv_nsec = 0;
 	inode->i_ctime.tv_nsec = 0;
 	inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks);
-	inode->i_blksize = PAGE_SIZE; /*This is the optimal IO size(for stat)*/
-
-	inode->i_version++;
 	ufsi->i_flags = fs32_to_cpu(sb, ufs2_inode->ui_flags);
 	ufsi->i_gen = fs32_to_cpu(sb, ufs2_inode->ui_gen);
 	/*
 	ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow);
 	ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag);
 	*/
-	ufsi->i_lastfrag= (inode->i_size + uspi->s_fsize- 1) >> uspi->s_fshift;
 
 	if (S_ISCHR(mode) || S_ISBLK(mode) || inode->i_blocks) {
 		for (i = 0; i < (UFS_NDADDR + UFS_NINDIR); i++)
 			ufsi->i_u1.u2_i_data[i] =
 				ufs2_inode->ui_u2.ui_addr.ui_db[i];
-	}
-	else {
+	} else {
 		for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++)
 			ufsi->i_u1.i_symlink[i] = ufs2_inode->ui_u2.ui_symlink[i];
 	}
+}
+
+void ufs_read_inode(struct inode * inode)
+{
+	struct ufs_inode_info *ufsi = UFS_I(inode);
+	struct super_block * sb;
+	struct ufs_sb_private_info * uspi;
+	struct buffer_head * bh;
+
+	UFSD("ENTER, ino %lu\n", inode->i_ino);
+
+	sb = inode->i_sb;
+	uspi = UFS_SB(sb)->s_uspi;
+
+	if (inode->i_ino < UFS_ROOTINO ||
+	    inode->i_ino > (uspi->s_ncg * uspi->s_ipg)) {
+		ufs_warning(sb, "ufs_read_inode", "bad inode number (%lu)\n",
+			    inode->i_ino);
+		goto bad_inode;
+	}
+
+	bh = sb_bread(sb, uspi->s_sbbase + ufs_inotofsba(inode->i_ino));
+	if (!bh) {
+		ufs_warning(sb, "ufs_read_inode", "unable to read inode %lu\n",
+			    inode->i_ino);
+		goto bad_inode;
+	}
+	if ((UFS_SB(sb)->s_flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
+		struct ufs2_inode *ufs2_inode = (struct ufs2_inode *)bh->b_data;
+
+		ufs2_read_inode(inode,
+				ufs2_inode + ufs_inotofsbo(inode->i_ino));
+	} else {
+		struct ufs_inode *ufs_inode = (struct ufs_inode *)bh->b_data;
+
+		ufs1_read_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino));
+	}
+
+	inode->i_blksize = PAGE_SIZE;/*This is the optimal IO size (for stat)*/
+	inode->i_version++;
+	ufsi->i_lastfrag =
+		(inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift;
+	ufsi->i_dir_start_lookup = 0;
 	ufsi->i_osync = 0;
 
 	ufs_set_inode_ops(inode);
@@ -745,6 +747,9 @@ ufs2_inode :
 
 	UFSD("EXIT\n");
 	return;
+
+bad_inode:
+	make_bad_inode(inode);
 }
 
 static int ufs_update_inode(struct inode * inode, int do_sync)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 3e807b828e22..c40f81ba9b13 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1454,7 +1454,7 @@ xfs_vm_invalidatepage(
 	block_invalidatepage(page, offset);
 }
 
-struct address_space_operations xfs_address_space_operations = {
+const struct address_space_operations xfs_address_space_operations = {
 	.readpage		= xfs_vm_readpage,
 	.readpages		= xfs_vm_readpages,
 	.writepage		= xfs_vm_writepage,
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 706d8c781b8a..2244e516b66a 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -40,7 +40,7 @@ typedef struct xfs_ioend {
 	struct work_struct	io_work;	/* xfsdatad work queue */
 } xfs_ioend_t;
 
-extern struct address_space_operations xfs_address_space_operations;
+extern const struct address_space_operations xfs_address_space_operations;
 extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
 
 #endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 26fed0756f01..2af528dcfb04 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1520,7 +1520,7 @@ xfs_mapping_buftarg(
 	struct backing_dev_info	*bdi;
 	struct inode		*inode;
 	struct address_space	*mapping;
-	static struct address_space_operations mapping_aops = {
+	static const struct address_space_operations mapping_aops = {
 		.sync_page = block_sync_page,
 		.migratepage = fail_migrate_page,
 	};
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 12810baeb5d4..d9180020de63 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -419,16 +419,15 @@ xfs_vn_link(
 	int		error;
 
 	ip = old_dentry->d_inode;	/* inode being linked to */
-	if (S_ISDIR(ip->i_mode))
-		return -EPERM;
-
 	tdvp = vn_from_inode(dir);
 	vp = vn_from_inode(ip);
 
+	VN_HOLD(vp);
 	error = bhv_vop_link(tdvp, vp, dentry, NULL);
-	if (likely(!error)) {
+	if (unlikely(error)) {
+		VN_RELE(vp);
+	} else {
 		VMODIFY(tdvp);
-		VN_HOLD(vp);
 		xfs_validate_fields(ip, &vattr);
 		d_instantiate(dentry, ip);
 	}
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index aa26ab906c88..028eb17ec2ed 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -140,9 +140,7 @@ BUFFER_FNS(PrivateStart, unwritten);
 #define current_pid()		(current->pid)
 #define current_fsuid(cred)	(current->fsuid)
 #define current_fsgid(cred)	(current->fsgid)
-#define current_set_flags(f)	(current->flags |= (f))
 #define current_test_flags(f)	(current->flags & (f))
-#define current_clear_flags(f)	(current->flags & ~(f))
 #define current_set_flags_nested(sp, f)		\
 		(*(sp) = current->flags, current->flags |= (f))
 #define current_clear_flags_nested(sp, f)	\
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 35c6a01963a7..c42b3221b20c 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -93,7 +93,7 @@ typedef enum {
  */
 static inline struct bhv_vnode *vn_from_inode(struct inode *inode)
 {
-	return (bhv_vnode_t *)list_entry(inode, bhv_vnode_t, v_inode);
+	return container_of(inode, bhv_vnode_t, v_inode);
 }
 static inline struct inode *vn_to_inode(struct bhv_vnode *vnode)
 {
diff --git a/fs/xfs/xfs_behavior.h b/fs/xfs/xfs_behavior.h
index 1d8ff103201c..6e6e56fb352d 100644
--- a/fs/xfs/xfs_behavior.h
+++ b/fs/xfs/xfs_behavior.h
@@ -78,15 +78,12 @@
  *
  */
 
-struct bhv_head_lock;
-
 /*
  * Behavior head.  Head of the chain of behaviors.
  * Contained within each virtualized object data structure.
  */
 typedef struct bhv_head {
 	struct bhv_desc *bh_first;	/* first behavior in chain */
-	struct bhv_head_lock *bh_lockp;	/* pointer to lock info struct */
 } bhv_head_t;
 
 /*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5fa0adb7e173..86c1bf0bba9e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1961,9 +1961,9 @@ xfs_iunlink_remove(
 	xfs_agino_t	agino;
 	xfs_agino_t	next_agino;
 	xfs_buf_t	*last_ibp;
-	xfs_dinode_t	*last_dip;
+	xfs_dinode_t	*last_dip = NULL;
 	short		bucket_index;
-	int		offset, last_offset;
+	int		offset, last_offset = 0;
 	int		error;
 	int		agi_ok;
 
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index d8f5d4cbe8b7..e730328636c3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1740,10 +1740,10 @@ xlog_write(xfs_mount_t *	mp,
 	   xlog_in_core_t	**commit_iclog,
 	   uint			flags)
 {
-    xlog_t	     *log    = mp->m_log;
+    xlog_t	     *log = mp->m_log;
     xlog_ticket_t    *ticket = (xlog_ticket_t *)tic;
+    xlog_in_core_t   *iclog = NULL;  /* ptr to current in-core log */
     xlog_op_header_t *logop_head;    /* ptr to log operation header */
-    xlog_in_core_t   *iclog;	     /* ptr to current in-core log */
     __psint_t	     ptr;	     /* copy address into data region */
     int		     len;	     /* # xlog_write() bytes 2 still copy */
     int		     index;	     /* region index currently copying */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 55b4237c2153..3cb678e3a132 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -990,6 +990,8 @@ xlog_find_zeroed(
 	xfs_daddr_t     num_scan_bblks;
 	int	        error, log_bbnum = log->l_logBBsize;
 
+	*blk_no = 0;
+
 	/* check totally zeroed log */
 	bp = xlog_get_bp(log, 1);
 	if (!bp)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 10dbf203c62f..4be5c0b2d296 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1721,15 +1721,14 @@ xfs_mount_log_sbunit(
  * is present to prevent thrashing).
  */
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
  * hot-plug CPU notifier support.
  *
- * We cannot use the hotcpu_register() function because it does
- * not allow notifier instances. We need a notifier per filesystem
- * as we need to be able to identify the filesystem to balance
- * the counters out. This is achieved by having a notifier block
- * embedded in the xfs_mount_t and doing pointer magic to get the
- * mount pointer from the notifier block address.
+ * We need a notifier per filesystem as we need to be able to identify
+ * the filesystem to balance the counters out. This is achieved by
+ * having a notifier block embedded in the xfs_mount_t and doing pointer
+ * magic to get the mount pointer from the notifier block address.
  */
 STATIC int
 xfs_icsb_cpu_notify(
@@ -1779,6 +1778,7 @@ xfs_icsb_cpu_notify(
 
 	return NOTIFY_OK;
 }
+#endif /* CONFIG_HOTPLUG_CPU */
 
 int
 xfs_icsb_init_counters(
@@ -1791,9 +1791,11 @@ xfs_icsb_init_counters(
 	if (mp->m_sb_cnts == NULL)
 		return -ENOMEM;
 
+#ifdef CONFIG_HOTPLUG_CPU
 	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
 	mp->m_icsb_notifier.priority = 0;
-	register_cpu_notifier(&mp->m_icsb_notifier);
+	register_hotcpu_notifier(&mp->m_icsb_notifier);
+#endif /* CONFIG_HOTPLUG_CPU */
 
 	for_each_online_cpu(i) {
 		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
@@ -1812,7 +1814,7 @@ xfs_icsb_destroy_counters(
 	xfs_mount_t	*mp)
 {
 	if (mp->m_sb_cnts) {
-		unregister_cpu_notifier(&mp->m_icsb_notifier);
+		unregister_hotcpu_notifier(&mp->m_icsb_notifier);
 		free_percpu(mp->m_sb_cnts);
 	}
 }
@@ -2026,7 +2028,7 @@ xfs_icsb_balance_counter(
 	xfs_sb_field_t  field,
 	int		flags)
 {
-	uint64_t	count, resid = 0;
+	uint64_t	count, resid;
 	int		weight = num_online_cpus();
 	int		s;
 
@@ -2058,6 +2060,7 @@ xfs_icsb_balance_counter(
 		break;
 	default:
 		BUG();
+		count = resid = 0;	/* quiet, gcc */
 		break;
 	}
 
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 0c1e42b037ef..5a0b678956e0 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1929,7 +1929,7 @@ xfs_growfs_rt(
 	/*
 	 * Initial error checking.
 	 */
-	if (mp->m_rtdev_targp || mp->m_rbmip == NULL ||
+	if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL ||
 	    (nrblocks = in->newblocks) <= sbp->sb_rblocks ||
 	    (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
 		return XFS_ERROR(EINVAL);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index cb65c3a603f5..9dc88b380608 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -338,8 +338,6 @@ typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
 typedef struct xfs_trans {
 	unsigned int		t_magic;	/* magic number */
 	xfs_log_callback_t	t_logcb;	/* log callback struct */
-	struct xfs_trans	*t_forw;	/* async list pointers */
-	struct xfs_trans	*t_back;	/* async list pointers */
 	unsigned int		t_type;		/* transaction type */
 	unsigned int		t_log_res;	/* amt of log space resvd */
 	unsigned int		t_log_count;	/* count for perm log res */
@@ -364,9 +362,11 @@ typedef struct xfs_trans {
 	long			t_res_fdblocks_delta; /* on-disk only chg */
 	long			t_frextents_delta;/* superblock freextents chg*/
 	long			t_res_frextents_delta; /* on-disk only chg */
+#ifdef DEBUG
 	long			t_ag_freeblks_delta; /* debugging counter */
 	long			t_ag_flist_delta; /* debugging counter */
 	long			t_ag_btree_delta; /* debugging counter */
+#endif
 	long			t_dblocks_delta;/* superblock dblocks change */
 	long			t_agcount_delta;/* superblock agcount change */
 	long			t_imaxpct_delta;/* superblock imaxpct change */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 00a6b7dc24a0..23cfa5837728 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2603,8 +2603,7 @@ xfs_link(
 	vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address);
 
 	target_namelen = VNAMELEN(dentry);
-	if (VN_ISDIR(src_vp))
-		return XFS_ERROR(EPERM);
+	ASSERT(!VN_ISDIR(src_vp));
 
 	sip = xfs_vtoi(src_vp);
 	tdp = XFS_BHVTOI(target_dir_bdp);
@@ -2699,9 +2698,8 @@ xfs_link(
 	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
 
 	error = xfs_bumplink(tp, sip);
-	if (error) {
+	if (error)
 		goto abort_return;
-	}
 
 	/*
 	 * If this is a synchronous mount, make sure that the
@@ -2719,9 +2717,8 @@ xfs_link(
 	}
 
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
-	if (error) {
+	if (error)
 		goto std_return;
-	}
 
 	/* Fall through to std_return with error = 0. */
 std_return:
@@ -2742,6 +2739,8 @@ std_return:
 	xfs_trans_cancel(tp, cancel_flags);
 	goto std_return;
 }
+
+
 /*
  * xfs_mkdir
  *