118 files changed, 6186 insertions, 4870 deletions
diff --git a/net/802/fc.c b/net/802/fc.c
index cb3475ea6fda..34cf1ee014b8 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -82,13 +82,13 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev,
 
 static int fc_rebuild_header(struct sk_buff *skb)
 {
+#ifdef CONFIG_INET
 	struct fch_hdr *fch=(struct fch_hdr *)skb->data;
 	struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr));
 	if(fcllc->ethertype != htons(ETH_P_IP)) {
 		printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype));
 		return 0;
 	}
-#ifdef CONFIG_INET
 	return arp_find(fch->daddr, skb);
 #else
 	return 0;
diff --git a/net/802/psnap.c b/net/802/psnap.c
index b3cfe5a14fca..70980baeb682 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -1,7 +1,7 @@
 /*
  *	SNAP data link layer. Derived from 802.2
  *
- *		Alan Cox <Alan.Cox@linux.org>,
+ *		Alan Cox <alan@lxorguk.ukuu.org.uk>,
  *		from the 802.2 layer by Greg Page.
  *		Merged in additions from Greg Page's psnap.c.
  *
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 519219480db1..1041b7bd12e2 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -4,10 +4,9 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
 9pnet-objs := \
 	mod.o \
 	client.o \
-	conv.o \
 	error.o \
-	fcprint.o \
 	util.o \
+	protocol.o \
 	trans_fd.o \
 
 9pnet_virtio-objs := \
diff --git a/net/9p/client.c b/net/9p/client.c
index 10e320307ec0..bbac2f72b4d2 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -33,12 +33,9 @@
 #include <linux/uaccess.h>
 #include <net/9p/9p.h>
 #include <linux/parser.h>
-#include <net/9p/transport.h>
 #include <net/9p/client.h>
-
-static struct p9_fid *p9_fid_create(struct p9_client *clnt);
-static void p9_fid_destroy(struct p9_fid *fid);
-static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu);
+#include <net/9p/transport.h>
+#include "protocol.h"
 
 /*
   * Client Option Parsing (code inspired by NFS code)
@@ -52,13 +49,16 @@ enum {
 	Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_msize, "msize=%u"},
 	{Opt_legacy, "noextend"},
 	{Opt_trans, "trans=%s"},
 	{Opt_err, NULL},
 };
 
+static struct p9_req_t *
+p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
+
 /**
  * v9fs_parse_options - parse mount options into session structure
  * @options: options string passed from mount
@@ -124,31 +124,585 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 	return ret;
 }
 
+/**
+ * p9_tag_alloc - lookup/allocate a request by tag
+ * @c: client session to lookup tag within
+ * @tag: numeric id for transaction
+ *
+ * this is a simple array lookup, but will grow the
+ * request_slots as necessary to accomodate transaction
+ * ids which did not previously have a slot.
+ *
+ * this code relies on the client spinlock to manage locks, its
+ * possible we should switch to something else, but I'd rather
+ * stick with something low-overhead for the common case.
+ *
+ */
+
+static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
+{
+	unsigned long flags;
+	int row, col;
+	struct p9_req_t *req;
+
+	/* This looks up the original request by tag so we know which
+	 * buffer to read the data into */
+	tag++;
+
+	if (tag >= c->max_tag) {
+		spin_lock_irqsave(&c->lock, flags);
+		/* check again since original check was outside of lock */
+		while (tag >= c->max_tag) {
+			row = (tag / P9_ROW_MAXTAG);
+			c->reqs[row] = kcalloc(P9_ROW_MAXTAG,
+					sizeof(struct p9_req_t), GFP_ATOMIC);
+
+			if (!c->reqs[row]) {
+				printk(KERN_ERR "Couldn't grow tag array\n");
+				return ERR_PTR(-ENOMEM);
+			}
+			for (col = 0; col < P9_ROW_MAXTAG; col++) {
+				c->reqs[row][col].status = REQ_STATUS_IDLE;
+				c->reqs[row][col].tc = NULL;
+			}
+			c->max_tag += P9_ROW_MAXTAG;
+		}
+		spin_unlock_irqrestore(&c->lock, flags);
+	}
+	row = tag / P9_ROW_MAXTAG;
+	col = tag % P9_ROW_MAXTAG;
+
+	req = &c->reqs[row][col];
+	if (!req->tc) {
+		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
+		if (!req->wq) {
+			printk(KERN_ERR "Couldn't grow tag array\n");
+			return ERR_PTR(-ENOMEM);
+		}
+		init_waitqueue_head(req->wq);
+		req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+								GFP_KERNEL);
+		req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+								GFP_KERNEL);
+		if ((!req->tc) || (!req->rc)) {
+			printk(KERN_ERR "Couldn't grow tag array\n");
+			kfree(req->tc);
+			kfree(req->rc);
+			return ERR_PTR(-ENOMEM);
+		}
+		req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
+		req->tc->capacity = c->msize;
+		req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
+		req->rc->capacity = c->msize;
+	}
+
+	p9pdu_reset(req->tc);
+	p9pdu_reset(req->rc);
+
+	req->flush_tag = 0;
+	req->tc->tag = tag-1;
+	req->status = REQ_STATUS_ALLOC;
+
+	return &c->reqs[row][col];
+}
+
+/**
+ * p9_tag_lookup - lookup a request by tag
+ * @c: client session to lookup tag within
+ * @tag: numeric id for transaction
+ *
+ */
+
+struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
+{
+	int row, col;
+
+	/* This looks up the original request by tag so we know which
+	 * buffer to read the data into */
+	tag++;
+
+	BUG_ON(tag >= c->max_tag);
+
+	row = tag / P9_ROW_MAXTAG;
+	col = tag % P9_ROW_MAXTAG;
+
+	return &c->reqs[row][col];
+}
+EXPORT_SYMBOL(p9_tag_lookup);
+
+/**
+ * p9_tag_init - setup tags structure and contents
+ * @tags: tags structure from the client struct
+ *
+ * This initializes the tags structure for each client instance.
+ *
+ */
+
+static int p9_tag_init(struct p9_client *c)
+{
+	int err = 0;
+
+	c->tagpool = p9_idpool_create();
+	if (IS_ERR(c->tagpool)) {
+		err = PTR_ERR(c->tagpool);
+		c->tagpool = NULL;
+		goto error;
+	}
+
+	p9_idpool_get(c->tagpool); /* reserve tag 0 */
+
+	c->max_tag = 0;
+error:
+	return err;
+}
 
 /**
- * p9_client_rpc - sends 9P request and waits until a response is available.
- *      The function can be interrupted.
- * @c: client data
- * @tc: request to be sent
- * @rc: pointer where a pointer to the response is stored
+ * p9_tag_cleanup - cleans up tags structure and reclaims resources
+ * @tags: tags structure from the client struct
+ *
+ * This frees resources associated with the tags structure
+ *
  */
+static void p9_tag_cleanup(struct p9_client *c)
+{
+	int row, col;
+
+	/* check to insure all requests are idle */
+	for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
+		for (col = 0; col < P9_ROW_MAXTAG; col++) {
+			if (c->reqs[row][col].status != REQ_STATUS_IDLE) {
+				P9_DPRINTK(P9_DEBUG_MUX,
+				  "Attempting to cleanup non-free tag %d,%d\n",
+				  row, col);
+				/* TODO: delay execution of cleanup */
+				return;
+			}
+		}
+	}
+
+	if (c->tagpool)
+		p9_idpool_destroy(c->tagpool);
+
+	/* free requests associated with tags */
+	for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
+		for (col = 0; col < P9_ROW_MAXTAG; col++) {
+			kfree(c->reqs[row][col].wq);
+			kfree(c->reqs[row][col].tc);
+			kfree(c->reqs[row][col].rc);
+		}
+		kfree(c->reqs[row]);
+	}
+	c->max_tag = 0;
+}
+
+/**
+ * p9_free_req - free a request and clean-up as necessary
+ * c: client state
+ * r: request to release
+ *
+ */
+
+static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
+{
+	int tag = r->tc->tag;
+	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
+
+	r->status = REQ_STATUS_IDLE;
+	if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool))
+		p9_idpool_put(tag, c->tagpool);
+
+	/* if this was a flush request we have to free response fcall */
+	if (r->rc->id == P9_RFLUSH) {
+		kfree(r->tc);
+		kfree(r->rc);
+	}
+}
+
+/**
+ * p9_client_cb - call back from transport to client
+ * c: client state
+ * req: request received
+ *
+ */
+void p9_client_cb(struct p9_client *c, struct p9_req_t *req)
+{
+	struct p9_req_t *other_req;
+	unsigned long flags;
+
+	P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
+
+	if (req->status == REQ_STATUS_ERROR)
+		wake_up(req->wq);
+
+	if (req->flush_tag) { 			/* flush receive path */
+		P9_DPRINTK(P9_DEBUG_9P, "<<< RFLUSH %d\n", req->tc->tag);
+		spin_lock_irqsave(&c->lock, flags);
+		other_req = p9_tag_lookup(c, req->flush_tag);
+		if (other_req->status != REQ_STATUS_FLSH) /* stale flush */
+			spin_unlock_irqrestore(&c->lock, flags);
+		else {
+			other_req->status = REQ_STATUS_FLSHD;
+			spin_unlock_irqrestore(&c->lock, flags);
+			wake_up(other_req->wq);
+		}
+		p9_free_req(c, req);
+	} else { 				/* normal receive path */
+		P9_DPRINTK(P9_DEBUG_MUX, "normal: tag %d\n", req->tc->tag);
+		spin_lock_irqsave(&c->lock, flags);
+		if (req->status != REQ_STATUS_FLSHD)
+			req->status = REQ_STATUS_RCVD;
+		spin_unlock_irqrestore(&c->lock, flags);
+		wake_up(req->wq);
+		P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
+	}
+}
+EXPORT_SYMBOL(p9_client_cb);
+
+/**
+ * p9_parse_header - parse header arguments out of a packet
+ * @pdu: packet to parse
+ * @size: size of packet
+ * @type: type of request
+ * @tag: tag of packet
+ * @rewind: set if we need to rewind offset afterwards
+ */
+
 int
-p9_client_rpc(struct p9_client *c, struct p9_fcall *tc,
-	struct p9_fcall **rc)
+p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag,
+								int rewind)
 {
-	return c->trans->rpc(c->trans, tc, rc);
+	int8_t r_type;
+	int16_t r_tag;
+	int32_t r_size;
+	int offset = pdu->offset;
+	int err;
+
+	pdu->offset = 0;
+	if (pdu->size == 0)
+		pdu->size = 7;
+
+	err = p9pdu_readf(pdu, 0, "dbw", &r_size, &r_type, &r_tag);
+	if (err)
+		goto rewind_and_exit;
+
+	pdu->size = r_size;
+	pdu->id = r_type;
+	pdu->tag = r_tag;
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", pdu->size,
+							pdu->id, pdu->tag);
+
+	if (type)
+		*type = r_type;
+	if (tag)
+		*tag = r_tag;
+	if (size)
+		*size = r_size;
+
+
+rewind_and_exit:
+	if (rewind)
+		pdu->offset = offset;
+	return err;
 }
+EXPORT_SYMBOL(p9_parse_header);
+
+/**
+ * p9_check_errors - check 9p packet for error return and process it
+ * @c: current client instance
+ * @req: request to parse and check for error conditions
+ *
+ * returns error code if one is discovered, otherwise returns 0
+ *
+ * this will have to be more complicated if we have multiple
+ * error packet types
+ */
+
+static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
+{
+	int8_t type;
+	int err;
+
+	err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
+	if (err) {
+		P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
+		return err;
+	}
+
+	if (type == P9_RERROR) {
+		int ecode;
+		char *ename;
+
+		err = p9pdu_readf(req->rc, c->dotu, "s?d", &ename, &ecode);
+		if (err) {
+			P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n",
+									err);
+			return err;
+		}
+
+		if (c->dotu)
+			err = -ecode;
+
+		if (!err) {
+			err = p9_errstr2errno(ename, strlen(ename));
+
+			/* string match failed */
+			if (!err)
+				err = -ESERVERFAULT;
+		}
+
+		P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
+
+		kfree(ename);
+	} else
+		err = 0;
+
+	return err;
+}
+
+/**
+ * p9_client_flush - flush (cancel) a request
+ * c: client state
+ * req: request to cancel
+ *
+ * This sents a flush for a particular requests and links
+ * the flush request to the original request.  The current
+ * code only supports a single flush request although the protocol
+ * allows for multiple flush requests to be sent for a single request.
+ *
+ */
+
+static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
+{
+	struct p9_req_t *req;
+	int16_t oldtag;
+	int err;
+
+	err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1);
+	if (err)
+		return err;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag);
+
+	req = p9_client_rpc(c, P9_TFLUSH, "w", oldtag);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->flush_tag = oldtag;
+
+	/* we don't free anything here because RPC isn't complete */
+	return 0;
+}
+
+/**
+ * p9_client_rpc - issue a request and wait for a response
+ * @c: client session
+ * @type: type of request
+ * @fmt: protocol format string (see protocol.c)
+ *
+ * Returns request structure (which client must free using p9_free_req)
+ */
+
+static struct p9_req_t *
+p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
+{
+	va_list ap;
+	int tag, err;
+	struct p9_req_t *req;
+	unsigned long flags;
+	int sigpending;
+	int flushed = 0;
+
+	P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type);
+
+	if (c->status != Connected)
+		return ERR_PTR(-EIO);
+
+	if (signal_pending(current)) {
+		sigpending = 1;
+		clear_thread_flag(TIF_SIGPENDING);
+	} else
+		sigpending = 0;
+
+	tag = P9_NOTAG;
+	if (type != P9_TVERSION) {
+		tag = p9_idpool_get(c->tagpool);
+		if (tag < 0)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	req = p9_tag_alloc(c, tag);
+	if (IS_ERR(req))
+		return req;
+
+	/* marshall the data */
+	p9pdu_prepare(req->tc, tag, type);
+	va_start(ap, fmt);
+	err = p9pdu_vwritef(req->tc, c->dotu, fmt, ap);
+	va_end(ap);
+	p9pdu_finalize(req->tc);
+
+	err = c->trans_mod->request(c, req);
+	if (err < 0) {
+		c->status = Disconnected;
+		goto reterr;
+	}
+
+	/* if it was a flush we just transmitted, return our tag */
+	if (type == P9_TFLUSH)
+		return req;
+again:
+	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag);
+	err = wait_event_interruptible(*req->wq,
+						req->status >= REQ_STATUS_RCVD);
+	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d (flushed=%d)\n",
+						req->wq, tag, err, flushed);
+
+	if (req->status == REQ_STATUS_ERROR) {
+		P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
+		err = req->t_err;
+	} else if (err == -ERESTARTSYS && flushed) {
+		P9_DPRINTK(P9_DEBUG_MUX, "flushed - going again\n");
+		goto again;
+	} else if (req->status == REQ_STATUS_FLSHD) {
+		P9_DPRINTK(P9_DEBUG_MUX, "flushed - erestartsys\n");
+		err = -ERESTARTSYS;
+	}
+
+	if ((err == -ERESTARTSYS) && (c->status == Connected) && (!flushed)) {
+		P9_DPRINTK(P9_DEBUG_MUX, "flushing\n");
+		spin_lock_irqsave(&c->lock, flags);
+		if (req->status == REQ_STATUS_SENT)
+			req->status = REQ_STATUS_FLSH;
+		spin_unlock_irqrestore(&c->lock, flags);
+		sigpending = 1;
+		flushed = 1;
+		clear_thread_flag(TIF_SIGPENDING);
+
+		if (c->trans_mod->cancel(c, req)) {
+			err = p9_client_flush(c, req);
+			if (err == 0)
+				goto again;
+		}
+	}
+
+	if (sigpending) {
+		spin_lock_irqsave(&current->sighand->siglock, flags);
+		recalc_sigpending();
+		spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	}
+
+	if (err < 0)
+		goto reterr;
+
+	err = p9_check_errors(c, req);
+	if (!err) {
+		P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type);
+		return req;
+	}
+
+reterr:
+	P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type,
+									err);
+	p9_free_req(c, req);
+	return ERR_PTR(err);
+}
+
+static struct p9_fid *p9_fid_create(struct p9_client *clnt)
+{
+	int err;
+	struct p9_fid *fid;
+
+	P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt);
+	fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
+	if (!fid)
+		return ERR_PTR(-ENOMEM);
+
+	fid->fid = p9_idpool_get(clnt->fidpool);
+	if (fid->fid < 0) {
+		err = -ENOSPC;
+		goto error;
+	}
+
+	memset(&fid->qid, 0, sizeof(struct p9_qid));
+	fid->mode = -1;
+	fid->rdir_fpos = 0;
+	fid->uid = current->fsuid;
+	fid->clnt = clnt;
+	fid->aux = NULL;
+
+	spin_lock(&clnt->lock);
+	list_add(&fid->flist, &clnt->fidlist);
+	spin_unlock(&clnt->lock);
+
+	return fid;
+
+error:
+	kfree(fid);
+	return ERR_PTR(err);
+}
+
+static void p9_fid_destroy(struct p9_fid *fid)
+{
+	struct p9_client *clnt;
+
+	P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid);
+	clnt = fid->clnt;
+	p9_idpool_put(fid->fid, clnt->fidpool);
+	spin_lock(&clnt->lock);
+	list_del(&fid->flist);
+	spin_unlock(&clnt->lock);
+	kfree(fid);
+}
+
+int p9_client_version(struct p9_client *c)
+{
+	int err = 0;
+	struct p9_req_t *req;
+	char *version;
+	int msize;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d extended %d\n",
+							c->msize, c->dotu);
+	req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize,
+				c->dotu ? "9P2000.u" : "9P2000");
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	err = p9pdu_readf(req->rc, c->dotu, "ds", &msize, &version);
+	if (err) {
+		P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err);
+		p9pdu_dump(1, req->rc);
+		goto error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version);
+	if (!memcmp(version, "9P2000.u", 8))
+		c->dotu = 1;
+	else if (!memcmp(version, "9P2000", 6))
+		c->dotu = 0;
+	else {
+		err = -EREMOTEIO;
+		goto error;
+	}
+
+	if (msize < c->msize)
+		c->msize = msize;
+
+error:
+	kfree(version);
+	p9_free_req(c, req);
+
+	return err;
+}
+EXPORT_SYMBOL(p9_client_version);
 
 struct p9_client *p9_client_create(const char *dev_name, char *options)
 {
-	int err, n;
+	int err;
 	struct p9_client *clnt;
-	struct p9_fcall *tc, *rc;
-	struct p9_str *version;
 
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL);
 	if (!clnt)
 		return ERR_PTR(-ENOMEM);
@@ -164,6 +718,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 		goto error;
 	}
 
+	p9_tag_init(clnt);
+
 	err = parse_opts(options, clnt);
 	if (err < 0)
 		goto error;
@@ -175,53 +731,23 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 		goto error;
 	}
 
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p trans %p msize %d dotu %d\n",
+	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n",
 		clnt, clnt->trans_mod, clnt->msize, clnt->dotu);
 
-
-	clnt->trans = clnt->trans_mod->create(dev_name, options, clnt->msize,
-								clnt->dotu);
-	if (IS_ERR(clnt->trans)) {
-		err = PTR_ERR(clnt->trans);
-		clnt->trans = NULL;
+	err = clnt->trans_mod->create(clnt, dev_name, options);
+	if (err)
 		goto error;
-	}
 
 	if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize)
 		clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ;
 
-	tc = p9_create_tversion(clnt->msize, clnt->dotu?"9P2000.u":"9P2000");
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto error;
-	}
-
-	err = p9_client_rpc(clnt, tc, &rc);
+	err = p9_client_version(clnt);
 	if (err)
 		goto error;
 
-	version = &rc->params.rversion.version;
-	if (version->len == 8 && !memcmp(version->str, "9P2000.u", 8))
-		clnt->dotu = 1;
-	else if (version->len == 6 && !memcmp(version->str, "9P2000", 6))
-		clnt->dotu = 0;
-	else {
-		err = -EREMOTEIO;
-		goto error;
-	}
-
-	n = rc->params.rversion.msize;
-	if (n < clnt->msize)
-		clnt->msize = n;
-
-	kfree(tc);
-	kfree(rc);
 	return clnt;
 
 error:
-	kfree(tc);
-	kfree(rc);
 	p9_client_destroy(clnt);
 	return ERR_PTR(err);
 }
@@ -231,13 +757,10 @@ void p9_client_destroy(struct p9_client *clnt)
 {
 	struct p9_fid *fid, *fidptr;
 
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt);
+	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p\n", clnt);
 
-	if (clnt->trans) {
-		clnt->trans->close(clnt->trans);
-		kfree(clnt->trans);
-		clnt->trans = NULL;
-	}
+	if (clnt->trans_mod)
+		clnt->trans_mod->close(clnt);
 
 	v9fs_put_trans(clnt->trans_mod);
 
@@ -247,6 +770,8 @@ void p9_client_destroy(struct p9_client *clnt)
 	if (clnt->fidpool)
 		p9_idpool_destroy(clnt->fidpool);
 
+	p9_tag_cleanup(clnt);
+
 	kfree(clnt);
 }
 EXPORT_SYMBOL(p9_client_destroy);
@@ -254,7 +779,7 @@ EXPORT_SYMBOL(p9_client_destroy);
 void p9_client_disconnect(struct p9_client *clnt)
 {
 	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt);
-	clnt->trans->status = Disconnected;
+	clnt->status = Disconnected;
 }
 EXPORT_SYMBOL(p9_client_disconnect);
 
@@ -262,14 +787,13 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
 	char *uname, u32 n_uname, char *aname)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
+	struct p9_req_t *req;
 	struct p9_fid *fid;
+	struct p9_qid qid;
 
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p afid %d uname %s aname %s\n",
-		clnt, afid?afid->fid:-1, uname, aname);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n",
+					afid ? afid->fid : -1, uname, aname);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 
 	fid = p9_fid_create(clnt);
 	if (IS_ERR(fid)) {
@@ -278,73 +802,77 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
 		goto error;
 	}
 
-	tc = p9_create_tattach(fid->fid, afid?afid->fid:P9_NOFID, uname, aname,
-		n_uname, clnt->dotu);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
+	req = p9_client_rpc(clnt, P9_TATTACH, "ddss?d", fid->fid,
+			afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
 		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
+	err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		p9_free_req(clnt, req);
 		goto error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n",
+					qid.type, qid.path, qid.version);
 
-	memmove(&fid->qid, &rc->params.rattach.qid, sizeof(struct p9_qid));
-	kfree(tc);
-	kfree(rc);
+	memmove(&fid->qid, &qid, sizeof(struct p9_qid));
+
+	p9_free_req(clnt, req);
 	return fid;
 
 error:
-	kfree(tc);
-	kfree(rc);
 	if (fid)
 		p9_fid_destroy(fid);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(p9_client_attach);
 
-struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname,
-	u32 n_uname, char *aname)
+struct p9_fid *
+p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
-	struct p9_fid *fid;
+	struct p9_req_t *req;
+	struct p9_qid qid;
+	struct p9_fid *afid;
 
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p uname %s aname %s\n", clnt, uname,
-									aname);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TAUTH uname %s aname %s\n", uname, aname);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 
-	fid = p9_fid_create(clnt);
-	if (IS_ERR(fid)) {
-		err = PTR_ERR(fid);
-		fid = NULL;
+	afid = p9_fid_create(clnt);
+	if (IS_ERR(afid)) {
+		err = PTR_ERR(afid);
+		afid = NULL;
 		goto error;
 	}
 
-	tc = p9_create_tauth(fid->fid, uname, aname, n_uname, clnt->dotu);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
+	req = p9_client_rpc(clnt, P9_TAUTH, "dss?d",
+			afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
 		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
+	err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		p9_free_req(clnt, req);
 		goto error;
+	}
 
-	memmove(&fid->qid, &rc->params.rauth.qid, sizeof(struct p9_qid));
-	kfree(tc);
-	kfree(rc);
-	return fid;
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n",
+					qid.type, qid.path, qid.version);
+
+	memmove(&afid->qid, &qid, sizeof(struct p9_qid));
+	p9_free_req(clnt, req);
+	return afid;
 
 error:
-	kfree(tc);
-	kfree(rc);
-	if (fid)
-		p9_fid_destroy(fid);
+	if (afid)
+		p9_fid_destroy(afid);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(p9_client_auth);
@@ -353,15 +881,13 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
 	int clone)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
 	struct p9_fid *fid;
+	struct p9_qid *wqids;
+	struct p9_req_t *req;
+	int16_t nwqids, count;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d nwname %d wname[0] %s\n",
-		oldfid->fid, nwname, wnames?wnames[0]:NULL);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = oldfid->clnt;
 	if (clone) {
 		fid = p9_fid_create(clnt);
@@ -375,53 +901,49 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
 	} else
 		fid = oldfid;
 
-	tc = p9_create_twalk(oldfid->fid, fid->fid, nwname, wnames);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %d wname[0] %s\n",
+		oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL);
+
+	req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid,
+								nwname, wnames);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
 		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
+	err = p9pdu_readf(req->rc, clnt->dotu, "R", &nwqids, &wqids);
 	if (err) {
-		if (rc && rc->id == P9_RWALK)
-			goto clunk_fid;
-		else
-			goto error;
+		p9pdu_dump(1, req->rc);
+		p9_free_req(clnt, req);
+		goto clunk_fid;
 	}
+	p9_free_req(clnt, req);
 
-	if (rc->params.rwalk.nwqid != nwname) {
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids);
+
+	if (nwqids != nwname) {
 		err = -ENOENT;
 		goto clunk_fid;
 	}
 
+	for (count = 0; count < nwqids; count++)
+		P9_DPRINTK(P9_DEBUG_9P, "<<<     [%d] %x.%llx.%x\n",
+			count, wqids[count].type, wqids[count].path,
+			wqids[count].version);
+
 	if (nwname)
-		memmove(&fid->qid,
-			&rc->params.rwalk.wqids[rc->params.rwalk.nwqid - 1],
-			sizeof(struct p9_qid));
+		memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid));
 	else
 		fid->qid = oldfid->qid;
 
-	kfree(tc);
-	kfree(rc);
 	return fid;
 
 clunk_fid:
-	kfree(tc);
-	kfree(rc);
-	rc = NULL;
-	tc = p9_create_tclunk(fid->fid);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto error;
-	}
-
-	p9_client_rpc(clnt, tc, &rc);
+	p9_client_clunk(fid);
+	fid = NULL;
 
 error:
-	kfree(tc);
-	kfree(rc);
 	if (fid && (fid != oldfid))
 		p9_fid_destroy(fid);
 
@@ -432,35 +954,39 @@ EXPORT_SYMBOL(p9_client_walk);
 int p9_client_open(struct p9_fid *fid, int mode)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
+	struct p9_qid qid;
+	int iounit;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d mode %d\n", fid->fid, mode);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TOPEN fid %d mode %d\n", fid->fid, mode);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
 	if (fid->mode != -1)
 		return -EINVAL;
 
-	tc = p9_create_topen(fid->fid, mode);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
+	req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
-		goto done;
+	err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto free_and_error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n",
+				qid.type, qid.path, qid.version, iounit);
 
 	fid->mode = mode;
-	fid->iounit = rc->params.ropen.iounit;
+	fid->iounit = iounit;
 
-done:
-	kfree(tc);
-	kfree(rc);
+free_and_error:
+	p9_free_req(clnt, req);
+error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_open);
@@ -469,37 +995,41 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
 		     char *extension)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
+	struct p9_qid qid;
+	int iounit;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d name %s perm %d mode %d\n", fid->fid,
-		name, perm, mode);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n",
+						fid->fid, name, perm, mode);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
 	if (fid->mode != -1)
 		return -EINVAL;
 
-	tc = p9_create_tcreate(fid->fid, name, perm, mode, extension,
-							       clnt->dotu);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
+	req = p9_client_rpc(clnt, P9_TCREATE, "dsdb?s", fid->fid, name, perm,
+				mode, extension);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
-		goto done;
+	err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto free_and_error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n",
+				qid.type, qid.path, qid.version, iounit);
 
 	fid->mode = mode;
-	fid->iounit = rc->params.ropen.iounit;
+	fid->iounit = iounit;
 
-done:
-	kfree(tc);
-	kfree(rc);
+free_and_error:
+	p9_free_req(clnt, req);
+error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_fcreate);
@@ -507,31 +1037,25 @@ EXPORT_SYMBOL(p9_client_fcreate);
 int p9_client_clunk(struct p9_fid *fid)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
-	tc = p9_create_tclunk(fid->fid);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
+	req = p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
-		goto done;
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid);
 
+	p9_free_req(clnt, req);
 	p9_fid_destroy(fid);
 
-done:
-	kfree(tc);
-	kfree(rc);
+error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_clunk);
@@ -539,157 +1063,41 @@ EXPORT_SYMBOL(p9_client_clunk);
 int p9_client_remove(struct p9_fid *fid)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
-	tc = p9_create_tremove(fid->fid);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
+	req = p9_client_rpc(clnt, P9_TREMOVE, "d", fid->fid);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
-		goto done;
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid);
 
+	p9_free_req(clnt, req);
 	p9_fid_destroy(fid);
 
-done:
-	kfree(tc);
-	kfree(rc);
-	return err;
-}
-EXPORT_SYMBOL(p9_client_remove);
-
-int p9_client_read(struct p9_fid *fid, char *data, u64 offset, u32 count)
-{
-	int err, n, rsize, total;
-	struct p9_fcall *tc, *rc;
-	struct p9_client *clnt;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu %d\n", fid->fid,
-					(long long unsigned) offset, count);
-	err = 0;
-	tc = NULL;
-	rc = NULL;
-	clnt = fid->clnt;
-	total = 0;
-
-	rsize = fid->iounit;
-	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
-		rsize = clnt->msize - P9_IOHDRSZ;
-
-	do {
-		if (count < rsize)
-			rsize = count;
-
-		tc = p9_create_tread(fid->fid, offset, rsize);
-		if (IS_ERR(tc)) {
-			err = PTR_ERR(tc);
-			tc = NULL;
-			goto error;
-		}
-
-		err = p9_client_rpc(clnt, tc, &rc);
-		if (err)
-			goto error;
-
-		n = rc->params.rread.count;
-		if (n > count)
-			n = count;
-
-		memmove(data, rc->params.rread.data, n);
-		count -= n;
-		data += n;
-		offset += n;
-		total += n;
-		kfree(tc);
-		tc = NULL;
-		kfree(rc);
-		rc = NULL;
-	} while (count > 0 && n == rsize);
-
-	return total;
-
 error:
-	kfree(tc);
-	kfree(rc);
 	return err;
 }
-EXPORT_SYMBOL(p9_client_read);
-
-int p9_client_write(struct p9_fid *fid, char *data, u64 offset, u32 count)
-{
-	int err, n, rsize, total;
-	struct p9_fcall *tc, *rc;
-	struct p9_client *clnt;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid,
-					(long long unsigned) offset, count);
-	err = 0;
-	tc = NULL;
-	rc = NULL;
-	clnt = fid->clnt;
-	total = 0;
-
-	rsize = fid->iounit;
-	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
-		rsize = clnt->msize - P9_IOHDRSZ;
-
-	do {
-		if (count < rsize)
-			rsize = count;
-
-		tc = p9_create_twrite(fid->fid, offset, rsize, data);
-		if (IS_ERR(tc)) {
-			err = PTR_ERR(tc);
-			tc = NULL;
-			goto error;
-		}
-
-		err = p9_client_rpc(clnt, tc, &rc);
-		if (err)
-			goto error;
-
-		n = rc->params.rread.count;
-		count -= n;
-		data += n;
-		offset += n;
-		total += n;
-		kfree(tc);
-		tc = NULL;
-		kfree(rc);
-		rc = NULL;
-	} while (count > 0);
-
-	return total;
-
-error:
-	kfree(tc);
-	kfree(rc);
-	return err;
-}
-EXPORT_SYMBOL(p9_client_write);
+EXPORT_SYMBOL(p9_client_remove);
 
 int
-p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset, u32 count)
+p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
+								u32 count)
 {
-	int err, n, rsize, total;
-	struct p9_fcall *tc, *rc;
+	int err, rsize, total;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
+	char *dataptr;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid,
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid,
 					(long long unsigned) offset, count);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 	total = 0;
 
@@ -697,63 +1105,57 @@ p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset, u32 count)
 	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
 		rsize = clnt->msize - P9_IOHDRSZ;
 
-	do {
-		if (count < rsize)
-			rsize = count;
+	if (count < rsize)
+		rsize = count;
 
-		tc = p9_create_tread(fid->fid, offset, rsize);
-		if (IS_ERR(tc)) {
-			err = PTR_ERR(tc);
-			tc = NULL;
-			goto error;
-		}
+	req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
 
-		err = p9_client_rpc(clnt, tc, &rc);
-		if (err)
-			goto error;
+	err = p9pdu_readf(req->rc, clnt->dotu, "D", &count, &dataptr);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto free_and_error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
 
-		n = rc->params.rread.count;
-		if (n > count)
-			n = count;
+	if (data) {
+		memmove(data, dataptr, count);
+		data += count;
+	}
 
-		err = copy_to_user(data, rc->params.rread.data, n);
+	if (udata) {
+		err = copy_to_user(udata, dataptr, count);
 		if (err) {
 			err = -EFAULT;
-			goto error;
+			goto free_and_error;
 		}
+	}
 
-		count -= n;
-		data += n;
-		offset += n;
-		total += n;
-		kfree(tc);
-		tc = NULL;
-		kfree(rc);
-		rc = NULL;
-	} while (count > 0 && n == rsize);
-
-	return total;
+	p9_free_req(clnt, req);
+	return count;
 
+free_and_error:
+	p9_free_req(clnt, req);
 error:
-	kfree(tc);
-	kfree(rc);
 	return err;
 }
-EXPORT_SYMBOL(p9_client_uread);
+EXPORT_SYMBOL(p9_client_read);
 
 int
-p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset,
-								   u32 count)
+p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
+							u64 offset, u32 count)
 {
-	int err, n, rsize, total;
-	struct p9_fcall *tc, *rc;
+	int err, rsize, total;
 	struct p9_client *clnt;
+	struct p9_req_t *req;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid,
-					(long long unsigned) offset, count);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n",
+				fid->fid, (long long unsigned) offset, count);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 	total = 0;
 
@@ -761,325 +1163,114 @@ p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset,
 	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
 		rsize = clnt->msize - P9_IOHDRSZ;
 
-	do {
-		if (count < rsize)
-			rsize = count;
-
-		tc = p9_create_twrite_u(fid->fid, offset, rsize, data);
-		if (IS_ERR(tc)) {
-			err = PTR_ERR(tc);
-			tc = NULL;
-			goto error;
-		}
+	if (count < rsize)
+		rsize = count;
+	if (data)
+		req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset,
+								rsize, data);
+	else
+		req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset,
+								rsize, udata);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
 
-		err = p9_client_rpc(clnt, tc, &rc);
-		if (err)
-			goto error;
+	err = p9pdu_readf(req->rc, clnt->dotu, "d", &count);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto free_and_error;
+	}
 
-		n = rc->params.rread.count;
-		count -= n;
-		data += n;
-		offset += n;
-		total += n;
-		kfree(tc);
-		tc = NULL;
-		kfree(rc);
-		rc = NULL;
-	} while (count > 0);
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
 
-	return total;
+	p9_free_req(clnt, req);
+	return count;
 
+free_and_error:
+	p9_free_req(clnt, req);
 error:
-	kfree(tc);
-	kfree(rc);
 	return err;
 }
-EXPORT_SYMBOL(p9_client_uwrite);
-
-int p9_client_readn(struct p9_fid *fid, char *data, u64 offset, u32 count)
-{
-	int n, total;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid,
-					(long long unsigned) offset, count);
-	n = 0;
-	total = 0;
-	while (count) {
-		n = p9_client_read(fid, data, offset, count);
-		if (n <= 0)
-			break;
-
-		data += n;
-		offset += n;
-		count -= n;
-		total += n;
-	}
-
-	if (n < 0)
-		total = n;
-
-	return total;
-}
-EXPORT_SYMBOL(p9_client_readn);
+EXPORT_SYMBOL(p9_client_write);
 
-struct p9_stat *p9_client_stat(struct p9_fid *fid)
+struct p9_wstat *p9_client_stat(struct p9_fid *fid)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
 	struct p9_client *clnt;
-	struct p9_stat *ret;
+	struct p9_wstat *ret = kmalloc(sizeof(struct p9_wstat), GFP_KERNEL);
+	struct p9_req_t *req;
+	u16 ignored;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid);
+
+	if (!ret)
+		return ERR_PTR(-ENOMEM);
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
-	ret = NULL;
 	clnt = fid->clnt;
 
-	tc = p9_create_tstat(fid->fid);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
+	req = p9_client_rpc(clnt, P9_TSTAT, "d", fid->fid);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
 		goto error;
 	}
 
-	err = p9_client_rpc(clnt, tc, &rc);
-	if (err)
-		goto error;
-
-	ret = p9_clone_stat(&rc->params.rstat.stat, clnt->dotu);
-	if (IS_ERR(ret)) {
-		err = PTR_ERR(ret);
-		ret = NULL;
-		goto error;
+	err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret);
+	if (err) {
+		ret = ERR_PTR(err);
+		p9pdu_dump(1, req->rc);
+		goto free_and_error;
 	}
 
-	kfree(tc);
-	kfree(rc);
-	return ret;
-
+	P9_DPRINTK(P9_DEBUG_9P,
+		"<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
+		"<<<    mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
+		"<<<    name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
+		"<<<    uid=%d gid=%d n_muid=%d\n",
+		ret->size, ret->type, ret->dev, ret->qid.type,
+		ret->qid.path, ret->qid.version, ret->mode,
+		ret->atime, ret->mtime, ret->length, ret->name,
+		ret->uid, ret->gid, ret->muid, ret->extension,
+		ret->n_uid, ret->n_gid, ret->n_muid);
+
+free_and_error:
+	p9_free_req(clnt, req);
 error:
-	kfree(tc);
-	kfree(rc);
-	kfree(ret);
-	return ERR_PTR(err);
+	return ret;
 }
 EXPORT_SYMBOL(p9_client_stat);
 
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 {
 	int err;
-	struct p9_fcall *tc, *rc;
+	struct p9_req_t *req;
 	struct p9_client *clnt;
 
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid);
+	P9_DPRINTK(P9_DEBUG_9P,
+		"     sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
+		"     mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
+		"     name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
+		"     uid=%d gid=%d n_muid=%d\n",
+		wst->size, wst->type, wst->dev, wst->qid.type,
+		wst->qid.path, wst->qid.version, wst->mode,
+		wst->atime, wst->mtime, wst->length, wst->name,
+		wst->uid, wst->gid, wst->muid, wst->extension,
+		wst->n_uid, wst->n_gid, wst->n_muid);
 	err = 0;
-	tc = NULL;
-	rc = NULL;
 	clnt = fid->clnt;
 
-	tc = p9_create_twstat(fid->fid, wst, clnt->dotu);
-	if (IS_ERR(tc)) {
-		err = PTR_ERR(tc);
-		tc = NULL;
-		goto done;
-	}
-
-	err = p9_client_rpc(clnt, tc, &rc);
-
-done:
-	kfree(tc);
-	kfree(rc);
-	return err;
-}
-EXPORT_SYMBOL(p9_client_wstat);
-
-struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset)
-{
-	int err, n, m;
-	struct p9_fcall *tc, *rc;
-	struct p9_client *clnt;
-	struct p9_stat st, *ret;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu\n", fid->fid,
-						(long long unsigned) offset);
-	err = 0;
-	tc = NULL;
-	rc = NULL;
-	ret = NULL;
-	clnt = fid->clnt;
-
-	/* if the offset is below or above the current response, free it */
-	if (offset < fid->rdir_fpos || (fid->rdir_fcall &&
-		offset >= fid->rdir_fpos+fid->rdir_fcall->params.rread.count)) {
-		fid->rdir_pos = 0;
-		if (fid->rdir_fcall)
-			fid->rdir_fpos += fid->rdir_fcall->params.rread.count;
-
-		kfree(fid->rdir_fcall);
-		fid->rdir_fcall = NULL;
-		if (offset < fid->rdir_fpos)
-			fid->rdir_fpos = 0;
-	}
-
-	if (!fid->rdir_fcall) {
-		n = fid->iounit;
-		if (!n || n > clnt->msize-P9_IOHDRSZ)
-			n = clnt->msize - P9_IOHDRSZ;
-
-		while (1) {
-			if (fid->rdir_fcall) {
-				fid->rdir_fpos +=
-					fid->rdir_fcall->params.rread.count;
-				kfree(fid->rdir_fcall);
-				fid->rdir_fcall = NULL;
-			}
-
-			tc = p9_create_tread(fid->fid, fid->rdir_fpos, n);
-			if (IS_ERR(tc)) {
-				err = PTR_ERR(tc);
-				tc = NULL;
-				goto error;
-			}
-
-			err = p9_client_rpc(clnt, tc, &rc);
-			if (err)
-				goto error;
-
-			n = rc->params.rread.count;
-			if (n == 0)
-				goto done;
-
-			fid->rdir_fcall = rc;
-			rc = NULL;
-			if (offset >= fid->rdir_fpos &&
-						offset < fid->rdir_fpos+n)
-				break;
-		}
-
-		fid->rdir_pos = 0;
-	}
-
-	m = offset - fid->rdir_fpos;
-	if (m < 0)
-		goto done;
-
-	n = p9_deserialize_stat(fid->rdir_fcall->params.rread.data + m,
-		fid->rdir_fcall->params.rread.count - m, &st, clnt->dotu);
-
-	if (!n) {
-		err = -EIO;
-		goto error;
-	}
-
-	fid->rdir_pos += n;
-	st.size = n;
-	ret = p9_clone_stat(&st, clnt->dotu);
-	if (IS_ERR(ret)) {
-		err = PTR_ERR(ret);
-		ret = NULL;
-		goto error;
-	}
-
-done:
-	kfree(tc);
-	kfree(rc);
-	return ret;
-
-error:
-	kfree(tc);
-	kfree(rc);
-	kfree(ret);
-	return ERR_PTR(err);
-}
-EXPORT_SYMBOL(p9_client_dirread);
-
-static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu)
-{
-	int n;
-	char *p;
-	struct p9_stat *ret;
-
-	n = sizeof(struct p9_stat) + st->name.len + st->uid.len + st->gid.len +
-		st->muid.len;
-
-	if (dotu)
-		n += st->extension.len;
-
-	ret = kmalloc(n, GFP_KERNEL);
-	if (!ret)
-		return ERR_PTR(-ENOMEM);
-
-	memmove(ret, st, sizeof(struct p9_stat));
-	p = ((char *) ret) + sizeof(struct p9_stat);
-	memmove(p, st->name.str, st->name.len);
-	ret->name.str = p;
-	p += st->name.len;
-	memmove(p, st->uid.str, st->uid.len);
-	ret->uid.str = p;
-	p += st->uid.len;
-	memmove(p, st->gid.str, st->gid.len);
-	ret->gid.str = p;
-	p += st->gid.len;
-	memmove(p, st->muid.str, st->muid.len);
-	ret->muid.str = p;
-	p += st->muid.len;
-
-	if (dotu) {
-		memmove(p, st->extension.str, st->extension.len);
-		ret->extension.str = p;
-		p += st->extension.len;
-	}
-
-	return ret;
-}
-
-static struct p9_fid *p9_fid_create(struct p9_client *clnt)
-{
-	int err;
-	struct p9_fid *fid;
-
-	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt);
-	fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
-	if (!fid)
-		return ERR_PTR(-ENOMEM);
-
-	fid->fid = p9_idpool_get(clnt->fidpool);
-	if (fid->fid < 0) {
-		err = -ENOSPC;
+	req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, 0, wst);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
 		goto error;
 	}
 
-	memset(&fid->qid, 0, sizeof(struct p9_qid));
-	fid->mode = -1;
-	fid->rdir_fpos = 0;
-	fid->rdir_pos = 0;
-	fid->rdir_fcall = NULL;
-	fid->uid = current->fsuid;
-	fid->clnt = clnt;
-	fid->aux = NULL;
-
-	spin_lock(&clnt->lock);
-	list_add(&fid->flist, &clnt->fidlist);
-	spin_unlock(&clnt->lock);
-
-	return fid;
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid);
 
+	p9_free_req(clnt, req);
 error:
-	kfree(fid);
-	return ERR_PTR(err);
-}
-
-static void p9_fid_destroy(struct p9_fid *fid)
-{
-	struct p9_client *clnt;
-
-	P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid);
-	clnt = fid->clnt;
-	p9_idpool_put(fid->fid, clnt->fidpool);
-	spin_lock(&clnt->lock);
-	list_del(&fid->flist);
-	spin_unlock(&clnt->lock);
-	kfree(fid->rdir_fcall);
-	kfree(fid);
+	return err;
 }
+EXPORT_SYMBOL(p9_client_wstat);
diff --git a/net/9p/conv.c b/net/9p/conv.c
deleted file mode 100644
index 5ad3a3bd73b2..000000000000
--- a/net/9p/conv.c
+++ /dev/null
@@ -1,1054 +0,0 @@
-/*
- * net/9p/conv.c
- *
- * 9P protocol conversion functions
- *
- *  Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/idr.h>
-#include <linux/uaccess.h>
-#include <net/9p/9p.h>
-
-/*
- * Buffer to help with string parsing
- */
-struct cbuf {
-	unsigned char *sp;
-	unsigned char *p;
-	unsigned char *ep;
-};
-
-static inline void buf_init(struct cbuf *buf, void *data, int datalen)
-{
-	buf->sp = buf->p = data;
-	buf->ep = data + datalen;
-}
-
-static inline int buf_check_overflow(struct cbuf *buf)
-{
-	return buf->p > buf->ep;
-}
-
-static int buf_check_size(struct cbuf *buf, int len)
-{
-	if (buf->p + len > buf->ep) {
-		if (buf->p < buf->ep) {
-			P9_EPRINTK(KERN_ERR,
-				"buffer overflow: want %d has %d\n", len,
-				(int)(buf->ep - buf->p));
-			dump_stack();
-			buf->p = buf->ep + 1;
-		}
-
-		return 0;
-	}
-
-	return 1;
-}
-
-static void *buf_alloc(struct cbuf *buf, int len)
-{
-	void *ret = NULL;
-
-	if (buf_check_size(buf, len)) {
-		ret = buf->p;
-		buf->p += len;
-	}
-
-	return ret;
-}
-
-static void buf_put_int8(struct cbuf *buf, u8 val)
-{
-	if (buf_check_size(buf, 1)) {
-		buf->p[0] = val;
-		buf->p++;
-	}
-}
-
-static void buf_put_int16(struct cbuf *buf, u16 val)
-{
-	if (buf_check_size(buf, 2)) {
-		*(__le16 *) buf->p = cpu_to_le16(val);
-		buf->p += 2;
-	}
-}
-
-static void buf_put_int32(struct cbuf *buf, u32 val)
-{
-	if (buf_check_size(buf, 4)) {
-		*(__le32 *)buf->p = cpu_to_le32(val);
-		buf->p += 4;
-	}
-}
-
-static void buf_put_int64(struct cbuf *buf, u64 val)
-{
-	if (buf_check_size(buf, 8)) {
-		*(__le64 *)buf->p = cpu_to_le64(val);
-		buf->p += 8;
-	}
-}
-
-static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
-{
-	char *ret;
-
-	ret = NULL;
-	if (buf_check_size(buf, slen + 2)) {
-		buf_put_int16(buf, slen);
-		ret = buf->p;
-		memcpy(buf->p, s, slen);
-		buf->p += slen;
-	}
-
-	return ret;
-}
-
-static u8 buf_get_int8(struct cbuf *buf)
-{
-	u8 ret = 0;
-
-	if (buf_check_size(buf, 1)) {
-		ret = buf->p[0];
-		buf->p++;
-	}
-
-	return ret;
-}
-
-static u16 buf_get_int16(struct cbuf *buf)
-{
-	u16 ret = 0;
-
-	if (buf_check_size(buf, 2)) {
-		ret = le16_to_cpu(*(__le16 *)buf->p);
-		buf->p += 2;
-	}
-
-	return ret;
-}
-
-static u32 buf_get_int32(struct cbuf *buf)
-{
-	u32 ret = 0;
-
-	if (buf_check_size(buf, 4)) {
-		ret = le32_to_cpu(*(__le32 *)buf->p);
-		buf->p += 4;
-	}
-
-	return ret;
-}
-
-static u64 buf_get_int64(struct cbuf *buf)
-{
-	u64 ret = 0;
-
-	if (buf_check_size(buf, 8)) {
-		ret = le64_to_cpu(*(__le64 *)buf->p);
-		buf->p += 8;
-	}
-
-	return ret;
-}
-
-static void buf_get_str(struct cbuf *buf, struct p9_str *vstr)
-{
-	vstr->len = buf_get_int16(buf);
-	if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) {
-		vstr->str = buf->p;
-		buf->p += vstr->len;
-	} else {
-		vstr->len = 0;
-		vstr->str = NULL;
-	}
-}
-
-static void buf_get_qid(struct cbuf *bufp, struct p9_qid *qid)
-{
-	qid->type = buf_get_int8(bufp);
-	qid->version = buf_get_int32(bufp);
-	qid->path = buf_get_int64(bufp);
-}
-
-/**
- * p9_size_wstat - calculate the size of a variable length stat struct
- * @wstat: metadata (stat) structure
- * @dotu: non-zero if 9P2000.u
- *
- */
-
-static int p9_size_wstat(struct p9_wstat *wstat, int dotu)
-{
-	int size = 0;
-
-	if (wstat == NULL) {
-		P9_EPRINTK(KERN_ERR, "p9_size_stat: got a NULL stat pointer\n");
-		return 0;
-	}
-
-	size =			/* 2 + *//* size[2] */
-	    2 +			/* type[2] */
-	    4 +			/* dev[4] */
-	    1 +			/* qid.type[1] */
-	    4 +			/* qid.vers[4] */
-	    8 +			/* qid.path[8] */
-	    4 +			/* mode[4] */
-	    4 +			/* atime[4] */
-	    4 +			/* mtime[4] */
-	    8 +			/* length[8] */
-	    8;			/* minimum sum of string lengths */
-
-	if (wstat->name)
-		size += strlen(wstat->name);
-	if (wstat->uid)
-		size += strlen(wstat->uid);
-	if (wstat->gid)
-		size += strlen(wstat->gid);
-	if (wstat->muid)
-		size += strlen(wstat->muid);
-
-	if (dotu) {
-		size += 4 +	/* n_uid[4] */
-		    4 +		/* n_gid[4] */
-		    4 +		/* n_muid[4] */
-		    2;		/* string length of extension[4] */
-		if (wstat->extension)
-			size += strlen(wstat->extension);
-	}
-
-	return size;
-}
-
-/**
- * buf_get_stat - safely decode a recieved metadata (stat) structure
- * @bufp: buffer to deserialize
- * @stat: metadata (stat) structure
- * @dotu: non-zero if 9P2000.u
- *
- */
-
-static void
-buf_get_stat(struct cbuf *bufp, struct p9_stat *stat, int dotu)
-{
-	stat->size = buf_get_int16(bufp);
-	stat->type = buf_get_int16(bufp);
-	stat->dev = buf_get_int32(bufp);
-	stat->qid.type = buf_get_int8(bufp);
-	stat->qid.version = buf_get_int32(bufp);
-	stat->qid.path = buf_get_int64(bufp);
-	stat->mode = buf_get_int32(bufp);
-	stat->atime = buf_get_int32(bufp);
-	stat->mtime = buf_get_int32(bufp);
-	stat->length = buf_get_int64(bufp);
-	buf_get_str(bufp, &stat->name);
-	buf_get_str(bufp, &stat->uid);
-	buf_get_str(bufp, &stat->gid);
-	buf_get_str(bufp, &stat->muid);
-
-	if (dotu) {
-		buf_get_str(bufp, &stat->extension);
-		stat->n_uid = buf_get_int32(bufp);
-		stat->n_gid = buf_get_int32(bufp);
-		stat->n_muid = buf_get_int32(bufp);
-	}
-}
-
-/**
- * p9_deserialize_stat - decode a received metadata structure
- * @buf: buffer to deserialize
- * @buflen: length of received buffer
- * @stat: metadata structure to decode into
- * @dotu: non-zero if 9P2000.u
- *
- * Note: stat will point to the buf region.
- */
-
-int
-p9_deserialize_stat(void *buf, u32 buflen, struct p9_stat *stat,
-		int dotu)
-{
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-	unsigned char *p;
-
-	buf_init(bufp, buf, buflen);
-	p = bufp->p;
-	buf_get_stat(bufp, stat, dotu);
-
-	if (buf_check_overflow(bufp))
-		return 0;
-	else
-		return bufp->p - p;
-}
-EXPORT_SYMBOL(p9_deserialize_stat);
-
-/**
- * deserialize_fcall - unmarshal a response
- * @buf: recieved buffer
- * @buflen: length of received buffer
- * @rcall: fcall structure to populate
- * @rcalllen: length of fcall structure to populate
- * @dotu: non-zero if 9P2000.u
- *
- */
-
-int
-p9_deserialize_fcall(void *buf, u32 buflen, struct p9_fcall *rcall,
-		       int dotu)
-{
-
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-	int i = 0;
-
-	buf_init(bufp, buf, buflen);
-
-	rcall->size = buf_get_int32(bufp);
-	rcall->id = buf_get_int8(bufp);
-	rcall->tag = buf_get_int16(bufp);
-
-	P9_DPRINTK(P9_DEBUG_CONV, "size %d id %d tag %d\n", rcall->size,
-							rcall->id, rcall->tag);
-
-	switch (rcall->id) {
-	default:
-		P9_EPRINTK(KERN_ERR, "unknown message type: %d\n", rcall->id);
-		return -EPROTO;
-	case P9_RVERSION:
-		rcall->params.rversion.msize = buf_get_int32(bufp);
-		buf_get_str(bufp, &rcall->params.rversion.version);
-		break;
-	case P9_RFLUSH:
-		break;
-	case P9_RATTACH:
-		rcall->params.rattach.qid.type = buf_get_int8(bufp);
-		rcall->params.rattach.qid.version = buf_get_int32(bufp);
-		rcall->params.rattach.qid.path = buf_get_int64(bufp);
-		break;
-	case P9_RWALK:
-		rcall->params.rwalk.nwqid = buf_get_int16(bufp);
-		if (rcall->params.rwalk.nwqid > P9_MAXWELEM) {
-			P9_EPRINTK(KERN_ERR,
-					"Rwalk with more than %d qids: %d\n",
-					P9_MAXWELEM, rcall->params.rwalk.nwqid);
-			return -EPROTO;
-		}
-
-		for (i = 0; i < rcall->params.rwalk.nwqid; i++)
-			buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]);
-		break;
-	case P9_ROPEN:
-		buf_get_qid(bufp, &rcall->params.ropen.qid);
-		rcall->params.ropen.iounit = buf_get_int32(bufp);
-		break;
-	case P9_RCREATE:
-		buf_get_qid(bufp, &rcall->params.rcreate.qid);
-		rcall->params.rcreate.iounit = buf_get_int32(bufp);
-		break;
-	case P9_RREAD:
-		rcall->params.rread.count = buf_get_int32(bufp);
-		rcall->params.rread.data = bufp->p;
-		buf_check_size(bufp, rcall->params.rread.count);
-		break;
-	case P9_RWRITE:
-		rcall->params.rwrite.count = buf_get_int32(bufp);
-		break;
-	case P9_RCLUNK:
-		break;
-	case P9_RREMOVE:
-		break;
-	case P9_RSTAT:
-		buf_get_int16(bufp);
-		buf_get_stat(bufp, &rcall->params.rstat.stat, dotu);
-		break;
-	case P9_RWSTAT:
-		break;
-	case P9_RERROR:
-		buf_get_str(bufp, &rcall->params.rerror.error);
-		if (dotu)
-			rcall->params.rerror.errno = buf_get_int16(bufp);
-		break;
-	}
-
-	if (buf_check_overflow(bufp)) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "buffer overflow\n");
-		return -EIO;
-	}
-
-	return bufp->p - bufp->sp;
-}
-EXPORT_SYMBOL(p9_deserialize_fcall);
-
-static inline void p9_put_int8(struct cbuf *bufp, u8 val, u8 * p)
-{
-	*p = val;
-	buf_put_int8(bufp, val);
-}
-
-static inline void p9_put_int16(struct cbuf *bufp, u16 val, u16 * p)
-{
-	*p = val;
-	buf_put_int16(bufp, val);
-}
-
-static inline void p9_put_int32(struct cbuf *bufp, u32 val, u32 * p)
-{
-	*p = val;
-	buf_put_int32(bufp, val);
-}
-
-static inline void p9_put_int64(struct cbuf *bufp, u64 val, u64 * p)
-{
-	*p = val;
-	buf_put_int64(bufp, val);
-}
-
-static void
-p9_put_str(struct cbuf *bufp, char *data, struct p9_str *str)
-{
-	int len;
-	char *s;
-
-	if (data)
-		len = strlen(data);
-	else
-		len = 0;
-
-	s = buf_put_stringn(bufp, data, len);
-	if (str) {
-		str->len = len;
-		str->str = s;
-	}
-}
-
-static int
-p9_put_data(struct cbuf *bufp, const char *data, int count,
-		   unsigned char **pdata)
-{
-	*pdata = buf_alloc(bufp, count);
-	if (*pdata == NULL)
-		return -ENOMEM;
-	memmove(*pdata, data, count);
-	return 0;
-}
-
-static int
-p9_put_user_data(struct cbuf *bufp, const char __user *data, int count,
-		   unsigned char **pdata)
-{
-	*pdata = buf_alloc(bufp, count);
-	if (*pdata == NULL)
-		return -ENOMEM;
-	return copy_from_user(*pdata, data, count);
-}
-
-static void
-p9_put_wstat(struct cbuf *bufp, struct p9_wstat *wstat,
-	       struct p9_stat *stat, int statsz, int dotu)
-{
-	p9_put_int16(bufp, statsz, &stat->size);
-	p9_put_int16(bufp, wstat->type, &stat->type);
-	p9_put_int32(bufp, wstat->dev, &stat->dev);
-	p9_put_int8(bufp, wstat->qid.type, &stat->qid.type);
-	p9_put_int32(bufp, wstat->qid.version, &stat->qid.version);
-	p9_put_int64(bufp, wstat->qid.path, &stat->qid.path);
-	p9_put_int32(bufp, wstat->mode, &stat->mode);
-	p9_put_int32(bufp, wstat->atime, &stat->atime);
-	p9_put_int32(bufp, wstat->mtime, &stat->mtime);
-	p9_put_int64(bufp, wstat->length, &stat->length);
-
-	p9_put_str(bufp, wstat->name, &stat->name);
-	p9_put_str(bufp, wstat->uid, &stat->uid);
-	p9_put_str(bufp, wstat->gid, &stat->gid);
-	p9_put_str(bufp, wstat->muid, &stat->muid);
-
-	if (dotu) {
-		p9_put_str(bufp, wstat->extension, &stat->extension);
-		p9_put_int32(bufp, wstat->n_uid, &stat->n_uid);
-		p9_put_int32(bufp, wstat->n_gid, &stat->n_gid);
-		p9_put_int32(bufp, wstat->n_muid, &stat->n_muid);
-	}
-}
-
-static struct p9_fcall *
-p9_create_common(struct cbuf *bufp, u32 size, u8 id)
-{
-	struct p9_fcall *fc;
-
-	size += 4 + 1 + 2;	/* size[4] id[1] tag[2] */
-	fc = kmalloc(sizeof(struct p9_fcall) + size, GFP_KERNEL);
-	if (!fc)
-		return ERR_PTR(-ENOMEM);
-
-	fc->sdata = (char *)fc + sizeof(*fc);
-
-	buf_init(bufp, (char *)fc->sdata, size);
-	p9_put_int32(bufp, size, &fc->size);
-	p9_put_int8(bufp, id, &fc->id);
-	p9_put_int16(bufp, P9_NOTAG, &fc->tag);
-
-	return fc;
-}
-
-/**
- * p9_set_tag - set the tag field of an &p9_fcall structure
- * @fc: fcall structure to set tag within
- * @tag: tag id to set
- */
-
-void p9_set_tag(struct p9_fcall *fc, u16 tag)
-{
-	fc->tag = tag;
-	*(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag);
-}
-EXPORT_SYMBOL(p9_set_tag);
-
-/**
- * p9_create_tversion - allocates and creates a T_VERSION request
- * @msize: requested maximum data size
- * @version: version string to negotiate
- *
- */
-struct p9_fcall *p9_create_tversion(u32 msize, char *version)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 2 + strlen(version);	/* msize[4] version[s] */
-	fc = p9_create_common(bufp, size, P9_TVERSION);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, msize, &fc->params.tversion.msize);
-	p9_put_str(bufp, version, &fc->params.tversion.version);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tversion);
-
-/**
- * p9_create_tauth - allocates and creates a T_AUTH request
- * @afid: handle to use for authentication protocol
- * @uname: user name attempting to authenticate
- * @aname: mount specifier for remote server
- * @n_uname: numeric id for user attempting to authneticate
- * @dotu: 9P2000.u extension flag
- *
- */
-
-struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname,
-	u32 n_uname, int dotu)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* afid[4] uname[s] aname[s] */
-	size = 4 + 2 + 2;
-	if (uname)
-		size += strlen(uname);
-
-	if (aname)
-		size += strlen(aname);
-
-	if (dotu)
-		size += 4;	/* n_uname */
-
-	fc = p9_create_common(bufp, size, P9_TAUTH);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, afid, &fc->params.tauth.afid);
-	p9_put_str(bufp, uname, &fc->params.tauth.uname);
-	p9_put_str(bufp, aname, &fc->params.tauth.aname);
-	if (dotu)
-		p9_put_int32(bufp, n_uname, &fc->params.tauth.n_uname);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tauth);
-
-/**
- * p9_create_tattach - allocates and creates a T_ATTACH request
- * @fid: handle to use for the new mount point
- * @afid: handle to use for authentication protocol
- * @uname: user name attempting to attach
- * @aname: mount specifier for remote server
- * @n_uname: numeric id for user attempting to attach
- * @n_uname: numeric id for user attempting to attach
- * @dotu: 9P2000.u extension flag
- *
- */
-
-struct p9_fcall *
-p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname,
-	u32 n_uname, int dotu)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* fid[4] afid[4] uname[s] aname[s] */
-	size = 4 + 4 + 2 + 2;
-	if (uname)
-		size += strlen(uname);
-
-	if (aname)
-		size += strlen(aname);
-
-	if (dotu)
-		size += 4;	/* n_uname */
-
-	fc = p9_create_common(bufp, size, P9_TATTACH);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tattach.fid);
-	p9_put_int32(bufp, afid, &fc->params.tattach.afid);
-	p9_put_str(bufp, uname, &fc->params.tattach.uname);
-	p9_put_str(bufp, aname, &fc->params.tattach.aname);
-	if (dotu)
-		p9_put_int32(bufp, n_uname, &fc->params.tattach.n_uname);
-
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tattach);
-
-/**
- * p9_create_tflush - allocates and creates a T_FLUSH request
- * @oldtag: tag id for the transaction we are attempting to cancel
- *
- */
-
-struct p9_fcall *p9_create_tflush(u16 oldtag)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 2;		/* oldtag[2] */
-	fc = p9_create_common(bufp, size, P9_TFLUSH);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int16(bufp, oldtag, &fc->params.tflush.oldtag);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tflush);
-
-/**
- * p9_create_twalk - allocates and creates a T_FLUSH request
- * @fid: handle we are traversing from
- * @newfid: a new handle for this transaction
- * @nwname: number of path elements to traverse
- * @wnames: array of path elements
- *
- */
-
-struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname,
-				     char **wnames)
-{
-	int i, size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	if (nwname > P9_MAXWELEM) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "nwname > %d\n", P9_MAXWELEM);
-		return NULL;
-	}
-
-	size = 4 + 4 + 2;	/* fid[4] newfid[4] nwname[2] ... */
-	for (i = 0; i < nwname; i++) {
-		size += 2 + strlen(wnames[i]);	/* wname[s] */
-	}
-
-	fc = p9_create_common(bufp, size, P9_TWALK);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.twalk.fid);
-	p9_put_int32(bufp, newfid, &fc->params.twalk.newfid);
-	p9_put_int16(bufp, nwname, &fc->params.twalk.nwname);
-	for (i = 0; i < nwname; i++) {
-		p9_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]);
-	}
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_twalk);
-
-/**
- * p9_create_topen - allocates and creates a T_OPEN request
- * @fid: handle we are trying to open
- * @mode: what mode we are trying to open the file in
- *
- */
-
-struct p9_fcall *p9_create_topen(u32 fid, u8 mode)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 1;		/* fid[4] mode[1] */
-	fc = p9_create_common(bufp, size, P9_TOPEN);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.topen.fid);
-	p9_put_int8(bufp, mode, &fc->params.topen.mode);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_topen);
-
-/**
- * p9_create_tcreate - allocates and creates a T_CREATE request
- * @fid: handle of directory we are trying to create in
- * @name: name of the file we are trying to create
- * @perm: permissions for the file we are trying to create
- * @mode: what mode we are trying to open the file in
- * @extension: 9p2000.u extension string (for special files)
- * @dotu: 9p2000.u enabled flag
- *
- * Note: Plan 9 create semantics include opening the resulting file
- * which is why mode is included.
- */
-
-struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
-	char *extension, int dotu)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* fid[4] name[s] perm[4] mode[1] */
-	size = 4 + 2 + strlen(name) + 4 + 1;
-	if (dotu) {
-		size += 2 +			/* extension[s] */
-		    (extension == NULL ? 0 : strlen(extension));
-	}
-
-	fc = p9_create_common(bufp, size, P9_TCREATE);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tcreate.fid);
-	p9_put_str(bufp, name, &fc->params.tcreate.name);
-	p9_put_int32(bufp, perm, &fc->params.tcreate.perm);
-	p9_put_int8(bufp, mode, &fc->params.tcreate.mode);
-	if (dotu)
-		p9_put_str(bufp, extension, &fc->params.tcreate.extension);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tcreate);
-
-/**
- * p9_create_tread - allocates and creates a T_READ request
- * @fid: handle of the file we are trying to read
- * @offset: offset to start reading from
- * @count: how many bytes to read
- */
-
-struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4 + 8 + 4;	/* fid[4] offset[8] count[4] */
-	fc = p9_create_common(bufp, size, P9_TREAD);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tread.fid);
-	p9_put_int64(bufp, offset, &fc->params.tread.offset);
-	p9_put_int32(bufp, count, &fc->params.tread.count);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tread);
-
-/**
- * p9_create_twrite - allocates and creates a T_WRITE request from the kernel
- * @fid: handle of the file we are trying to write
- * @offset: offset to start writing at
- * @count: how many bytes to write
- * @data: data to write
- *
- * This function will create a requst with data buffers from the kernel
- * such as the page cache.
- */
-
-struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count,
-				      const char *data)
-{
-	int size, err;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* fid[4] offset[8] count[4] data[count] */
-	size = 4 + 8 + 4 + count;
-	fc = p9_create_common(bufp, size, P9_TWRITE);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.twrite.fid);
-	p9_put_int64(bufp, offset, &fc->params.twrite.offset);
-	p9_put_int32(bufp, count, &fc->params.twrite.count);
-	err = p9_put_data(bufp, data, count, &fc->params.twrite.data);
-	if (err) {
-		kfree(fc);
-		fc = ERR_PTR(err);
-		goto error;
-	}
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_twrite);
-
-/**
- * p9_create_twrite_u - allocates and creates a T_WRITE request from userspace
- * @fid: handle of the file we are trying to write
- * @offset: offset to start writing at
- * @count: how many bytes to write
- * @data: data to write
- *
- * This function will create a request with data buffers from userspace
- */
-
-struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count,
-				      const char __user *data)
-{
-	int size, err;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	/* fid[4] offset[8] count[4] data[count] */
-	size = 4 + 8 + 4 + count;
-	fc = p9_create_common(bufp, size, P9_TWRITE);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.twrite.fid);
-	p9_put_int64(bufp, offset, &fc->params.twrite.offset);
-	p9_put_int32(bufp, count, &fc->params.twrite.count);
-	err = p9_put_user_data(bufp, data, count, &fc->params.twrite.data);
-	if (err) {
-		kfree(fc);
-		fc = ERR_PTR(err);
-		goto error;
-	}
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_twrite_u);
-
-/**
- * p9_create_tclunk - allocate a request to forget about a file handle
- * @fid: handle of the file we closing or forgetting about
- *
- * clunk is used both to close open files and to discard transient handles
- * which may be created during meta-data operations and hierarchy traversal.
- */
-
-struct p9_fcall *p9_create_tclunk(u32 fid)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4;		/* fid[4] */
-	fc = p9_create_common(bufp, size, P9_TCLUNK);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tclunk.fid);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tclunk);
-
-/**
- * p9_create_tremove - allocate and create a request to remove a file
- * @fid: handle of the file or directory we are removing
- *
- */
-
-struct p9_fcall *p9_create_tremove(u32 fid)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4;		/* fid[4] */
-	fc = p9_create_common(bufp, size, P9_TREMOVE);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tremove.fid);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tremove);
-
-/**
- * p9_create_tstat - allocate and populate a request for attributes
- * @fid: handle of the file or directory we are trying to get the attributes of
- *
- */
-
-struct p9_fcall *p9_create_tstat(u32 fid)
-{
-	int size;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	size = 4;		/* fid[4] */
-	fc = p9_create_common(bufp, size, P9_TSTAT);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.tstat.fid);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_tstat);
-
-/**
- * p9_create_tstat - allocate and populate a request to change attributes
- * @fid: handle of the file or directory we are trying to change
- * @wstat: &p9_stat structure with attributes we wish to set
- * @dotu: 9p2000.u enabled flag
- *
- */
-
-struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat,
-				      int dotu)
-{
-	int size, statsz;
-	struct p9_fcall *fc;
-	struct cbuf buffer;
-	struct cbuf *bufp = &buffer;
-
-	statsz = p9_size_wstat(wstat, dotu);
-	size = 4 + 2 + 2 + statsz;	/* fid[4] stat[n] */
-	fc = p9_create_common(bufp, size, P9_TWSTAT);
-	if (IS_ERR(fc))
-		goto error;
-
-	p9_put_int32(bufp, fid, &fc->params.twstat.fid);
-	buf_put_int16(bufp, statsz + 2);
-	p9_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, dotu);
-
-	if (buf_check_overflow(bufp)) {
-		kfree(fc);
-		fc = ERR_PTR(-ENOMEM);
-	}
-error:
-	return fc;
-}
-EXPORT_SYMBOL(p9_create_twstat);
-
diff --git a/net/9p/fcprint.c b/net/9p/fcprint.c
deleted file mode 100644
index 53dd8e28dd8a..000000000000
--- a/net/9p/fcprint.c
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- *  net/9p/fcprint.c
- *
- *  Print 9P call.
- *
- *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to:
- *  Free Software Foundation
- *  51 Franklin Street, Fifth Floor
- *  Boston, MA  02111-1301  USA
- *
- */
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/idr.h>
-#include <net/9p/9p.h>
-
-#ifdef CONFIG_NET_9P_DEBUG
-
-static int
-p9_printqid(char *buf, int buflen, struct p9_qid *q)
-{
-	int n;
-	char b[10];
-
-	n = 0;
-	if (q->type & P9_QTDIR)
-		b[n++] = 'd';
-	if (q->type & P9_QTAPPEND)
-		b[n++] = 'a';
-	if (q->type & P9_QTAUTH)
-		b[n++] = 'A';
-	if (q->type & P9_QTEXCL)
-		b[n++] = 'l';
-	if (q->type & P9_QTTMP)
-		b[n++] = 't';
-	if (q->type & P9_QTSYMLINK)
-		b[n++] = 'L';
-	b[n] = '\0';
-
-	return scnprintf(buf, buflen, "(%.16llx %x %s)",
-					(long long int) q->path, q->version, b);
-}
-
-static int
-p9_printperm(char *buf, int buflen, int perm)
-{
-	int n;
-	char b[15];
-
-	n = 0;
-	if (perm & P9_DMDIR)
-		b[n++] = 'd';
-	if (perm & P9_DMAPPEND)
-		b[n++] = 'a';
-	if (perm & P9_DMAUTH)
-		b[n++] = 'A';
-	if (perm & P9_DMEXCL)
-		b[n++] = 'l';
-	if (perm & P9_DMTMP)
-		b[n++] = 't';
-	if (perm & P9_DMDEVICE)
-		b[n++] = 'D';
-	if (perm & P9_DMSOCKET)
-		b[n++] = 'S';
-	if (perm & P9_DMNAMEDPIPE)
-		b[n++] = 'P';
-	if (perm & P9_DMSYMLINK)
-		b[n++] = 'L';
-	b[n] = '\0';
-
-	return scnprintf(buf, buflen, "%s%03o", b, perm&077);
-}
-
-static int
-p9_printstat(char *buf, int buflen, struct p9_stat *st, int extended)
-{
-	int n;
-
-	n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len,
-		st->name.str, st->uid.len, st->uid.str);
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid);
-
-	n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str);
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid);
-
-	n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str);
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid);
-
-	n += scnprintf(buf+n, buflen-n, " q ");
-	n += p9_printqid(buf+n, buflen-n, &st->qid);
-	n += scnprintf(buf+n, buflen-n, " m ");
-	n += p9_printperm(buf+n, buflen-n, st->mode);
-	n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld",
-		st->atime, st->mtime, (long long int) st->length);
-
-	if (extended)
-		n += scnprintf(buf+n, buflen-n, " ext '%.*s'",
-			st->extension.len, st->extension.str);
-
-	return n;
-}
-
-static int
-p9_dumpdata(char *buf, int buflen, u8 *data, int datalen)
-{
-	int i, n;
-
-	i = n = 0;
-	while (i < datalen) {
-		n += scnprintf(buf + n, buflen - n, "%02x", data[i]);
-		if (i%4 == 3)
-			n += scnprintf(buf + n, buflen - n, " ");
-		if (i%32 == 31)
-			n += scnprintf(buf + n, buflen - n, "\n");
-
-		i++;
-	}
-	n += scnprintf(buf + n, buflen - n, "\n");
-
-	return n;
-}
-
-static int
-p9_printdata(char *buf, int buflen, u8 *data, int datalen)
-{
-	return p9_dumpdata(buf, buflen, data, datalen < 16?datalen:16);
-}
-
-/**
- * p9_printfcall - decode and print a protocol structure into a buffer
- * @buf: buffer to deposit decoded structure into
- * @buflen: available space in buffer
- * @fc: protocol rpc structure of type &p9_fcall
- * @extended: whether or not session is operating with extended protocol
- */
-
-int
-p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended)
-{
-	int i, ret, type, tag;
-
-	if (!fc)
-		return scnprintf(buf, buflen, "<NULL>");
-
-	type = fc->id;
-	tag = fc->tag;
-
-	ret = 0;
-	switch (type) {
-	case P9_TVERSION:
-		ret += scnprintf(buf+ret, buflen-ret,
-				"Tversion tag %u msize %u version '%.*s'", tag,
-				fc->params.tversion.msize,
-				fc->params.tversion.version.len,
-				fc->params.tversion.version.str);
-		break;
-
-	case P9_RVERSION:
-		ret += scnprintf(buf+ret, buflen-ret,
-				"Rversion tag %u msize %u version '%.*s'", tag,
-				fc->params.rversion.msize,
-				fc->params.rversion.version.len,
-				fc->params.rversion.version.str);
-		break;
-
-	case P9_TAUTH:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag,
-			fc->params.tauth.afid, fc->params.tauth.uname.len,
-			fc->params.tauth.uname.str, fc->params.tauth.aname.len,
-			fc->params.tauth.aname.str);
-		break;
-
-	case P9_RAUTH:
-		ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag);
-		p9_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid);
-		break;
-
-	case P9_TATTACH:
-		ret += scnprintf(buf+ret, buflen-ret,
-		 "Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'", tag,
-		 fc->params.tattach.fid, fc->params.tattach.afid,
-		 fc->params.tattach.uname.len, fc->params.tattach.uname.str,
-		 fc->params.tattach.aname.len, fc->params.tattach.aname.str);
-		break;
-
-	case P9_RATTACH:
-		ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ",
-									tag);
-		p9_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid);
-		break;
-
-	case P9_RERROR:
-		ret += scnprintf(buf+ret, buflen-ret,
-				"Rerror tag %u ename '%.*s'", tag,
-				fc->params.rerror.error.len,
-				fc->params.rerror.error.str);
-		if (extended)
-			ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n",
-				fc->params.rerror.errno);
-		break;
-
-	case P9_TFLUSH:
-		ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u",
-			tag, fc->params.tflush.oldtag);
-		break;
-
-	case P9_RFLUSH:
-		ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag);
-		break;
-
-	case P9_TWALK:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Twalk tag %u fid %d newfid %d nwname %d", tag,
-			fc->params.twalk.fid, fc->params.twalk.newfid,
-			fc->params.twalk.nwname);
-		for (i = 0; i < fc->params.twalk.nwname; i++)
-			ret += scnprintf(buf+ret, buflen-ret, " '%.*s'",
-				fc->params.twalk.wnames[i].len,
-				fc->params.twalk.wnames[i].str);
-		break;
-
-	case P9_RWALK:
-		ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d",
-			tag, fc->params.rwalk.nwqid);
-		for (i = 0; i < fc->params.rwalk.nwqid; i++)
-			ret += p9_printqid(buf+ret, buflen-ret,
-				&fc->params.rwalk.wqids[i]);
-		break;
-
-	case P9_TOPEN:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Topen tag %u fid %d mode %d", tag,
-			fc->params.topen.fid, fc->params.topen.mode);
-		break;
-
-	case P9_ROPEN:
-		ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag);
-		ret += p9_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid);
-		ret += scnprintf(buf+ret, buflen-ret, " iounit %d",
-			fc->params.ropen.iounit);
-		break;
-
-	case P9_TCREATE:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Tcreate tag %u fid %d name '%.*s' perm ", tag,
-			fc->params.tcreate.fid, fc->params.tcreate.name.len,
-			fc->params.tcreate.name.str);
-
-		ret += p9_printperm(buf+ret, buflen-ret,
-						fc->params.tcreate.perm);
-		ret += scnprintf(buf+ret, buflen-ret, " mode %d",
-			fc->params.tcreate.mode);
-		break;
-
-	case P9_RCREATE:
-		ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag);
-		ret += p9_printqid(buf+ret, buflen-ret,
-						&fc->params.rcreate.qid);
-		ret += scnprintf(buf+ret, buflen-ret, " iounit %d",
-			fc->params.rcreate.iounit);
-		break;
-
-	case P9_TREAD:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Tread tag %u fid %d offset %lld count %u", tag,
-			fc->params.tread.fid,
-			(long long int) fc->params.tread.offset,
-			fc->params.tread.count);
-		break;
-
-	case P9_RREAD:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Rread tag %u count %u data ", tag,
-			fc->params.rread.count);
-		ret += p9_printdata(buf+ret, buflen-ret, fc->params.rread.data,
-			fc->params.rread.count);
-		break;
-
-	case P9_TWRITE:
-		ret += scnprintf(buf+ret, buflen-ret,
-			"Twrite tag %u fid %d offset %lld count %u data ",
-			tag, fc->params.twrite.fid,
-			(long long int) fc->params.twrite.offset,
-			fc->params.twrite.count);
-		ret += p9_printdata(buf+ret, buflen-ret, fc->params.twrite.data,
-			fc->params.twrite.count);
-		break;
-
-	case P9_RWRITE:
-		ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u",
-			tag, fc->params.rwrite.count);
-		break;
-
-	case P9_TCLUNK:
-		ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d",
-			tag, fc->params.tclunk.fid);
-		break;
-
-	case P9_RCLUNK:
-		ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag);
-		break;
-
-	case P9_TREMOVE:
-		ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d",
-			tag, fc->params.tremove.fid);
-		break;
-
-	case P9_RREMOVE:
-		ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag);
-		break;
-
-	case P9_TSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d",
-			tag, fc->params.tstat.fid);
-		break;
-
-	case P9_RSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag);
-		ret += p9_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat,
-			extended);
-		break;
-
-	case P9_TWSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ",
-			tag, fc->params.twstat.fid);
-		ret += p9_printstat(buf+ret, buflen-ret,
-					&fc->params.twstat.stat, extended);
-		break;
-
-	case P9_RWSTAT:
-		ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag);
-		break;
-
-	default:
-		ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type);
-		break;
-	}
-
-	return ret;
-}
-#else
-int
-p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended)
-{
-	return 0;
-}
-#endif /* CONFIG_NET_9P_DEBUG */
-EXPORT_SYMBOL(p9_printfcall);
-
diff --git a/net/9p/mod.c b/net/9p/mod.c
index 1084feb24cb0..cf8a4128cd5c 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -29,6 +29,7 @@
 #include <net/9p/9p.h>
 #include <linux/fs.h>
 #include <linux/parser.h>
+#include <net/9p/client.h>
 #include <net/9p/transport.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
new file mode 100644
index 000000000000..29be52439086
--- /dev/null
+++ b/net/9p/protocol.c
@@ -0,0 +1,558 @@
+/*
+ * net/9p/protocol.c
+ *
+ * 9P Protocol Support Code
+ *
+ *  Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  Base on code from Anthony Liguori <aliguori@us.ibm.com>
+ *  Copyright (C) 2008 by IBM, Corp.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include "protocol.h"
+
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+#ifndef offset_of
+#define offset_of(type, memb) \
+	((unsigned long)(&((type *)0)->memb))
+#endif
+#ifndef container_of
+#define container_of(obj, type, memb) \
+	((type *)(((char *)obj) - offset_of(type, memb)))
+#endif
+
+static int
+p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...);
+
+void
+p9pdu_dump(int way, struct p9_fcall *pdu)
+{
+	int i, n;
+	u8 *data = pdu->sdata;
+	int datalen = pdu->size;
+	char buf[255];
+	int buflen = 255;
+
+	i = n = 0;
+	if (datalen > (buflen-16))
+		datalen = buflen-16;
+	while (i < datalen) {
+		n += scnprintf(buf + n, buflen - n, "%02x ", data[i]);
+		if (i%4 == 3)
+			n += scnprintf(buf + n, buflen - n, " ");
+		if (i%32 == 31)
+			n += scnprintf(buf + n, buflen - n, "\n");
+
+		i++;
+	}
+	n += scnprintf(buf + n, buflen - n, "\n");
+
+	if (way)
+		P9_DPRINTK(P9_DEBUG_PKT, "[[[(%d) %s\n", datalen, buf);
+	else
+		P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf);
+}
+EXPORT_SYMBOL(p9pdu_dump);
+
+void p9stat_free(struct p9_wstat *stbuf)
+{
+	kfree(stbuf->name);
+	kfree(stbuf->uid);
+	kfree(stbuf->gid);
+	kfree(stbuf->muid);
+	kfree(stbuf->extension);
+}
+EXPORT_SYMBOL(p9stat_free);
+
+static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
+{
+	size_t len = MIN(pdu->size - pdu->offset, size);
+	memcpy(data, &pdu->sdata[pdu->offset], len);
+	pdu->offset += len;
+	return size - len;
+}
+
+static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
+{
+	size_t len = MIN(pdu->capacity - pdu->size, size);
+	memcpy(&pdu->sdata[pdu->size], data, len);
+	pdu->size += len;
+	return size - len;
+}
+
+static size_t
+pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
+{
+	size_t len = MIN(pdu->capacity - pdu->size, size);
+	int err = copy_from_user(&pdu->sdata[pdu->size], udata, len);
+	if (err)
+		printk(KERN_WARNING "pdu_write_u returning: %d\n", err);
+
+	pdu->size += len;
+	return size - len;
+}
+
+/*
+	b - int8_t
+	w - int16_t
+	d - int32_t
+	q - int64_t
+	s - string
+	S - stat
+	Q - qid
+	D - data blob (int32_t size followed by void *, results are not freed)
+	T - array of strings (int16_t count, followed by strings)
+	R - array of qids (int16_t count, followed by qids)
+	? - if optional = 1, continue parsing
+*/
+
+static int
+p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
+{
+	const char *ptr;
+	int errcode = 0;
+
+	for (ptr = fmt; *ptr; ptr++) {
+		switch (*ptr) {
+		case 'b':{
+				int8_t *val = va_arg(ap, int8_t *);
+				if (pdu_read(pdu, val, sizeof(*val))) {
+					errcode = -EFAULT;
+					break;
+				}
+			}
+			break;
+		case 'w':{
+				int16_t *val = va_arg(ap, int16_t *);
+				if (pdu_read(pdu, val, sizeof(*val))) {
+					errcode = -EFAULT;
+					break;
+				}
+				*val = cpu_to_le16(*val);
+			}
+			break;
+		case 'd':{
+				int32_t *val = va_arg(ap, int32_t *);
+				if (pdu_read(pdu, val, sizeof(*val))) {
+					errcode = -EFAULT;
+					break;
+				}
+				*val = cpu_to_le32(*val);
+			}
+			break;
+		case 'q':{
+				int64_t *val = va_arg(ap, int64_t *);
+				if (pdu_read(pdu, val, sizeof(*val))) {
+					errcode = -EFAULT;
+					break;
+				}
+				*val = cpu_to_le64(*val);
+			}
+			break;
+		case 's':{
+				char **ptr = va_arg(ap, char **);
+				int16_t len;
+				int size;
+
+				errcode = p9pdu_readf(pdu, optional, "w", &len);
+				if (errcode)
+					break;
+
+				size = MAX(len, 0);
+
+				*ptr = kmalloc(size + 1, GFP_KERNEL);
+				if (*ptr == NULL) {
+					errcode = -EFAULT;
+					break;
+				}
+				if (pdu_read(pdu, *ptr, size)) {
+					errcode = -EFAULT;
+					kfree(*ptr);
+					*ptr = NULL;
+				} else
+					(*ptr)[size] = 0;
+			}
+			break;
+		case 'Q':{
+				struct p9_qid *qid =
+				    va_arg(ap, struct p9_qid *);
+
+				errcode = p9pdu_readf(pdu, optional, "bdq",
+						      &qid->type, &qid->version,
+						      &qid->path);
+			}
+			break;
+		case 'S':{
+				struct p9_wstat *stbuf =
+				    va_arg(ap, struct p9_wstat *);
+
+				memset(stbuf, 0, sizeof(struct p9_wstat));
+				stbuf->n_uid = stbuf->n_gid = stbuf->n_muid =
+									-1;
+				errcode =
+				    p9pdu_readf(pdu, optional,
+						"wwdQdddqssss?sddd",
+						&stbuf->size, &stbuf->type,
+						&stbuf->dev, &stbuf->qid,
+						&stbuf->mode, &stbuf->atime,
+						&stbuf->mtime, &stbuf->length,
+						&stbuf->name, &stbuf->uid,
+						&stbuf->gid, &stbuf->muid,
+						&stbuf->extension,
+						&stbuf->n_uid, &stbuf->n_gid,
+						&stbuf->n_muid);
+				if (errcode)
+					p9stat_free(stbuf);
+			}
+			break;
+		case 'D':{
+				int32_t *count = va_arg(ap, int32_t *);
+				void **data = va_arg(ap, void **);
+
+				errcode =
+				    p9pdu_readf(pdu, optional, "d", count);
+				if (!errcode) {
+					*count =
+					    MIN(*count,
+						pdu->size - pdu->offset);
+					*data = &pdu->sdata[pdu->offset];
+				}
+			}
+			break;
+		case 'T':{
+				int16_t *nwname = va_arg(ap, int16_t *);
+				char ***wnames = va_arg(ap, char ***);
+
+				errcode =
+				    p9pdu_readf(pdu, optional, "w", nwname);
+				if (!errcode) {
+					*wnames =
+					    kmalloc(sizeof(char *) * *nwname,
+						    GFP_KERNEL);
+					if (!*wnames)
+						errcode = -ENOMEM;
+				}
+
+				if (!errcode) {
+					int i;
+
+					for (i = 0; i < *nwname; i++) {
+						errcode =
+						    p9pdu_readf(pdu, optional,
+								"s",
+								&(*wnames)[i]);
+						if (errcode)
+							break;
+					}
+				}
+
+				if (errcode) {
+					if (*wnames) {
+						int i;
+
+						for (i = 0; i < *nwname; i++)
+							kfree((*wnames)[i]);
+					}
+					kfree(*wnames);
+					*wnames = NULL;
+				}
+			}
+			break;
+		case 'R':{
+				int16_t *nwqid = va_arg(ap, int16_t *);
+				struct p9_qid **wqids =
+				    va_arg(ap, struct p9_qid **);
+
+				*wqids = NULL;
+
+				errcode =
+				    p9pdu_readf(pdu, optional, "w", nwqid);
+				if (!errcode) {
+					*wqids =
+					    kmalloc(*nwqid *
+						    sizeof(struct p9_qid),
+						    GFP_KERNEL);
+					if (*wqids == NULL)
+						errcode = -ENOMEM;
+				}
+
+				if (!errcode) {
+					int i;
+
+					for (i = 0; i < *nwqid; i++) {
+						errcode =
+						    p9pdu_readf(pdu, optional,
+								"Q",
+								&(*wqids)[i]);
+						if (errcode)
+							break;
+					}
+				}
+
+				if (errcode) {
+					kfree(*wqids);
+					*wqids = NULL;
+				}
+			}
+			break;
+		case '?':
+			if (!optional)
+				return 0;
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		if (errcode)
+			break;
+	}
+
+	return errcode;
+}
+
+int
+p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
+{
+	const char *ptr;
+	int errcode = 0;
+
+	for (ptr = fmt; *ptr; ptr++) {
+		switch (*ptr) {
+		case 'b':{
+				int8_t val = va_arg(ap, int);
+				if (pdu_write(pdu, &val, sizeof(val)))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'w':{
+				int16_t val = va_arg(ap, int);
+				if (pdu_write(pdu, &val, sizeof(val)))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'd':{
+				int32_t val = va_arg(ap, int32_t);
+				if (pdu_write(pdu, &val, sizeof(val)))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'q':{
+				int64_t val = va_arg(ap, int64_t);
+				if (pdu_write(pdu, &val, sizeof(val)))
+					errcode = -EFAULT;
+			}
+			break;
+		case 's':{
+				const char *ptr = va_arg(ap, const char *);
+				int16_t len = 0;
+				if (ptr)
+					len = MIN(strlen(ptr), USHORT_MAX);
+
+				errcode = p9pdu_writef(pdu, optional, "w", len);
+				if (!errcode && pdu_write(pdu, ptr, len))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'Q':{
+				const struct p9_qid *qid =
+				    va_arg(ap, const struct p9_qid *);
+				errcode =
+				    p9pdu_writef(pdu, optional, "bdq",
+						 qid->type, qid->version,
+						 qid->path);
+			} break;
+		case 'S':{
+				const struct p9_wstat *stbuf =
+				    va_arg(ap, const struct p9_wstat *);
+				errcode =
+				    p9pdu_writef(pdu, optional,
+						 "wwdQdddqssss?sddd",
+						 stbuf->size, stbuf->type,
+						 stbuf->dev, &stbuf->qid,
+						 stbuf->mode, stbuf->atime,
+						 stbuf->mtime, stbuf->length,
+						 stbuf->name, stbuf->uid,
+						 stbuf->gid, stbuf->muid,
+						 stbuf->extension, stbuf->n_uid,
+						 stbuf->n_gid, stbuf->n_muid);
+			} break;
+		case 'D':{
+				int32_t count = va_arg(ap, int32_t);
+				const void *data = va_arg(ap, const void *);
+
+				errcode =
+				    p9pdu_writef(pdu, optional, "d", count);
+				if (!errcode && pdu_write(pdu, data, count))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'U':{
+				int32_t count = va_arg(ap, int32_t);
+				const char __user *udata =
+						va_arg(ap, const void *);
+				errcode =
+				    p9pdu_writef(pdu, optional, "d", count);
+				if (!errcode && pdu_write_u(pdu, udata, count))
+					errcode = -EFAULT;
+			}
+			break;
+		case 'T':{
+				int16_t nwname = va_arg(ap, int);
+				const char **wnames = va_arg(ap, const char **);
+
+				errcode =
+				    p9pdu_writef(pdu, optional, "w", nwname);
+				if (!errcode) {
+					int i;
+
+					for (i = 0; i < nwname; i++) {
+						errcode =
+						    p9pdu_writef(pdu, optional,
+								 "s",
+								 wnames[i]);
+						if (errcode)
+							break;
+					}
+				}
+			}
+			break;
+		case 'R':{
+				int16_t nwqid = va_arg(ap, int);
+				struct p9_qid *wqids =
+				    va_arg(ap, struct p9_qid *);
+
+				errcode =
+				    p9pdu_writef(pdu, optional, "w", nwqid);
+				if (!errcode) {
+					int i;
+
+					for (i = 0; i < nwqid; i++) {
+						errcode =
+						    p9pdu_writef(pdu, optional,
+								 "Q",
+								 &wqids[i]);
+						if (errcode)
+							break;
+					}
+				}
+			}
+			break;
+		case '?':
+			if (!optional)
+				return 0;
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		if (errcode)
+			break;
+	}
+
+	return errcode;
+}
+
+int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	va_start(ap, fmt);
+	ret = p9pdu_vreadf(pdu, optional, fmt, ap);
+	va_end(ap);
+
+	return ret;
+}
+
+static int
+p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	va_start(ap, fmt);
+	ret = p9pdu_vwritef(pdu, optional, fmt, ap);
+	va_end(ap);
+
+	return ret;
+}
+
+int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu)
+{
+	struct p9_fcall fake_pdu;
+	int ret;
+
+	fake_pdu.size = len;
+	fake_pdu.capacity = len;
+	fake_pdu.sdata = buf;
+	fake_pdu.offset = 0;
+
+	ret = p9pdu_readf(&fake_pdu, dotu, "S", st);
+	if (ret) {
+		P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret);
+		p9pdu_dump(1, &fake_pdu);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(p9stat_read);
+
+int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
+{
+	return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
+}
+
+int p9pdu_finalize(struct p9_fcall *pdu)
+{
+	int size = pdu->size;
+	int err;
+
+	pdu->size = 0;
+	err = p9pdu_writef(pdu, 0, "d", size);
+	pdu->size = size;
+
+	if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT)
+		p9pdu_dump(0, pdu);
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size,
+							pdu->id, pdu->tag);
+
+	return err;
+}
+
+void p9pdu_reset(struct p9_fcall *pdu)
+{
+	pdu->offset = 0;
+	pdu->size = 0;
+}
diff --git a/net/9p/protocol.h b/net/9p/protocol.h
new file mode 100644
index 000000000000..ccde462e7ac5
--- /dev/null
+++ b/net/9p/protocol.h
@@ -0,0 +1,34 @@
+/*
+ * net/9p/protocol.h
+ *
+ * 9P Protocol Support Code
+ *
+ *  Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  Base on code from Anthony Liguori <aliguori@us.ibm.com>
+ *  Copyright (C) 2008 by IBM, Corp.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+int
+p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap);
+int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...);
+int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type);
+int p9pdu_finalize(struct p9_fcall *pdu);
+void p9pdu_dump(int, struct p9_fcall *);
+void p9pdu_reset(struct p9_fcall *pdu);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index d652baf5ff91..be65d8242fd2 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -39,12 +39,11 @@
 #include <linux/file.h>
 #include <linux/parser.h>
 #include <net/9p/9p.h>
+#include <net/9p/client.h>
 #include <net/9p/transport.h>
 
 #define P9_PORT 564
 #define MAX_SOCK_BUF (64*1024)
-#define ERREQFLUSH	1
-#define SCHED_TIMEOUT	10
 #define MAXPOLLWADDR	2
 
 /**
@@ -61,7 +60,6 @@ struct p9_fd_opts {
 	u16 port;
 };
 
-
 /**
  * struct p9_trans_fd - transport state
  * @rd: reference to file to read from
@@ -86,7 +84,7 @@ enum {
 	Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_port, "port=%u"},
 	{Opt_rfdno, "rfdno=%u"},
 	{Opt_wfdno, "wfdno=%u"},
@@ -100,60 +98,22 @@ enum {
 	Wpending = 8,		/* can write */
 };
 
-enum {
-	None,
-	Flushing,
-	Flushed,
-};
-
-struct p9_req;
-typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a);
-
-/**
- * struct p9_req - fd mux encoding of an rpc transaction
- * @lock: protects req_list
- * @tag: numeric tag for rpc transaction
- * @tcall: request &p9_fcall structure
- * @rcall: response &p9_fcall structure
- * @err: error state
- * @cb: callback for when response is received
- * @cba: argument to pass to callback
- * @flush: flag to indicate RPC has been flushed
- * @req_list: list link for higher level objects to chain requests
- *
- */
-
-struct p9_req {
-	spinlock_t lock;
-	int tag;
-	struct p9_fcall *tcall;
-	struct p9_fcall *rcall;
-	int err;
-	p9_conn_req_callback cb;
-	void *cba;
-	int flush;
-	struct list_head req_list;
-};
-
-struct p9_mux_poll_task {
-	struct task_struct *task;
-	struct list_head mux_list;
-	int muxnum;
+struct p9_poll_wait {
+	struct p9_conn *conn;
+	wait_queue_t wait;
+	wait_queue_head_t *wait_addr;
 };
 
 /**
  * struct p9_conn - fd mux connection state information
- * @lock: protects mux_list (?)
  * @mux_list: list link for mux to manage multiple connections (?)
- * @poll_task: task polling on this connection
- * @msize: maximum size for connection (dup)
- * @extended: 9p2000.u flag (dup)
- * @trans: reference to transport instance for this connection
- * @tagpool: id accounting for transactions
+ * @client: reference to client instance for this connection
  * @err: error state
  * @req_list: accounting for requests which have been sent
  * @unsent_req_list: accounting for requests that haven't been sent
- * @rcall: current response &p9_fcall structure
+ * @req: current request being processed (if any)
+ * @tmp_buf: temporary buffer to read in header
+ * @rsize: amount to read for current frame
  * @rpos: read position in current frame
  * @rbuf: current read buffer
  * @wpos: write position for current frame
@@ -169,409 +129,300 @@ struct p9_mux_poll_task {
  */
 
 struct p9_conn {
-	spinlock_t lock; /* protect lock structure */
 	struct list_head mux_list;
-	struct p9_mux_poll_task *poll_task;
-	int msize;
-	unsigned char extended;
-	struct p9_trans *trans;
-	struct p9_idpool *tagpool;
+	struct p9_client *client;
 	int err;
 	struct list_head req_list;
 	struct list_head unsent_req_list;
-	struct p9_fcall *rcall;
+	struct p9_req_t *req;
+	char tmp_buf[7];
+	int rsize;
 	int rpos;
 	char *rbuf;
 	int wpos;
 	int wsize;
 	char *wbuf;
-	wait_queue_t poll_wait[MAXPOLLWADDR];
-	wait_queue_head_t *poll_waddr[MAXPOLLWADDR];
+	struct list_head poll_pending_link;
+	struct p9_poll_wait poll_wait[MAXPOLLWADDR];
 	poll_table pt;
 	struct work_struct rq;
 	struct work_struct wq;
 	unsigned long wsched;
 };
 
-/**
- * struct p9_mux_rpc - fd mux rpc accounting structure
- * @m: connection this request was issued on
- * @err: error state
- * @tcall: request &p9_fcall
- * @rcall: response &p9_fcall
- * @wqueue: wait queue that client is blocked on for this rpc
- *
- * Bug: isn't this information duplicated elsewhere like &p9_req
- */
-
-struct p9_mux_rpc {
-	struct p9_conn *m;
-	int err;
-	struct p9_fcall *tcall;
-	struct p9_fcall *rcall;
-	wait_queue_head_t wqueue;
-};
-
-static int p9_poll_proc(void *);
-static void p9_read_work(struct work_struct *work);
-static void p9_write_work(struct work_struct *work);
-static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address,
-								poll_table *p);
-static int p9_fd_write(struct p9_trans *trans, void *v, int len);
-static int p9_fd_read(struct p9_trans *trans, void *v, int len);
-
-static DEFINE_MUTEX(p9_mux_task_lock);
+static DEFINE_SPINLOCK(p9_poll_lock);
+static LIST_HEAD(p9_poll_pending_list);
 static struct workqueue_struct *p9_mux_wq;
+static struct task_struct *p9_poll_task;
 
-static int p9_mux_num;
-static int p9_mux_poll_task_num;
-static struct p9_mux_poll_task p9_mux_poll_tasks[100];
-
-static void p9_conn_destroy(struct p9_conn *);
-static unsigned int p9_fd_poll(struct p9_trans *trans,
-						struct poll_table_struct *pt);
-
-#ifdef P9_NONBLOCK
-static int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc,
-	p9_conn_req_callback cb, void *a);
-#endif /* P9_NONBLOCK */
-
-static void p9_conn_cancel(struct p9_conn *m, int err);
-
-static u16 p9_mux_get_tag(struct p9_conn *m)
+static void p9_mux_poll_stop(struct p9_conn *m)
 {
-	int tag;
+	unsigned long flags;
+	int i;
 
-	tag = p9_idpool_get(m->tagpool);
-	if (tag < 0)
-		return P9_NOTAG;
-	else
-		return (u16) tag;
-}
+	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
+		struct p9_poll_wait *pwait = &m->poll_wait[i];
 
-static void p9_mux_put_tag(struct p9_conn *m, u16 tag)
-{
-	if (tag != P9_NOTAG && p9_idpool_check(tag, m->tagpool))
-		p9_idpool_put(tag, m->tagpool);
+		if (pwait->wait_addr) {
+			remove_wait_queue(pwait->wait_addr, &pwait->wait);
+			pwait->wait_addr = NULL;
+		}
+	}
+
+	spin_lock_irqsave(&p9_poll_lock, flags);
+	list_del_init(&m->poll_pending_link);
+	spin_unlock_irqrestore(&p9_poll_lock, flags);
 }
 
 /**
- * p9_mux_calc_poll_procs - calculates the number of polling procs
- * @muxnum: number of mounts
+ * p9_conn_cancel - cancel all pending requests with error
+ * @m: mux data
+ * @err: error code
  *
- * Calculation is based on the number of mounted v9fs filesystems.
- * The current implementation returns sqrt of the number of mounts.
  */
 
-static int p9_mux_calc_poll_procs(int muxnum)
+static void p9_conn_cancel(struct p9_conn *m, int err)
 {
-	int n;
-
-	if (p9_mux_poll_task_num)
-		n = muxnum / p9_mux_poll_task_num +
-		    (muxnum % p9_mux_poll_task_num ? 1 : 0);
-	else
-		n = 1;
-
-	if (n > ARRAY_SIZE(p9_mux_poll_tasks))
-		n = ARRAY_SIZE(p9_mux_poll_tasks);
-
-	return n;
-}
+	struct p9_req_t *req, *rtmp;
+	unsigned long flags;
+	LIST_HEAD(cancel_list);
 
-static int p9_mux_poll_start(struct p9_conn *m)
-{
-	int i, n;
-	struct p9_mux_poll_task *vpt, *vptlast;
-	struct task_struct *pproc;
-
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, p9_mux_num,
-		p9_mux_poll_task_num);
-	mutex_lock(&p9_mux_task_lock);
-
-	n = p9_mux_calc_poll_procs(p9_mux_num + 1);
-	if (n > p9_mux_poll_task_num) {
-		for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) {
-			if (p9_mux_poll_tasks[i].task == NULL) {
-				vpt = &p9_mux_poll_tasks[i];
-				P9_DPRINTK(P9_DEBUG_MUX, "create proc %p\n",
-									vpt);
-				pproc = kthread_create(p9_poll_proc, vpt,
-								"v9fs-poll");
-
-				if (!IS_ERR(pproc)) {
-					vpt->task = pproc;
-					INIT_LIST_HEAD(&vpt->mux_list);
-					vpt->muxnum = 0;
-					p9_mux_poll_task_num++;
-					wake_up_process(vpt->task);
-				}
-				break;
-			}
-		}
+	P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
 
-		if (i >= ARRAY_SIZE(p9_mux_poll_tasks))
-			P9_DPRINTK(P9_DEBUG_ERROR,
-					"warning: no free poll slots\n");
-	}
+	spin_lock_irqsave(&m->client->lock, flags);
 
-	n = (p9_mux_num + 1) / p9_mux_poll_task_num +
-	    ((p9_mux_num + 1) % p9_mux_poll_task_num ? 1 : 0);
-
-	vptlast = NULL;
-	for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) {
-		vpt = &p9_mux_poll_tasks[i];
-		if (vpt->task != NULL) {
-			vptlast = vpt;
-			if (vpt->muxnum < n) {
-				P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i);
-				list_add(&m->mux_list, &vpt->mux_list);
-				vpt->muxnum++;
-				m->poll_task = vpt;
-				memset(&m->poll_waddr, 0,
-							sizeof(m->poll_waddr));
-				init_poll_funcptr(&m->pt, p9_pollwait);
-				break;
-			}
-		}
+	if (m->err) {
+		spin_unlock_irqrestore(&m->client->lock, flags);
+		return;
 	}
 
-	if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) {
-		if (vptlast == NULL) {
-			mutex_unlock(&p9_mux_task_lock);
-			return -ENOMEM;
-		}
+	m->err = err;
 
-		P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i);
-		list_add(&m->mux_list, &vptlast->mux_list);
-		vptlast->muxnum++;
-		m->poll_task = vptlast;
-		memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
-		init_poll_funcptr(&m->pt, p9_pollwait);
+	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
+		req->status = REQ_STATUS_ERROR;
+		if (!req->t_err)
+			req->t_err = err;
+		list_move(&req->req_list, &cancel_list);
 	}
-
-	p9_mux_num++;
-	mutex_unlock(&p9_mux_task_lock);
-
-	return 0;
-}
-
-static void p9_mux_poll_stop(struct p9_conn *m)
-{
-	int i;
-	struct p9_mux_poll_task *vpt;
-
-	mutex_lock(&p9_mux_task_lock);
-	vpt = m->poll_task;
-	list_del(&m->mux_list);
-	for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
-		if (m->poll_waddr[i] != NULL) {
-			remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]);
-			m->poll_waddr[i] = NULL;
-		}
+	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
+		req->status = REQ_STATUS_ERROR;
+		if (!req->t_err)
+			req->t_err = err;
+		list_move(&req->req_list, &cancel_list);
 	}
-	vpt->muxnum--;
-	if (!vpt->muxnum) {
-		P9_DPRINTK(P9_DEBUG_MUX, "destroy proc %p\n", vpt);
-		kthread_stop(vpt->task);
-		vpt->task = NULL;
-		p9_mux_poll_task_num--;
+	spin_unlock_irqrestore(&m->client->lock, flags);
+
+	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
+		list_del(&req->req_list);
+		P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req);
+		p9_client_cb(m->client, req);
 	}
-	p9_mux_num--;
-	mutex_unlock(&p9_mux_task_lock);
 }
 
-/**
- * p9_conn_create - allocate and initialize the per-session mux data
- * @trans: transport structure
- *
- * Note: Creates the polling task if this is the first session.
- */
-
-static struct p9_conn *p9_conn_create(struct p9_trans *trans)
+static unsigned int
+p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
 {
-	int i, n;
-	struct p9_conn *m;
+	int ret, n;
+	struct p9_trans_fd *ts = NULL;
 
-	P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans,
-								trans->msize);
-	m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL);
-	if (!m)
-		return ERR_PTR(-ENOMEM);
+	if (client && client->status == Connected)
+		ts = client->trans;
 
-	spin_lock_init(&m->lock);
-	INIT_LIST_HEAD(&m->mux_list);
-	m->msize = trans->msize;
-	m->extended = trans->extended;
-	m->trans = trans;
-	m->tagpool = p9_idpool_create();
-	if (IS_ERR(m->tagpool)) {
-		kfree(m);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (!ts)
+		return -EREMOTEIO;
 
-	INIT_LIST_HEAD(&m->req_list);
-	INIT_LIST_HEAD(&m->unsent_req_list);
-	INIT_WORK(&m->rq, p9_read_work);
-	INIT_WORK(&m->wq, p9_write_work);
-	n = p9_mux_poll_start(m);
-	if (n) {
-		kfree(m);
-		return ERR_PTR(n);
-	}
+	if (!ts->rd->f_op || !ts->rd->f_op->poll)
+		return -EIO;
 
-	n = p9_fd_poll(trans, &m->pt);
-	if (n & POLLIN) {
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m);
-		set_bit(Rpending, &m->wsched);
-	}
+	if (!ts->wr->f_op || !ts->wr->f_op->poll)
+		return -EIO;
 
-	if (n & POLLOUT) {
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m);
-		set_bit(Wpending, &m->wsched);
-	}
+	ret = ts->rd->f_op->poll(ts->rd, pt);
+	if (ret < 0)
+		return ret;
 
-	for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
-		if (IS_ERR(m->poll_waddr[i])) {
-			p9_mux_poll_stop(m);
-			kfree(m);
-			return (void *)m->poll_waddr;	/* the error code */
-		}
+	if (ts->rd != ts->wr) {
+		n = ts->wr->f_op->poll(ts->wr, pt);
+		if (n < 0)
+			return n;
+		ret = (ret & ~POLLOUT) | (n & ~POLLIN);
 	}
 
-	return m;
+	return ret;
 }
 
 /**
- * p9_mux_destroy - cancels all pending requests and frees mux resources
- * @m: mux to destroy
+ * p9_fd_read- read from a fd
+ * @client: client instance
+ * @v: buffer to receive data into
+ * @len: size of receive buffer
  *
  */
 
-static void p9_conn_destroy(struct p9_conn *m)
+static int p9_fd_read(struct p9_client *client, void *v, int len)
 {
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m,
-		m->mux_list.prev, m->mux_list.next);
+	int ret;
+	struct p9_trans_fd *ts = NULL;
 
-	p9_mux_poll_stop(m);
-	cancel_work_sync(&m->rq);
-	cancel_work_sync(&m->wq);
+	if (client && client->status != Disconnected)
+		ts = client->trans;
 
-	p9_conn_cancel(m, -ECONNRESET);
+	if (!ts)
+		return -EREMOTEIO;
 
-	m->trans = NULL;
-	p9_idpool_destroy(m->tagpool);
-	kfree(m);
+	if (!(ts->rd->f_flags & O_NONBLOCK))
+		P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n");
+
+	ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
+	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
+		client->status = Disconnected;
+	return ret;
 }
 
 /**
- * p9_pollwait - add poll task to the wait queue
- * @filp: file pointer being polled
- * @wait_address: wait_q to block on
- * @p: poll state
+ * p9_read_work - called when there is some data to be read from a transport
+ * @work: container of work to be done
  *
- * called by files poll operation to add v9fs-poll task to files wait queue
  */
 
-static void
-p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
+static void p9_read_work(struct work_struct *work)
 {
-	int i;
+	int n, err;
 	struct p9_conn *m;
 
-	m = container_of(p, struct p9_conn, pt);
-	for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++)
-		if (m->poll_waddr[i] == NULL)
-			break;
+	m = container_of(work, struct p9_conn, rq);
 
-	if (i >= ARRAY_SIZE(m->poll_waddr)) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n");
+	if (m->err < 0)
 		return;
-	}
 
-	m->poll_waddr[i] = wait_address;
+	P9_DPRINTK(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos);
 
-	if (!wait_address) {
-		P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n");
-		m->poll_waddr[i] = ERR_PTR(-EIO);
+	if (!m->rbuf) {
+		m->rbuf = m->tmp_buf;
+		m->rpos = 0;
+		m->rsize = 7; /* start by reading header */
+	}
+
+	clear_bit(Rpending, &m->wsched);
+	P9_DPRINTK(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", m,
+					m->rpos, m->rsize, m->rsize-m->rpos);
+	err = p9_fd_read(m->client, m->rbuf + m->rpos,
+						m->rsize - m->rpos);
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
+	if (err == -EAGAIN) {
+		clear_bit(Rworksched, &m->wsched);
 		return;
 	}
 
-	init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task);
-	add_wait_queue(wait_address, &m->poll_wait[i]);
-}
+	if (err <= 0)
+		goto error;
 
-/**
- * p9_poll_mux - polls a mux and schedules read or write works if necessary
- * @m: connection to poll
- *
- */
+	m->rpos += err;
 
-static void p9_poll_mux(struct p9_conn *m)
-{
-	int n;
+	if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */
+		u16 tag;
+		P9_DPRINTK(P9_DEBUG_TRANS, "got new header\n");
 
-	if (m->err < 0)
-		return;
+		n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */
+		if (n >= m->client->msize) {
+			P9_DPRINTK(P9_DEBUG_ERROR,
+				"requested packet size too big: %d\n", n);
+			err = -EIO;
+			goto error;
+		}
 
-	n = p9_fd_poll(m->trans, NULL);
-	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
-		P9_DPRINTK(P9_DEBUG_MUX, "error mux %p err %d\n", m, n);
-		if (n >= 0)
-			n = -ECONNRESET;
-		p9_conn_cancel(m, n);
-	}
+		tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */
+		P9_DPRINTK(P9_DEBUG_TRANS,
+			"mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
 
-	if (n & POLLIN) {
-		set_bit(Rpending, &m->wsched);
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m);
-		if (!test_and_set_bit(Rworksched, &m->wsched)) {
-			P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m);
-			queue_work(p9_mux_wq, &m->rq);
+		m->req = p9_tag_lookup(m->client, tag);
+		if (!m->req) {
+			P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
+								 tag);
+			err = -EIO;
+			goto error;
 		}
-	}
 
-	if (n & POLLOUT) {
-		set_bit(Wpending, &m->wsched);
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m);
-		if ((m->wsize || !list_empty(&m->unsent_req_list))
-		    && !test_and_set_bit(Wworksched, &m->wsched)) {
-			P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m);
-			queue_work(p9_mux_wq, &m->wq);
+		if (m->req->rc == NULL) {
+			m->req->rc = kmalloc(sizeof(struct p9_fcall) +
+						m->client->msize, GFP_KERNEL);
+			if (!m->req->rc) {
+				m->req = NULL;
+				err = -ENOMEM;
+				goto error;
+			}
 		}
+		m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall);
+		memcpy(m->rbuf, m->tmp_buf, m->rsize);
+		m->rsize = n;
 	}
+
+	/* not an else because some packets (like clunk) have no payload */
+	if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
+		P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n");
+		spin_lock(&m->client->lock);
+		list_del(&m->req->req_list);
+		spin_unlock(&m->client->lock);
+		p9_client_cb(m->client, m->req);
+
+		m->rbuf = NULL;
+		m->rpos = 0;
+		m->rsize = 0;
+		m->req = NULL;
+	}
+
+	if (!list_empty(&m->req_list)) {
+		if (test_and_clear_bit(Rpending, &m->wsched))
+			n = POLLIN;
+		else
+			n = p9_fd_poll(m->client, NULL);
+
+		if (n & POLLIN) {
+			P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
+			queue_work(p9_mux_wq, &m->rq);
+		} else
+			clear_bit(Rworksched, &m->wsched);
+	} else
+		clear_bit(Rworksched, &m->wsched);
+
+	return;
+error:
+	p9_conn_cancel(m, err);
+	clear_bit(Rworksched, &m->wsched);
 }
 
 /**
- * p9_poll_proc - poll worker thread
- * @a: thread state and arguments
- *
- * polls all v9fs transports for new events and queues the appropriate
- * work to the work queue
+ * p9_fd_write - write to a socket
+ * @client: client instance
+ * @v: buffer to send data from
+ * @len: size of send buffer
  *
  */
 
-static int p9_poll_proc(void *a)
+static int p9_fd_write(struct p9_client *client, void *v, int len)
 {
-	struct p9_conn *m, *mtmp;
-	struct p9_mux_poll_task *vpt;
+	int ret;
+	mm_segment_t oldfs;
+	struct p9_trans_fd *ts = NULL;
 
-	vpt = a;
-	P9_DPRINTK(P9_DEBUG_MUX, "start %p %p\n", current, vpt);
-	while (!kthread_should_stop()) {
-		set_current_state(TASK_INTERRUPTIBLE);
+	if (client && client->status != Disconnected)
+		ts = client->trans;
 
-		list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) {
-			p9_poll_mux(m);
-		}
+	if (!ts)
+		return -EREMOTEIO;
 
-		P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n");
-		schedule_timeout(SCHED_TIMEOUT * HZ);
-	}
+	if (!(ts->wr->f_flags & O_NONBLOCK))
+		P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n");
 
-	__set_current_state(TASK_RUNNING);
-	P9_DPRINTK(P9_DEBUG_MUX, "finish\n");
-	return 0;
+	oldfs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
+	set_fs(oldfs);
+
+	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
+		client->status = Disconnected;
+	return ret;
 }
 
 /**
@@ -584,7 +435,7 @@ static void p9_write_work(struct work_struct *work)
 {
 	int n, err;
 	struct p9_conn *m;
-	struct p9_req *req;
+	struct p9_req_t *req;
 
 	m = container_of(work, struct p9_conn, wq);
 
@@ -599,25 +450,23 @@ static void p9_write_work(struct work_struct *work)
 			return;
 		}
 
-		spin_lock(&m->lock);
-again:
-		req = list_entry(m->unsent_req_list.next, struct p9_req,
+		spin_lock(&m->client->lock);
+		req = list_entry(m->unsent_req_list.next, struct p9_req_t,
 			       req_list);
+		req->status = REQ_STATUS_SENT;
 		list_move_tail(&req->req_list, &m->req_list);
-		if (req->err == ERREQFLUSH)
-			goto again;
 
-		m->wbuf = req->tcall->sdata;
-		m->wsize = req->tcall->size;
+		m->wbuf = req->tc->sdata;
+		m->wsize = req->tc->size;
 		m->wpos = 0;
-		spin_unlock(&m->lock);
+		spin_unlock(&m->client->lock);
 	}
 
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos,
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos,
 								m->wsize);
 	clear_bit(Wpending, &m->wsched);
-	err = p9_fd_write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos);
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p sent %d bytes\n", m, err);
+	err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos);
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err);
 	if (err == -EAGAIN) {
 		clear_bit(Wworksched, &m->wsched);
 		return;
@@ -638,10 +487,10 @@ again:
 		if (test_and_clear_bit(Wpending, &m->wsched))
 			n = POLLOUT;
 		else
-			n = p9_fd_poll(m->trans, NULL);
+			n = p9_fd_poll(m->client, NULL);
 
 		if (n & POLLOUT) {
-			P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m);
+			P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
 			queue_work(p9_mux_wq, &m->wq);
 		} else
 			clear_bit(Wworksched, &m->wsched);
@@ -655,504 +504,197 @@ error:
 	clear_bit(Wworksched, &m->wsched);
 }
 
-static void process_request(struct p9_conn *m, struct p9_req *req)
+static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
-	int ecode;
-	struct p9_str *ename;
-
-	if (!req->err && req->rcall->id == P9_RERROR) {
-		ecode = req->rcall->params.rerror.errno;
-		ename = &req->rcall->params.rerror.error;
-
-		P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len,
-								ename->str);
-
-		if (m->extended)
-			req->err = -ecode;
+	struct p9_poll_wait *pwait =
+		container_of(wait, struct p9_poll_wait, wait);
+	struct p9_conn *m = pwait->conn;
+	unsigned long flags;
+	DECLARE_WAITQUEUE(dummy_wait, p9_poll_task);
 
-		if (!req->err) {
-			req->err = p9_errstr2errno(ename->str, ename->len);
+	spin_lock_irqsave(&p9_poll_lock, flags);
+	if (list_empty(&m->poll_pending_link))
+		list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
+	spin_unlock_irqrestore(&p9_poll_lock, flags);
 
-			/* string match failed */
-			if (!req->err) {
-				PRINT_FCALL_ERROR("unknown error", req->rcall);
-				req->err = -ESERVERFAULT;
-			}
-		}
-	} else if (req->tcall && req->rcall->id != req->tcall->id + 1) {
-		P9_DPRINTK(P9_DEBUG_ERROR,
-				"fcall mismatch: expected %d, got %d\n",
-				req->tcall->id + 1, req->rcall->id);
-		if (!req->err)
-			req->err = -EIO;
-	}
+	/* perform the default wake up operation */
+	return default_wake_function(&dummy_wait, mode, sync, key);
 }
 
 /**
- * p9_read_work - called when there is some data to be read from a transport
- * @work: container of work to be done
+ * p9_pollwait - add poll task to the wait queue
+ * @filp: file pointer being polled
+ * @wait_address: wait_q to block on
+ * @p: poll state
  *
+ * called by files poll operation to add v9fs-poll task to files wait queue
  */
 
-static void p9_read_work(struct work_struct *work)
+static void
+p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
 {
-	int n, err;
-	struct p9_conn *m;
-	struct p9_req *req, *rptr, *rreq;
-	struct p9_fcall *rcall;
-	char *rbuf;
-
-	m = container_of(work, struct p9_conn, rq);
-
-	if (m->err < 0)
-		return;
-
-	rcall = NULL;
-	P9_DPRINTK(P9_DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
+	struct p9_conn *m = container_of(p, struct p9_conn, pt);
+	struct p9_poll_wait *pwait = NULL;
+	int i;
 
-	if (!m->rcall) {
-		m->rcall =
-		    kmalloc(sizeof(struct p9_fcall) + m->msize, GFP_KERNEL);
-		if (!m->rcall) {
-			err = -ENOMEM;
-			goto error;
+	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
+		if (m->poll_wait[i].wait_addr == NULL) {
+			pwait = &m->poll_wait[i];
+			break;
 		}
-
-		m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall);
-		m->rpos = 0;
 	}
 
-	clear_bit(Rpending, &m->wsched);
-	err = p9_fd_read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos);
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p got %d bytes\n", m, err);
-	if (err == -EAGAIN) {
-		clear_bit(Rworksched, &m->wsched);
+	if (!pwait) {
+		P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n");
 		return;
 	}
 
-	if (err <= 0)
-		goto error;
-
-	m->rpos += err;
-	while (m->rpos > 4) {
-		n = le32_to_cpu(*(__le32 *) m->rbuf);
-		if (n >= m->msize) {
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				"requested packet size too big: %d\n", n);
-			err = -EIO;
-			goto error;
-		}
-
-		if (m->rpos < n)
-			break;
-
-		err =
-		    p9_deserialize_fcall(m->rbuf, n, m->rcall, m->extended);
-		if (err < 0)
-			goto error;
-
-#ifdef CONFIG_NET_9P_DEBUG
-		if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
-			char buf[150];
-
-			p9_printfcall(buf, sizeof(buf), m->rcall,
-				m->extended);
-			printk(KERN_NOTICE ">>> %p %s\n", m, buf);
-		}
-#endif
-
-		rcall = m->rcall;
-		rbuf = m->rbuf;
-		if (m->rpos > n) {
-			m->rcall = kmalloc(sizeof(struct p9_fcall) + m->msize,
-					   GFP_KERNEL);
-			if (!m->rcall) {
-				err = -ENOMEM;
-				goto error;
-			}
-
-			m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall);
-			memmove(m->rbuf, rbuf + n, m->rpos - n);
-			m->rpos -= n;
-		} else {
-			m->rcall = NULL;
-			m->rbuf = NULL;
-			m->rpos = 0;
-		}
-
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p fcall id %d tag %d\n", m,
-							rcall->id, rcall->tag);
-
-		req = NULL;
-		spin_lock(&m->lock);
-		list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
-			if (rreq->tag == rcall->tag) {
-				req = rreq;
-				if (req->flush != Flushing)
-					list_del(&req->req_list);
-				break;
-			}
-		}
-		spin_unlock(&m->lock);
-
-		if (req) {
-			req->rcall = rcall;
-			process_request(m, req);
-
-			if (req->flush != Flushing) {
-				if (req->cb)
-					(*req->cb) (req, req->cba);
-				else
-					kfree(req->rcall);
-			}
-		} else {
-			if (err >= 0 && rcall->id != P9_RFLUSH)
-				P9_DPRINTK(P9_DEBUG_ERROR,
-				  "unexpected response mux %p id %d tag %d\n",
-				  m, rcall->id, rcall->tag);
-			kfree(rcall);
-		}
-	}
-
-	if (!list_empty(&m->req_list)) {
-		if (test_and_clear_bit(Rpending, &m->wsched))
-			n = POLLIN;
-		else
-			n = p9_fd_poll(m->trans, NULL);
-
-		if (n & POLLIN) {
-			P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m);
-			queue_work(p9_mux_wq, &m->rq);
-		} else
-			clear_bit(Rworksched, &m->wsched);
-	} else
-		clear_bit(Rworksched, &m->wsched);
-
-	return;
-
-error:
-	p9_conn_cancel(m, err);
-	clear_bit(Rworksched, &m->wsched);
+	pwait->conn = m;
+	pwait->wait_addr = wait_address;
+	init_waitqueue_func_entry(&pwait->wait, p9_pollwake);
+	add_wait_queue(wait_address, &pwait->wait);
 }
 
 /**
- * p9_send_request - send 9P request
- * The function can sleep until the request is scheduled for sending.
- * The function can be interrupted. Return from the function is not
- * a guarantee that the request is sent successfully. Can return errors
- * that can be retrieved by PTR_ERR macros.
- *
- * @m: mux data
- * @tc: request to be sent
- * @cb: callback function to call when response is received
- * @cba: parameter to pass to the callback function
+ * p9_conn_create - allocate and initialize the per-session mux data
+ * @client: client instance
  *
+ * Note: Creates the polling task if this is the first session.
  */
 
-static struct p9_req *p9_send_request(struct p9_conn *m,
-					  struct p9_fcall *tc,
-					  p9_conn_req_callback cb, void *cba)
+static struct p9_conn *p9_conn_create(struct p9_client *client)
 {
 	int n;
-	struct p9_req *req;
-
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current,
-		tc, tc->id);
-	if (m->err < 0)
-		return ERR_PTR(m->err);
-
-	req = kmalloc(sizeof(struct p9_req), GFP_KERNEL);
-	if (!req)
-		return ERR_PTR(-ENOMEM);
-
-	if (tc->id == P9_TVERSION)
-		n = P9_NOTAG;
-	else
-		n = p9_mux_get_tag(m);
+	struct p9_conn *m;
 
-	if (n < 0) {
-		kfree(req);
+	P9_DPRINTK(P9_DEBUG_TRANS, "client %p msize %d\n", client,
+								client->msize);
+	m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL);
+	if (!m)
 		return ERR_PTR(-ENOMEM);
-	}
 
-	p9_set_tag(tc, n);
+	INIT_LIST_HEAD(&m->mux_list);
+	m->client = client;
 
-#ifdef CONFIG_NET_9P_DEBUG
-	if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
-		char buf[150];
+	INIT_LIST_HEAD(&m->req_list);
+	INIT_LIST_HEAD(&m->unsent_req_list);
+	INIT_WORK(&m->rq, p9_read_work);
+	INIT_WORK(&m->wq, p9_write_work);
+	INIT_LIST_HEAD(&m->poll_pending_link);
+	init_poll_funcptr(&m->pt, p9_pollwait);
 
-		p9_printfcall(buf, sizeof(buf), tc, m->extended);
-		printk(KERN_NOTICE "<<< %p %s\n", m, buf);
+	n = p9_fd_poll(client, &m->pt);
+	if (n & POLLIN) {
+		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m);
+		set_bit(Rpending, &m->wsched);
 	}
-#endif
-
-	spin_lock_init(&req->lock);
-	req->tag = n;
-	req->tcall = tc;
-	req->rcall = NULL;
-	req->err = 0;
-	req->cb = cb;
-	req->cba = cba;
-	req->flush = None;
-
-	spin_lock(&m->lock);
-	list_add_tail(&req->req_list, &m->unsent_req_list);
-	spin_unlock(&m->lock);
-
-	if (test_and_clear_bit(Wpending, &m->wsched))
-		n = POLLOUT;
-	else
-		n = p9_fd_poll(m->trans, NULL);
 
-	if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
-		queue_work(p9_mux_wq, &m->wq);
+	if (n & POLLOUT) {
+		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m);
+		set_bit(Wpending, &m->wsched);
+	}
 
-	return req;
+	return m;
 }
 
-static void p9_mux_free_request(struct p9_conn *m, struct p9_req *req)
-{
-	p9_mux_put_tag(m, req->tag);
-	kfree(req);
-}
+/**
+ * p9_poll_mux - polls a mux and schedules read or write works if necessary
+ * @m: connection to poll
+ *
+ */
 
-static void p9_mux_flush_cb(struct p9_req *freq, void *a)
+static void p9_poll_mux(struct p9_conn *m)
 {
-	int tag;
-	struct p9_conn *m;
-	struct p9_req *req, *rreq, *rptr;
-
-	m = a;
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m,
-		freq->tcall, freq->rcall, freq->err,
-		freq->tcall->params.tflush.oldtag);
-
-	spin_lock(&m->lock);
-	tag = freq->tcall->params.tflush.oldtag;
-	req = NULL;
-	list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
-		if (rreq->tag == tag) {
-			req = rreq;
-			list_del(&req->req_list);
-			break;
-		}
-	}
-	spin_unlock(&m->lock);
+	int n;
 
-	if (req) {
-		spin_lock(&req->lock);
-		req->flush = Flushed;
-		spin_unlock(&req->lock);
+	if (m->err < 0)
+		return;
 
-		if (req->cb)
-			(*req->cb) (req, req->cba);
-		else
-			kfree(req->rcall);
+	n = p9_fd_poll(m->client, NULL);
+	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
+		P9_DPRINTK(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
+		if (n >= 0)
+			n = -ECONNRESET;
+		p9_conn_cancel(m, n);
 	}
 
-	kfree(freq->tcall);
-	kfree(freq->rcall);
-	p9_mux_free_request(m, freq);
-}
-
-static int
-p9_mux_flush_request(struct p9_conn *m, struct p9_req *req)
-{
-	struct p9_fcall *fc;
-	struct p9_req *rreq, *rptr;
-
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
-
-	/* if a response was received for a request, do nothing */
-	spin_lock(&req->lock);
-	if (req->rcall || req->err) {
-		spin_unlock(&req->lock);
-		P9_DPRINTK(P9_DEBUG_MUX,
-			"mux %p req %p response already received\n", m, req);
-		return 0;
+	if (n & POLLIN) {
+		set_bit(Rpending, &m->wsched);
+		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m);
+		if (!test_and_set_bit(Rworksched, &m->wsched)) {
+			P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
+			queue_work(p9_mux_wq, &m->rq);
+		}
 	}
 
-	req->flush = Flushing;
-	spin_unlock(&req->lock);
-
-	spin_lock(&m->lock);
-	/* if the request is not sent yet, just remove it from the list */
-	list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) {
-		if (rreq->tag == req->tag) {
-			P9_DPRINTK(P9_DEBUG_MUX,
-			   "mux %p req %p request is not sent yet\n", m, req);
-			list_del(&rreq->req_list);
-			req->flush = Flushed;
-			spin_unlock(&m->lock);
-			if (req->cb)
-				(*req->cb) (req, req->cba);
-			return 0;
+	if (n & POLLOUT) {
+		set_bit(Wpending, &m->wsched);
+		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m);
+		if ((m->wsize || !list_empty(&m->unsent_req_list))
+		    && !test_and_set_bit(Wworksched, &m->wsched)) {
+			P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
+			queue_work(p9_mux_wq, &m->wq);
 		}
 	}
-	spin_unlock(&m->lock);
-
-	clear_thread_flag(TIF_SIGPENDING);
-	fc = p9_create_tflush(req->tag);
-	p9_send_request(m, fc, p9_mux_flush_cb, m);
-	return 1;
-}
-
-static void
-p9_conn_rpc_cb(struct p9_req *req, void *a)
-{
-	struct p9_mux_rpc *r;
-
-	P9_DPRINTK(P9_DEBUG_MUX, "req %p r %p\n", req, a);
-	r = a;
-	r->rcall = req->rcall;
-	r->err = req->err;
-
-	if (req->flush != None && !req->err)
-		r->err = -ERESTARTSYS;
-
-	wake_up(&r->wqueue);
 }
 
 /**
- * p9_fd_rpc- sends 9P request and waits until a response is available.
- *	The function can be interrupted.
- * @t: transport data
- * @tc: request to be sent
- * @rc: pointer where a pointer to the response is stored
+ * p9_fd_request - send 9P request
+ * The function can sleep until the request is scheduled for sending.
+ * The function can be interrupted. Return from the function is not
+ * a guarantee that the request is sent successfully.
+ *
+ * @client: client instance
+ * @req: request to be sent
  *
  */
 
-int
-p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
+static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
 {
-	struct p9_trans_fd *p = t->priv;
-	struct p9_conn *m = p->conn;
-	int err, sigpending;
-	unsigned long flags;
-	struct p9_req *req;
-	struct p9_mux_rpc r;
-
-	r.err = 0;
-	r.tcall = tc;
-	r.rcall = NULL;
-	r.m = m;
-	init_waitqueue_head(&r.wqueue);
-
-	if (rc)
-		*rc = NULL;
-
-	sigpending = 0;
-	if (signal_pending(current)) {
-		sigpending = 1;
-		clear_thread_flag(TIF_SIGPENDING);
-	}
-
-	req = p9_send_request(m, tc, p9_conn_rpc_cb, &r);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err);
-		return err;
-	}
+	int n;
+	struct p9_trans_fd *ts = client->trans;
+	struct p9_conn *m = ts->conn;
 
-	err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0);
-	if (r.err < 0)
-		err = r.err;
-
-	if (err == -ERESTARTSYS && m->trans->status == Connected
-							&& m->err == 0) {
-		if (p9_mux_flush_request(m, req)) {
-			/* wait until we get response of the flush message */
-			do {
-				clear_thread_flag(TIF_SIGPENDING);
-				err = wait_event_interruptible(r.wqueue,
-					r.rcall || r.err);
-			} while (!r.rcall && !r.err && err == -ERESTARTSYS &&
-				m->trans->status == Connected && !m->err);
-
-			err = -ERESTARTSYS;
-		}
-		sigpending = 1;
-	}
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m,
+						current, req->tc, req->tc->id);
+	if (m->err < 0)
+		return m->err;
 
-	if (sigpending) {
-		spin_lock_irqsave(&current->sighand->siglock, flags);
-		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sighand->siglock, flags);
-	}
+	spin_lock(&client->lock);
+	req->status = REQ_STATUS_UNSENT;
+	list_add_tail(&req->req_list, &m->unsent_req_list);
+	spin_unlock(&client->lock);
 
-	if (rc)
-		*rc = r.rcall;
+	if (test_and_clear_bit(Wpending, &m->wsched))
+		n = POLLOUT;
 	else
-		kfree(r.rcall);
+		n = p9_fd_poll(m->client, NULL);
 
-	p9_mux_free_request(m, req);
-	if (err > 0)
-		err = -EIO;
+	if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
+		queue_work(p9_mux_wq, &m->wq);
 
-	return err;
+	return 0;
 }
 
-#ifdef P9_NONBLOCK
-/**
- * p9_conn_rpcnb - sends 9P request without waiting for response.
- * @m: mux data
- * @tc: request to be sent
- * @cb: callback function to be called when response arrives
- * @a: value to pass to the callback function
- *
- */
-
-int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc,
-		   p9_conn_req_callback cb, void *a)
+static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 {
-	int err;
-	struct p9_req *req;
+	struct p9_trans_fd *ts = client->trans;
+	struct p9_conn *m = ts->conn;
+	int ret = 1;
 
-	req = p9_send_request(m, tc, cb, a);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err);
-		return PTR_ERR(req);
-	}
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p req %p\n", m, req);
 
-	P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag);
-	return 0;
-}
-#endif /* P9_NONBLOCK */
+	spin_lock(&client->lock);
+	list_del(&req->req_list);
 
-/**
- * p9_conn_cancel - cancel all pending requests with error
- * @m: mux data
- * @err: error code
- *
- */
-
-void p9_conn_cancel(struct p9_conn *m, int err)
-{
-	struct p9_req *req, *rtmp;
-	LIST_HEAD(cancel_list);
-
-	P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
-	m->err = err;
-	spin_lock(&m->lock);
-	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
-		list_move(&req->req_list, &cancel_list);
+	if (req->status == REQ_STATUS_UNSENT) {
+		req->status = REQ_STATUS_FLSHD;
+		ret = 0;
 	}
-	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
-		list_move(&req->req_list, &cancel_list);
-	}
-	spin_unlock(&m->lock);
 
-	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
-		list_del(&req->req_list);
-		if (!req->err)
-			req->err = err;
+	spin_unlock(&client->lock);
 
-		if (req->cb)
-			(*req->cb) (req, req->cba);
-		else
-			kfree(req->rcall);
-	}
+	return ret;
 }
 
 /**
@@ -1216,7 +758,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 	return 0;
 }
 
-static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd)
+static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
 {
 	struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd),
 					   GFP_KERNEL);
@@ -1234,13 +776,13 @@ static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd)
 		return -EIO;
 	}
 
-	trans->priv = ts;
-	trans->status = Connected;
+	client->trans = ts;
+	client->status = Connected;
 
 	return 0;
 }
 
-static int p9_socket_open(struct p9_trans *trans, struct socket *csocket)
+static int p9_socket_open(struct p9_client *client, struct socket *csocket)
 {
 	int fd, ret;
 
@@ -1251,137 +793,65 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket)
 		return fd;
 	}
 
-	ret = p9_fd_open(trans, fd, fd);
+	ret = p9_fd_open(client, fd, fd);
 	if (ret < 0) {
 		P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to open fd\n");
 		sockfd_put(csocket);
 		return ret;
 	}
 
-	((struct p9_trans_fd *)trans->priv)->rd->f_flags |= O_NONBLOCK;
+	((struct p9_trans_fd *)client->trans)->rd->f_flags |= O_NONBLOCK;
 
 	return 0;
 }
 
 /**
- * p9_fd_read- read from a fd
- * @trans: transport instance state
- * @v: buffer to receive data into
- * @len: size of receive buffer
- *
- */
-
-static int p9_fd_read(struct p9_trans *trans, void *v, int len)
-{
-	int ret;
-	struct p9_trans_fd *ts = NULL;
-
-	if (trans && trans->status != Disconnected)
-		ts = trans->priv;
-
-	if (!ts)
-		return -EREMOTEIO;
-
-	if (!(ts->rd->f_flags & O_NONBLOCK))
-		P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n");
-
-	ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
-	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
-		trans->status = Disconnected;
-	return ret;
-}
-
-/**
- * p9_fd_write - write to a socket
- * @trans: transport instance state
- * @v: buffer to send data from
- * @len: size of send buffer
+ * p9_mux_destroy - cancels all pending requests and frees mux resources
+ * @m: mux to destroy
  *
  */
 
-static int p9_fd_write(struct p9_trans *trans, void *v, int len)
-{
-	int ret;
-	mm_segment_t oldfs;
-	struct p9_trans_fd *ts = NULL;
-
-	if (trans && trans->status != Disconnected)
-		ts = trans->priv;
-
-	if (!ts)
-		return -EREMOTEIO;
-
-	if (!(ts->wr->f_flags & O_NONBLOCK))
-		P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n");
-
-	oldfs = get_fs();
-	set_fs(get_ds());
-	/* The cast to a user pointer is valid due to the set_fs() */
-	ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
-	set_fs(oldfs);
-
-	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
-		trans->status = Disconnected;
-	return ret;
-}
-
-static unsigned int
-p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt)
+static void p9_conn_destroy(struct p9_conn *m)
 {
-	int ret, n;
-	struct p9_trans_fd *ts = NULL;
-
-	if (trans && trans->status == Connected)
-		ts = trans->priv;
-
-	if (!ts)
-		return -EREMOTEIO;
-
-	if (!ts->rd->f_op || !ts->rd->f_op->poll)
-		return -EIO;
-
-	if (!ts->wr->f_op || !ts->wr->f_op->poll)
-		return -EIO;
+	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m,
+		m->mux_list.prev, m->mux_list.next);
 
-	ret = ts->rd->f_op->poll(ts->rd, pt);
-	if (ret < 0)
-		return ret;
+	p9_mux_poll_stop(m);
+	cancel_work_sync(&m->rq);
+	cancel_work_sync(&m->wq);
 
-	if (ts->rd != ts->wr) {
-		n = ts->wr->f_op->poll(ts->wr, pt);
-		if (n < 0)
-			return n;
-		ret = (ret & ~POLLOUT) | (n & ~POLLIN);
-	}
+	p9_conn_cancel(m, -ECONNRESET);
 
-	return ret;
+	m->client = NULL;
+	kfree(m);
 }
 
 /**
- * p9_fd_close - shutdown socket
- * @trans: private socket structure
+ * p9_fd_close - shutdown file descriptor transport
+ * @client: client instance
  *
  */
 
-static void p9_fd_close(struct p9_trans *trans)
+static void p9_fd_close(struct p9_client *client)
 {
 	struct p9_trans_fd *ts;
 
-	if (!trans)
+	if (!client)
 		return;
 
-	ts = xchg(&trans->priv, NULL);
-
+	ts = client->trans;
 	if (!ts)
 		return;
 
+	client->status = Disconnected;
+
 	p9_conn_destroy(ts->conn);
 
-	trans->status = Disconnected;
 	if (ts->rd)
 		fput(ts->rd);
 	if (ts->wr)
 		fput(ts->wr);
+
 	kfree(ts);
 }
 
@@ -1402,31 +872,23 @@ static inline int valid_ipaddr4(const char *buf)
 	return 0;
 }
 
-static struct p9_trans *
-p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu)
+static int
+p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 {
 	int err;
-	struct p9_trans *trans;
 	struct socket *csocket;
 	struct sockaddr_in sin_server;
 	struct p9_fd_opts opts;
-	struct p9_trans_fd *p;
+	struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */
 
 	err = parse_opts(args, &opts);
 	if (err < 0)
-		return ERR_PTR(err);
+		return err;
 
 	if (valid_ipaddr4(addr) < 0)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	csocket = NULL;
-	trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL);
-	if (!trans)
-		return ERR_PTR(-ENOMEM);
-	trans->msize = msize;
-	trans->extended = dotu;
-	trans->rpc = p9_fd_rpc;
-	trans->close = p9_fd_close;
 
 	sin_server.sin_family = AF_INET;
 	sin_server.sin_addr.s_addr = in_aton(addr);
@@ -1449,45 +911,38 @@ p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu)
 		goto error;
 	}
 
-	err = p9_socket_open(trans, csocket);
+	err = p9_socket_open(client, csocket);
 	if (err < 0)
 		goto error;
 
-	p = (struct p9_trans_fd *) trans->priv;
-	p->conn = p9_conn_create(trans);
+	p = (struct p9_trans_fd *) client->trans;
+	p->conn = p9_conn_create(client);
 	if (IS_ERR(p->conn)) {
 		err = PTR_ERR(p->conn);
 		p->conn = NULL;
 		goto error;
 	}
 
-	return trans;
+	return 0;
 
 error:
 	if (csocket)
 		sock_release(csocket);
 
-	kfree(trans);
-	return ERR_PTR(err);
+	kfree(p);
+
+	return err;
 }
 
-static struct p9_trans *
-p9_trans_create_unix(const char *addr, char *args, int msize,
-							unsigned char dotu)
+static int
+p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
 {
 	int err;
 	struct socket *csocket;
 	struct sockaddr_un sun_server;
-	struct p9_trans *trans;
-	struct p9_trans_fd *p;
+	struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */
 
 	csocket = NULL;
-	trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL);
-	if (!trans)
-		return ERR_PTR(-ENOMEM);
-
-	trans->rpc = p9_fd_rpc;
-	trans->close = p9_fd_close;
 
 	if (strlen(addr) > UNIX_PATH_MAX) {
 		P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n",
@@ -1508,79 +963,69 @@ p9_trans_create_unix(const char *addr, char *args, int msize,
 		goto error;
 	}
 
-	err = p9_socket_open(trans, csocket);
+	err = p9_socket_open(client, csocket);
 	if (err < 0)
 		goto error;
 
-	trans->msize = msize;
-	trans->extended = dotu;
-	p = (struct p9_trans_fd *) trans->priv;
-	p->conn = p9_conn_create(trans);
+	p = (struct p9_trans_fd *) client->trans;
+	p->conn = p9_conn_create(client);
 	if (IS_ERR(p->conn)) {
 		err = PTR_ERR(p->conn);
 		p->conn = NULL;
 		goto error;
 	}
 
-	return trans;
+	return 0;
 
 error:
 	if (csocket)
 		sock_release(csocket);
 
-	kfree(trans);
-	return ERR_PTR(err);
+	kfree(p);
+	return err;
 }
 
-static struct p9_trans *
-p9_trans_create_fd(const char *name, char *args, int msize,
-							unsigned char extended)
+static int
+p9_fd_create(struct p9_client *client, const char *addr, char *args)
 {
 	int err;
-	struct p9_trans *trans;
 	struct p9_fd_opts opts;
-	struct p9_trans_fd *p;
+	struct p9_trans_fd *p = NULL; /* this get allocated in p9_fd_open */
 
 	parse_opts(args, &opts);
 
 	if (opts.rfd == ~0 || opts.wfd == ~0) {
 		printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
-		return ERR_PTR(-ENOPROTOOPT);
+		return -ENOPROTOOPT;
 	}
 
-	trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL);
-	if (!trans)
-		return ERR_PTR(-ENOMEM);
-
-	trans->rpc = p9_fd_rpc;
-	trans->close = p9_fd_close;
-
-	err = p9_fd_open(trans, opts.rfd, opts.wfd);
+	err = p9_fd_open(client, opts.rfd, opts.wfd);
 	if (err < 0)
 		goto error;
 
-	trans->msize = msize;
-	trans->extended = extended;
-	p = (struct p9_trans_fd *) trans->priv;
-	p->conn = p9_conn_create(trans);
+	p = (struct p9_trans_fd *) client->trans;
+	p->conn = p9_conn_create(client);
 	if (IS_ERR(p->conn)) {
 		err = PTR_ERR(p->conn);
 		p->conn = NULL;
 		goto error;
 	}
 
-	return trans;
+	return 0;
 
 error:
-	kfree(trans);
-	return ERR_PTR(err);
+	kfree(p);
+	return err;
 }
 
 static struct p9_trans_module p9_tcp_trans = {
 	.name = "tcp",
 	.maxsize = MAX_SOCK_BUF,
 	.def = 1,
-	.create = p9_trans_create_tcp,
+	.create = p9_fd_create_tcp,
+	.close = p9_fd_close,
+	.request = p9_fd_request,
+	.cancel = p9_fd_cancel,
 	.owner = THIS_MODULE,
 };
 
@@ -1588,7 +1033,10 @@ static struct p9_trans_module p9_unix_trans = {
 	.name = "unix",
 	.maxsize = MAX_SOCK_BUF,
 	.def = 0,
-	.create = p9_trans_create_unix,
+	.create = p9_fd_create_unix,
+	.close = p9_fd_close,
+	.request = p9_fd_request,
+	.cancel = p9_fd_cancel,
 	.owner = THIS_MODULE,
 };
 
@@ -1596,23 +1044,71 @@ static struct p9_trans_module p9_fd_trans = {
 	.name = "fd",
 	.maxsize = MAX_SOCK_BUF,
 	.def = 0,
-	.create = p9_trans_create_fd,
+	.create = p9_fd_create,
+	.close = p9_fd_close,
+	.request = p9_fd_request,
+	.cancel = p9_fd_cancel,
 	.owner = THIS_MODULE,
 };
 
-int p9_trans_fd_init(void)
+/**
+ * p9_poll_proc - poll worker thread
+ * @a: thread state and arguments
+ *
+ * polls all v9fs transports for new events and queues the appropriate
+ * work to the work queue
+ *
+ */
+
+static int p9_poll_proc(void *a)
 {
-	int i;
+	unsigned long flags;
+
+	P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current);
+ repeat:
+	spin_lock_irqsave(&p9_poll_lock, flags);
+	while (!list_empty(&p9_poll_pending_list)) {
+		struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
+							struct p9_conn,
+							poll_pending_link);
+		list_del_init(&conn->poll_pending_link);
+		spin_unlock_irqrestore(&p9_poll_lock, flags);
+
+		p9_poll_mux(conn);
+
+		spin_lock_irqsave(&p9_poll_lock, flags);
+	}
+	spin_unlock_irqrestore(&p9_poll_lock, flags);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (list_empty(&p9_poll_pending_list)) {
+		P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n");
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
+	if (!kthread_should_stop())
+		goto repeat;
 
-	for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++)
-		p9_mux_poll_tasks[i].task = NULL;
+	P9_DPRINTK(P9_DEBUG_TRANS, "finish\n");
+	return 0;
+}
 
+int p9_trans_fd_init(void)
+{
 	p9_mux_wq = create_workqueue("v9fs");
 	if (!p9_mux_wq) {
 		printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
 		return -ENOMEM;
 	}
 
+	p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll");
+	if (IS_ERR(p9_poll_task)) {
+		destroy_workqueue(p9_mux_wq);
+		printk(KERN_WARNING "v9fs: mux: creating poll task failed\n");
+		return PTR_ERR(p9_poll_task);
+	}
+
 	v9fs_register_trans(&p9_tcp_trans);
 	v9fs_register_trans(&p9_unix_trans);
 	v9fs_register_trans(&p9_fd_trans);
@@ -1622,6 +1118,7 @@ int p9_trans_fd_init(void)
 
 void p9_trans_fd_exit(void)
 {
+	kthread_stop(p9_poll_task);
 	v9fs_unregister_trans(&p9_tcp_trans);
 	v9fs_unregister_trans(&p9_unix_trans);
 	v9fs_unregister_trans(&p9_fd_trans);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 94912e077a55..2d7781ec663b 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -1,12 +1,10 @@
 /*
- * The Guest 9p transport driver
+ * The Virtio 9p transport driver
  *
  * This is a block based transport driver based on the lguest block driver
  * code.
  *
- */
-/*
- *  Copyright (C) 2007 Eric Van Hensbergen, IBM Corporation
+ *  Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation
  *
  *  Based on virtio console driver
  *  Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation
@@ -41,6 +39,7 @@
 #include <linux/file.h>
 #include <net/9p/9p.h>
 #include <linux/parser.h>
+#include <net/9p/client.h>
 #include <net/9p/transport.h>
 #include <linux/scatterlist.h>
 #include <linux/virtio.h>
@@ -53,50 +52,6 @@ static DEFINE_MUTEX(virtio_9p_lock);
 /* global which tracks highest initialized channel */
 static int chan_index;
 
-#define P9_INIT_MAXTAG	16
-
-
-/**
- * enum p9_req_status_t - virtio request status
- * @REQ_STATUS_IDLE: request slot unused
- * @REQ_STATUS_SENT: request sent to server
- * @REQ_STATUS_RCVD: response received from server
- * @REQ_STATUS_FLSH: request has been flushed
- *
- * The @REQ_STATUS_IDLE state is used to mark a request slot as unused
- * but use is actually tracked by the idpool structure which handles tag
- * id allocation.
- *
- */
-
-enum p9_req_status_t {
-	REQ_STATUS_IDLE,
-	REQ_STATUS_SENT,
-	REQ_STATUS_RCVD,
-	REQ_STATUS_FLSH,
-};
-
-/**
- * struct p9_req_t - virtio request slots
- * @status: status of this request slot
- * @wq: wait_queue for the client to block on for this request
- *
- * The virtio transport uses an array to track outstanding requests
- * instead of a list.  While this may incurr overhead during initial
- * allocation or expansion, it makes request lookup much easier as the
- * tag id is a index into an array.  (We use tag+1 so that we can accomodate
- * the -1 tag for the T_VERSION request).
- * This also has the nice effect of only having to allocate wait_queues
- * once, instead of constantly allocating and freeing them.  Its possible
- * other resources could benefit from this scheme as well.
- *
- */
-
-struct p9_req_t {
-	int status;
-	wait_queue_head_t *wq;
-};
-
 /**
  * struct virtio_chan - per-instance transport information
  * @initialized: whether the channel is initialized
@@ -121,67 +76,14 @@ static struct virtio_chan {
 
 	spinlock_t lock;
 
+	struct p9_client *client;
 	struct virtio_device *vdev;
 	struct virtqueue *vq;
 
-	struct p9_idpool *tagpool;
-	struct p9_req_t *reqs;
-	int max_tag;
-
 	/* Scatterlist: can be too big for stack. */
 	struct scatterlist sg[VIRTQUEUE_NUM];
 } channels[MAX_9P_CHAN];
 
-/**
- * p9_lookup_tag - Lookup requests by tag
- * @c: virtio channel to lookup tag within
- * @tag: numeric id for transaction
- *
- * this is a simple array lookup, but will grow the
- * request_slots as necessary to accomodate transaction
- * ids which did not previously have a slot.
- *
- * Bugs: there is currently no upper limit on request slots set
- * here, but that should be constrained by the id accounting.
- */
-
-static struct p9_req_t *p9_lookup_tag(struct virtio_chan *c, u16 tag)
-{
-	/* This looks up the original request by tag so we know which
-	 * buffer to read the data into */
-	tag++;
-
-	while (tag >= c->max_tag) {
-		int old_max = c->max_tag;
-		int count;
-
-		if (c->max_tag)
-			c->max_tag *= 2;
-		else
-			c->max_tag = P9_INIT_MAXTAG;
-
-		c->reqs = krealloc(c->reqs, sizeof(struct p9_req_t)*c->max_tag,
-								GFP_ATOMIC);
-		if (!c->reqs) {
-			printk(KERN_ERR "Couldn't grow tag array\n");
-			BUG();
-		}
-		for (count = old_max; count < c->max_tag; count++) {
-			c->reqs[count].status = REQ_STATUS_IDLE;
-			c->reqs[count].wq = kmalloc(sizeof(wait_queue_head_t),
-								GFP_ATOMIC);
-			if (!c->reqs[count].wq) {
-				printk(KERN_ERR "Couldn't grow tag array\n");
-				BUG();
-			}
-			init_waitqueue_head(c->reqs[count].wq);
-		}
-	}
-
-	return &c->reqs[tag];
-}
-
-
 /* How many bytes left in this page. */
 static unsigned int rest_of_page(void *data)
 {
@@ -197,25 +99,13 @@ static unsigned int rest_of_page(void *data)
  *
  */
 
-static void p9_virtio_close(struct p9_trans *trans)
+static void p9_virtio_close(struct p9_client *client)
 {
-	struct virtio_chan *chan = trans->priv;
-	int count;
-	unsigned long flags;
-
-	spin_lock_irqsave(&chan->lock, flags);
-	p9_idpool_destroy(chan->tagpool);
-	for (count = 0; count < chan->max_tag; count++)
-		kfree(chan->reqs[count].wq);
-	kfree(chan->reqs);
-	chan->max_tag = 0;
-	spin_unlock_irqrestore(&chan->lock, flags);
+	struct virtio_chan *chan = client->trans;
 
 	mutex_lock(&virtio_9p_lock);
 	chan->inuse = false;
 	mutex_unlock(&virtio_9p_lock);
-
-	kfree(trans);
 }
 
 /**
@@ -236,17 +126,16 @@ static void req_done(struct virtqueue *vq)
 	struct virtio_chan *chan = vq->vdev->priv;
 	struct p9_fcall *rc;
 	unsigned int len;
-	unsigned long flags;
 	struct p9_req_t *req;
 
-	spin_lock_irqsave(&chan->lock, flags);
+	P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
+
 	while ((rc = chan->vq->vq_ops->get_buf(chan->vq, &len)) != NULL) {
-		req = p9_lookup_tag(chan, rc->tag);
-		req->status = REQ_STATUS_RCVD;
-		wake_up(req->wq);
+		P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
+		P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
+		req = p9_tag_lookup(chan->client, rc->tag);
+		p9_client_cb(chan->client, req);
 	}
-	/* In case queue is stopped waiting for more buffers. */
-	spin_unlock_irqrestore(&chan->lock, flags);
 }
 
 /**
@@ -283,8 +172,14 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
 	return index-start;
 }
 
+/* We don't currently allow canceling of virtio requests */
+static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
+{
+	return 1;
+}
+
 /**
- * p9_virtio_rpc - issue a request and wait for a response
+ * p9_virtio_request - issue a request
  * @t: transport state
  * @tc: &p9_fcall request to transmit
  * @rc: &p9_fcall to put reponse into
@@ -292,44 +187,22 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
  */
 
 static int
-p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
+p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 {
 	int in, out;
-	int n, err, size;
-	struct virtio_chan *chan = t->priv;
-	char *rdata;
-	struct p9_req_t *req;
-	unsigned long flags;
-
-	if (*rc == NULL) {
-		*rc = kmalloc(sizeof(struct p9_fcall) + t->msize, GFP_KERNEL);
-		if (!*rc)
-			return -ENOMEM;
-	}
-
-	rdata = (char *)*rc+sizeof(struct p9_fcall);
-
-	n = P9_NOTAG;
-	if (tc->id != P9_TVERSION) {
-		n = p9_idpool_get(chan->tagpool);
-		if (n < 0)
-			return -ENOMEM;
-	}
-
-	spin_lock_irqsave(&chan->lock, flags);
-	req = p9_lookup_tag(chan, n);
-	spin_unlock_irqrestore(&chan->lock, flags);
+	struct virtio_chan *chan = client->trans;
+	char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
 
-	p9_set_tag(tc, n);
+	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
 
-	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio rpc tag %d\n", n);
-
-	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, tc->sdata, tc->size);
-	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, t->msize);
+	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
+								req->tc->size);
+	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata,
+								client->msize);
 
 	req->status = REQ_STATUS_SENT;
 
-	if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, tc)) {
+	if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc)) {
 		P9_DPRINTK(P9_DEBUG_TRANS,
 			"9p debug: virtio rpc add_buf returned failure");
 		return -EIO;
@@ -337,31 +210,7 @@ p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
 
 	chan->vq->vq_ops->kick(chan->vq);
 
-	wait_event(*req->wq, req->status == REQ_STATUS_RCVD);
-
-	size = le32_to_cpu(*(__le32 *) rdata);
-
-	err = p9_deserialize_fcall(rdata, size, *rc, t->extended);
-	if (err < 0) {
-		P9_DPRINTK(P9_DEBUG_TRANS,
-			"9p debug: virtio rpc deserialize returned %d\n", err);
-		return err;
-	}
-
-#ifdef CONFIG_NET_9P_DEBUG
-	if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) {
-		char buf[150];
-
-		p9_printfcall(buf, sizeof(buf), *rc, t->extended);
-		printk(KERN_NOTICE ">>> %p %s\n", t, buf);
-	}
-#endif
-
-	if (n != P9_NOTAG && p9_idpool_check(n, chan->tagpool))
-		p9_idpool_put(n, chan->tagpool);
-
-	req->status = REQ_STATUS_IDLE;
-
+	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
 	return 0;
 }
 
@@ -422,10 +271,9 @@ fail:
 
 /**
  * p9_virtio_create - allocate a new virtio channel
+ * @client: client instance invoking this transport
  * @devname: string identifying the channel to connect to (unused)
  * @args: args passed from sys_mount() for per-transport options (unused)
- * @msize: requested maximum packet size
- * @extended: 9p2000.u enabled flag
  *
  * This sets up a transport channel for 9p communication.  Right now
  * we only match the first available channel, but eventually we couldlook up
@@ -441,11 +289,9 @@ fail:
  *
  */
 
-static struct p9_trans *
-p9_virtio_create(const char *devname, char *args, int msize,
-							unsigned char extended)
+static int
+p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 {
-	struct p9_trans *trans;
 	struct virtio_chan *chan = channels;
 	int index = 0;
 
@@ -463,30 +309,13 @@ p9_virtio_create(const char *devname, char *args, int msize,
 
 	if (index >= MAX_9P_CHAN) {
 		printk(KERN_ERR "9p: no channels available\n");
-		return ERR_PTR(-ENODEV);
+		return -ENODEV;
 	}
 
-	chan->tagpool = p9_idpool_create();
-	if (IS_ERR(chan->tagpool)) {
-		printk(KERN_ERR "9p: couldn't allocate tagpool\n");
-		return ERR_PTR(-ENOMEM);
-	}
-	p9_idpool_get(chan->tagpool); /* reserve tag 0 */
-	chan->max_tag = 0;
-	chan->reqs = NULL;
-
-	trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL);
-	if (!trans) {
-		printk(KERN_ERR "9p: couldn't allocate transport\n");
-		return ERR_PTR(-ENOMEM);
-	}
-	trans->extended = extended;
-	trans->msize = msize;
-	trans->close = p9_virtio_close;
-	trans->rpc = p9_virtio_rpc;
-	trans->priv = chan;
+	client->trans = (void *)chan;
+	chan->client = client;
 
-	return trans;
+	return 0;
 }
 
 /**
@@ -526,6 +355,9 @@ static struct virtio_driver p9_virtio_drv = {
 static struct p9_trans_module p9_virtio_trans = {
 	.name = "virtio",
 	.create = p9_virtio_create,
+	.close = p9_virtio_close,
+	.request = p9_virtio_request,
+	.cancel = p9_virtio_cancel,
 	.maxsize = PAGE_SIZE*16,
 	.def = 0,
 	.owner = THIS_MODULE,
diff --git a/net/9p/util.c b/net/9p/util.c
index 958fc58cd1ff..dc4ec05ad93d 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -105,6 +105,7 @@ retry:
 	else if (error)
 		return -1;
 
+	P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", i, p);
 	return i;
 }
 EXPORT_SYMBOL(p9_idpool_get);
@@ -121,6 +122,9 @@ EXPORT_SYMBOL(p9_idpool_get);
 void p9_idpool_put(int id, struct p9_idpool *p)
 {
 	unsigned long flags;
+
+	P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", id, p);
+
 	spin_lock_irqsave(&p->lock, flags);
 	idr_remove(&p->pool, id);
 	spin_unlock_irqrestore(&p->lock, flags);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 0c850427a85b..d3134e7e6ee8 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -2,7 +2,7 @@
  *	DDP:	An implementation of the AppleTalk DDP protocol for
  *		Ethernet 'ELAP'.
  *
- *		Alan Cox  <Alan.Cox@linux.org>
+ *		Alan Cox  <alan@lxorguk.ukuu.org.uk>
  *
  *		With more than a little assistance from
  *
@@ -1934,6 +1934,6 @@ static void __exit atalk_exit(void)
 module_exit(atalk_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Alan Cox <Alan.Cox@linux.org>");
+MODULE_AUTHOR("Alan Cox <alan@lxorguk.ukuu.org.uk>");
 MODULE_DESCRIPTION("AppleTalk 0.20\n");
 MODULE_ALIAS_NETPROTO(PF_APPLETALK);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index f6348e078aa4..8f9431a12c6f 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -37,10 +37,7 @@
 #include <linux/poll.h>
 #include <net/sock.h>
 #include <asm/ioctls.h>
-
-#if defined(CONFIG_KMOD)
 #include <linux/kmod.h>
-#endif
 
 #include <net/bluetooth/bluetooth.h>
 
@@ -145,11 +142,8 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto)
 	if (proto < 0 || proto >= BT_MAX_PROTO)
 		return -EINVAL;
 
-#if defined(CONFIG_KMOD)
-	if (!bt_proto[proto]) {
+	if (!bt_proto[proto])
 		request_module("bt-proto-%d", proto);
-	}
-#endif
 
 	err = -EPROTONOSUPPORT;
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 96434d774c84..acdeab3d9807 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -578,7 +578,7 @@ static int hidp_session(void *arg)
 	if (session->hid) {
 		if (session->hid->claimed & HID_CLAIMED_INPUT)
 			hidinput_disconnect(session->hid);
-		hid_free_device(session->hid);
+		hid_destroy_device(session->hid);
 	}
 
 	/* Wakeup user-space polling for socket errors */
@@ -623,9 +623,15 @@ static struct device *hidp_get_device(struct hidp_session *session)
 static int hidp_setup_input(struct hidp_session *session,
 				struct hidp_connadd_req *req)
 {
-	struct input_dev *input = session->input;
+	struct input_dev *input;
 	int i;
 
+	input = input_allocate_device();
+	if (!input)
+		return -ENOMEM;
+
+	session->input = input;
+
 	input_set_drvdata(input, session);
 
 	input->name = "Bluetooth HID Boot Protocol Device";
@@ -677,67 +683,114 @@ static void hidp_close(struct hid_device *hid)
 {
 }
 
-static const struct {
-	__u16 idVendor;
-	__u16 idProduct;
-	unsigned quirks;
-} hidp_blacklist[] = {
-	/* Apple wireless Mighty Mouse */
-	{ 0x05ac, 0x030c, HID_QUIRK_MIGHTYMOUSE | HID_QUIRK_INVERT_HWHEEL },
+static int hidp_parse(struct hid_device *hid)
+{
+	struct hidp_session *session = hid->driver_data;
+	struct hidp_connadd_req *req = session->req;
+	unsigned char *buf;
+	int ret;
 
-	{ }	/* Terminating entry */
-};
+	buf = kmalloc(req->rd_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, req->rd_data, req->rd_size)) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	ret = hid_parse_report(session->hid, buf, req->rd_size);
+
+	kfree(buf);
+
+	if (ret)
+		return ret;
+
+	session->req = NULL;
+
+	return 0;
+}
+
+static int hidp_start(struct hid_device *hid)
+{
+	struct hidp_session *session = hid->driver_data;
+	struct hid_report *report;
 
-static void hidp_setup_quirks(struct hid_device *hid)
+	list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].
+			report_list, list)
+		hidp_send_report(session, report);
+
+	list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].
+			report_list, list)
+		hidp_send_report(session, report);
+
+	return 0;
+}
+
+static void hidp_stop(struct hid_device *hid)
 {
-	unsigned int n;
+	struct hidp_session *session = hid->driver_data;
+
+	skb_queue_purge(&session->ctrl_transmit);
+	skb_queue_purge(&session->intr_transmit);
 
-	for (n = 0; hidp_blacklist[n].idVendor; n++)
-		if (hidp_blacklist[n].idVendor == le16_to_cpu(hid->vendor) &&
-				hidp_blacklist[n].idProduct == le16_to_cpu(hid->product))
-			hid->quirks = hidp_blacklist[n].quirks;
+	if (hid->claimed & HID_CLAIMED_INPUT)
+		hidinput_disconnect(hid);
+	hid->claimed = 0;
 }
 
-static void hidp_setup_hid(struct hidp_session *session,
+static struct hid_ll_driver hidp_hid_driver = {
+	.parse = hidp_parse,
+	.start = hidp_start,
+	.stop = hidp_stop,
+	.open  = hidp_open,
+	.close = hidp_close,
+	.hidinput_input_event = hidp_hidinput_event,
+};
+
+static int hidp_setup_hid(struct hidp_session *session,
 				struct hidp_connadd_req *req)
 {
-	struct hid_device *hid = session->hid;
-	struct hid_report *report;
+	struct hid_device *hid;
 	bdaddr_t src, dst;
+	int ret;
 
-	baswap(&src, &bt_sk(session->ctrl_sock->sk)->src);
-	baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst);
+	hid = hid_allocate_device();
+	if (IS_ERR(hid)) {
+		ret = PTR_ERR(session->hid);
+		goto err;
+	}
 
+	session->hid = hid;
+	session->req = req;
 	hid->driver_data = session;
 
-	hid->country = req->country;
+	baswap(&src, &bt_sk(session->ctrl_sock->sk)->src);
+	baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst);
 
 	hid->bus     = BUS_BLUETOOTH;
 	hid->vendor  = req->vendor;
 	hid->product = req->product;
 	hid->version = req->version;
+	hid->country = req->country;
 
 	strncpy(hid->name, req->name, 128);
 	strncpy(hid->phys, batostr(&src), 64);
 	strncpy(hid->uniq, batostr(&dst), 64);
 
-	hid->dev = hidp_get_device(session);
-
-	hid->hid_open  = hidp_open;
-	hid->hid_close = hidp_close;
-
-	hid->hidinput_input_event = hidp_hidinput_event;
+	hid->dev.parent = hidp_get_device(session);
+	hid->ll_driver = &hidp_hid_driver;
 
-	hidp_setup_quirks(hid);
+	ret = hid_add_device(hid);
+	if (ret)
+		goto err_hid;
 
-	list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].report_list, list)
-		hidp_send_report(session, report);
-
-	list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].report_list, list)
-		hidp_send_report(session, report);
-
-	if (hidinput_connect(hid) == 0)
-		hid->claimed |= HID_CLAIMED_INPUT;
+	return 0;
+err_hid:
+	hid_destroy_device(hid);
+	session->hid = NULL;
+err:
+	return ret;
 }
 
 int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock)
@@ -757,38 +810,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
 
 	BT_DBG("rd_data %p rd_size %d", req->rd_data, req->rd_size);
 
-	if (req->rd_size > 0) {
-		unsigned char *buf = kmalloc(req->rd_size, GFP_KERNEL);
-
-		if (!buf) {
-			kfree(session);
-			return -ENOMEM;
-		}
-
-		if (copy_from_user(buf, req->rd_data, req->rd_size)) {
-			kfree(buf);
-			kfree(session);
-			return -EFAULT;
-		}
-
-		session->hid = hid_parse_report(buf, req->rd_size);
-
-		kfree(buf);
-
-		if (!session->hid) {
-			kfree(session);
-			return -EINVAL;
-		}
-	}
-
-	if (!session->hid) {
-		session->input = input_allocate_device();
-		if (!session->input) {
-			kfree(session);
-			return -ENOMEM;
-		}
-	}
-
 	down_write(&hidp_session_sem);
 
 	s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst);
@@ -816,15 +837,18 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
 	session->flags   = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID);
 	session->idle_to = req->idle_to;
 
-	if (session->input) {
+	if (req->rd_size > 0) {
+		err = hidp_setup_hid(session, req);
+		if (err && err != -ENODEV)
+			goto err_skb;
+	}
+
+	if (!session->hid) {
 		err = hidp_setup_input(session, req);
 		if (err < 0)
-			goto failed;
+			goto err_skb;
 	}
 
-	if (session->hid)
-		hidp_setup_hid(session, req);
-
 	__hidp_link_session(session);
 
 	hidp_set_timer(session);
@@ -850,17 +874,16 @@ unlink:
 
 	__hidp_unlink_session(session);
 
-	if (session->input) {
+	if (session->input)
 		input_unregister_device(session->input);
-		session->input = NULL; /* don't try to free it here */
-	}
-
+	if (session->hid)
+		hid_destroy_device(session->hid);
+err_skb:
+	skb_queue_purge(&session->ctrl_transmit);
+	skb_queue_purge(&session->intr_transmit);
 failed:
 	up_write(&hidp_session_sem);
 
-	if (session->hid)
-		hid_free_device(session->hid);
-
 	input_free_device(session->input);
 	kfree(session);
 	return err;
@@ -950,18 +973,43 @@ int hidp_get_conninfo(struct hidp_conninfo *ci)
 	return err;
 }
 
+static const struct hid_device_id hidp_table[] = {
+	{ HID_BLUETOOTH_DEVICE(HID_ANY_ID, HID_ANY_ID) },
+	{ }
+};
+
+static struct hid_driver hidp_driver = {
+	.name = "generic-bluetooth",
+	.id_table = hidp_table,
+};
+
 static int __init hidp_init(void)
 {
+	int ret;
+
 	l2cap_load();
 
 	BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION);
 
-	return hidp_init_sockets();
+	ret = hid_register_driver(&hidp_driver);
+	if (ret)
+		goto err;
+
+	ret = hidp_init_sockets();
+	if (ret)
+		goto err_drv;
+
+	return 0;
+err_drv:
+	hid_unregister_driver(&hidp_driver);
+err:
+	return ret;
 }
 
 static void __exit hidp_exit(void)
 {
 	hidp_cleanup_sockets();
+	hid_unregister_driver(&hidp_driver);
 }
 
 module_init(hidp_init);
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index 343fb0566b3e..e503c89057ad 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -151,6 +151,8 @@ struct hidp_session {
 
 	struct sk_buff_head ctrl_transmit;
 	struct sk_buff_head intr_transmit;
+
+	struct hidp_connadd_req *req;
 };
 
 static inline void hidp_schedule(struct hidp_session *session)
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index a4abed5b4c44..fa5cda4e552a 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -719,7 +719,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 		return NF_ACCEPT;
 	}
 	*d = (struct net_device *)in;
-	NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
+	NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
 		(struct net_device *)out, br_nf_forward_finish);
 
 	return NF_STOLEN;
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 366d3e9d51f8..ba6f73eb06c6 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -4,6 +4,7 @@
 
 menuconfig BRIDGE_NF_EBTABLES
 	tristate "Ethernet Bridge tables (ebtables) support"
+	depends on BRIDGE && BRIDGE_NETFILTER
 	select NETFILTER_XTABLES
 	help
 	  ebtables is a general, extensible frame/packet identification
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5bb88eb0aad4..0fa208e86405 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -305,23 +305,14 @@ find_inlist_lock_noload(struct list_head *head, const char *name, int *error,
 	return NULL;
 }
 
-#ifndef CONFIG_KMOD
-#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
-#else
 static void *
 find_inlist_lock(struct list_head *head, const char *name, const char *prefix,
    int *error, struct mutex *mutex)
 {
-	void *ret;
-
-	ret = find_inlist_lock_noload(head, name, error, mutex);
-	if (!ret) {
-		request_module("%s%s", prefix, name);
-		ret = find_inlist_lock_noload(head, name, error, mutex);
-	}
-	return ret;
+	return try_then_request_module(
+			find_inlist_lock_noload(head, name, error, mutex),
+			"%s%s", prefix, name);
 }
-#endif
 
 static inline struct ebt_table *
 find_table_lock(const char *name, int *error, struct mutex *mutex)
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 8035fbf526ae..7d4d2b3c137e 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -128,8 +128,8 @@ static int can_create(struct net *net, struct socket *sock, int protocol)
 	if (net != &init_net)
 		return -EAFNOSUPPORT;
 
-#ifdef CONFIG_KMOD
-	/* try to load protocol module, when CONFIG_KMOD is defined */
+#ifdef CONFIG_MODULES
+	/* try to load protocol module kernel is modular */
 	if (!proto_tab[protocol]) {
 		err = request_module("can-proto-%d", protocol);
 
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 52f577a0f544..ee631843c2f5 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -9,7 +9,7 @@
  *	identical recvmsg() code. So we share it here. The poll was
  *	shared before but buried in udp.c so I moved it.
  *
- *	Authors:	Alan Cox <alan@redhat.com>. (datagram_poll() from old
+ *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
  *						     udp.c code)
  *
  *	Fixes:
diff --git a/net/core/dev.c b/net/core/dev.c
index 1408a083fe4e..b8a4fd0806af 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -924,10 +924,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
 		strlcpy(dev->name, newname, IFNAMSIZ);
 
 rollback:
-	err = device_rename(&dev->dev, dev->name);
-	if (err) {
+	ret = device_rename(&dev->dev, dev->name);
+	if (ret) {
 		memcpy(dev->name, oldname, IFNAMSIZ);
-		return err;
+		return ret;
 	}
 
 	write_lock_bh(&dev_base_lock);
@@ -4956,8 +4956,6 @@ EXPORT_SYMBOL(br_fdb_get_hook);
 EXPORT_SYMBOL(br_fdb_put_hook);
 #endif
 
-#ifdef CONFIG_KMOD
 EXPORT_SYMBOL(dev_load);
-#endif
 
 EXPORT_PER_CPU_SYMBOL(softnet_data);
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 5402b3b38e0d..9e2fa39f22a3 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -6,7 +6,7 @@
  *		Richard Underwood <richard@wuzz.demon.co.uk>
  *
  *	Stir fried together from the IP multicast and CAP patches above
- *		Alan Cox <Alan.Cox@linux.org>
+ *		Alan Cox <alan@lxorguk.ukuu.org.uk>
  *
  *	Fixes:
  *		Alan Cox	:	Update the device on a real delete
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b0dc818a91d7..f1d07b5c1e17 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -96,7 +96,7 @@ static void net_free(struct net *net)
 		return;
 	}
 #endif
-
+	kfree(net->gen);
 	kmem_cache_free(net_cachep, net);
 }
 
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a756847e3814..99f656d35b4f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2474,7 +2474,7 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
 				if (ret < 0) {
 					printk(KERN_ERR "Error expanding "
 					       "ipsec packet %d\n",ret);
-					return 0;
+					goto err;
 				}
 			}
 
@@ -2484,8 +2484,7 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
 			if (ret) {
 				printk(KERN_ERR "Error creating ipsec "
 				       "packet %d\n",ret);
-				kfree_skb(skb);
-				return 0;
+				goto err;
 			}
 			/* restore ll */
 			eth = (__u8 *) skb_push(skb, ETH_HLEN);
@@ -2494,6 +2493,9 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
 		}
 	}
 	return 1;
+err:
+	kfree_skb(skb);
+	return 0;
 }
 #endif
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3630131fa1fa..31f29d2989fd 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1040,7 +1040,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct nlattr *linkinfo[IFLA_INFO_MAX+1];
 	int err;
 
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 replay:
 #endif
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
@@ -1129,7 +1129,7 @@ replay:
 			return -EOPNOTSUPP;
 
 		if (!ops) {
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 			if (kind[0]) {
 				__rtnl_unlock();
 				request_module("rtnl-link-%s", kind);
diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c
index 1f49afcd8e86..86234923a3b7 100644
--- a/net/core/skb_dma_map.c
+++ b/net/core/skb_dma_map.c
@@ -35,7 +35,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
 	return 0;
 
 unwind:
-	while (i-- >= 0) {
+	while (--i >= 0) {
 		skb_frag_t *fp = &sp->frags[i];
 
 		dma_unmap_page(dev, sp->dma_maps[i + 1],
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7f7bb1a636d9..4e22e3a35359 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1,7 +1,7 @@
 /*
  *	Routines having to do with the 'struct sk_buff' memory handlers.
  *
- *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
+ *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
  *			Florian La Roche <rzsfl@rz.uni-sb.de>
  *
  *	Fixes:
diff --git a/net/core/stream.c b/net/core/stream.c
index a6b3437ff082..8727cead64ad 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -9,7 +9,7 @@
  *
  *     Authors:        Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *                     (from old tcp.c code)
- *                     Alan Cox <alan@redhat.com> (Borrowed comments 8-))
+ *                     Alan Cox <alan@lxorguk.ukuu.org.uk> (Borrowed comments 8-))
  */
 
 #include <linux/module.h>
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 4809753d12ae..8fe931a3d7a1 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -154,7 +154,7 @@ struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp)
 	struct ccid *ccid = NULL;
 
 	ccids_read_lock();
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 	if (ccids[id] == NULL) {
 		/* We only try to load if in process context */
 		ccids_read_unlock();
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 11062780bb02..d4ce1224e008 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -259,7 +259,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 	fl.fl6_flowlabel = 0;
 	fl.oif = ireq6->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-	fl.fl_ip_sport = inet_sk(sk)->sport;
+	fl.fl_ip_sport = inet_rsk(req)->loc_port;
 	security_req_classify_flow(req, &fl);
 
 	opt = np->opt;
@@ -558,7 +558,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-		fl.fl_ip_sport = inet_sk(sk)->sport;
+		fl.fl_ip_sport = inet_rsk(req)->loc_port;
 		security_sk_classify_flow(sk, &fl);
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index b2804e2d1b8c..e6bf99e3e41a 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -309,6 +309,7 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
 	struct dccp_request_sock *dreq = dccp_rsk(req);
 
 	inet_rsk(req)->rmt_port	  = dccp_hdr(skb)->dccph_sport;
+	inet_rsk(req)->loc_port	  = dccp_hdr(skb)->dccph_dport;
 	inet_rsk(req)->acked	  = 0;
 	req->rcv_wnd		  = sysctl_dccp_feat_sequence_window;
 	dreq->dreq_timestamp_echo = 0;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d06945c7d3df..809d803d5006 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -347,7 +347,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	/* Build and checksum header */
 	dh = dccp_zeroed_hdr(skb, dccp_header_size);
 
-	dh->dccph_sport	= inet_sk(sk)->sport;
+	dh->dccph_sport	= inet_rsk(req)->loc_port;
 	dh->dccph_dport	= inet_rsk(req)->rmt_port;
 	dh->dccph_doff	= (dccp_header_size +
 			   DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 2f0ac3c3eb71..28e26bd08e24 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -152,7 +152,7 @@ static struct dn_dev_parms dn_dev_list[] =  {
 
 #define DN_DEV_LIST_SIZE ARRAY_SIZE(dn_dev_list)
 
-#define DN_DEV_PARMS_OFFSET(x) ((int) ((char *) &((struct dn_dev_parms *)0)->x))
+#define DN_DEV_PARMS_OFFSET(x) offsetof(struct dn_dev_parms, x)
 
 #ifdef CONFIG_SYSCTL
 
@@ -166,7 +166,7 @@ static int max_priority[] = { 127 }; /* From DECnet spec */
 
 static int dn_forwarding_proc(ctl_table *, int, struct file *,
 			void __user *, size_t *, loff_t *);
-static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
+static int dn_forwarding_sysctl(ctl_table *table,
 			void __user *oldval, size_t __user *oldlenp,
 			void __user *newval, size_t newlen);
 
@@ -318,7 +318,7 @@ static int dn_forwarding_proc(ctl_table *table, int write,
 #endif
 }
 
-static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
+static int dn_forwarding_sysctl(ctl_table *table,
 			void __user *oldval, size_t __user *oldlenp,
 			void __user *newval, size_t newlen)
 {
@@ -490,9 +490,7 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg)
 		return -EFAULT;
 	ifr->ifr_name[IFNAMSIZ-1] = 0;
 
-#ifdef CONFIG_KMOD
 	dev_load(&init_net, ifr->ifr_name);
-#endif
 
 	switch(cmd) {
 		case SIOCGIFADDR:
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 228067c571ba..36400b266896 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -132,7 +132,7 @@ static int parse_addr(__le16 *addr, char *str)
 }
 
 
-static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen,
+static int dn_node_address_strategy(ctl_table *table,
 				void __user *oldval, size_t __user *oldlenp,
 				void __user *newval, size_t newlen)
 {
@@ -217,7 +217,7 @@ static int dn_node_address_handler(ctl_table *table, int write,
 }
 
 
-static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen,
+static int dn_def_dev_strategy(ctl_table *table,
 				void __user *oldval, size_t __user *oldlenp,
 				void __user *newval, size_t newlen)
 {
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index cdce4c6c672a..49211b35725b 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,7 +1,7 @@
 menuconfig NET_DSA
 	bool "Distributed Switch Architecture support"
 	default n
-	depends on EXPERIMENTAL
+	depends on EXPERIMENTAL && !S390
 	select PHYLIB
 	---help---
 	  This allows you to use hardware switch chips that use
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index b043eda60b04..1a9dd66511fc 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -663,7 +663,7 @@ out:
 void arp_xmit(struct sk_buff *skb)
 {
 	/* Send it off, maybe filter it using firewalling first.  */
-	NF_HOOK(NF_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
+	NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
 }
 
 /*
@@ -928,7 +928,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
 
-	return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
+	return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
 
 freeskb:
 	kfree_skb(skb);
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2c0e4572cc90..490e035c6d90 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -13,7 +13,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -47,17 +47,7 @@
 #include <asm/bug.h>
 #include <asm/unaligned.h>
 
-struct cipso_v4_domhsh_entry {
-	char *domain;
-	u32 valid;
-	struct list_head list;
-	struct rcu_head rcu;
-};
-
 /* List of available DOI definitions */
-/* XXX - Updates should be minimal so having a single lock for the
- * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be
- * okay. */
 /* XXX - This currently assumes a minimal number of different DOIs in use,
  * if in practice there are a lot of different DOIs this list should
  * probably be turned into a hash table or something similar so we
@@ -119,6 +109,19 @@ int cipso_v4_rbm_strictvalid = 1;
  * be omitted. */
 #define CIPSO_V4_TAG_RNG_CAT_MAX      8
 
+/* Base length of the local tag (non-standard tag).
+ *  Tag definition (may change between kernel versions)
+ *
+ * 0          8          16         24         32
+ * +----------+----------+----------+----------+
+ * | 10000000 | 00000110 | 32-bit secid value  |
+ * +----------+----------+----------+----------+
+ * | in (host byte order)|
+ * +----------+----------+
+ *
+ */
+#define CIPSO_V4_TAG_LOC_BLEN         6
+
 /*
  * Helper Functions
  */
@@ -194,25 +197,6 @@ static void cipso_v4_bitmap_setbit(unsigned char *bitmap,
 }
 
 /**
- * cipso_v4_doi_domhsh_free - Frees a domain list entry
- * @entry: the entry's RCU field
- *
- * Description:
- * This function is designed to be used as a callback to the call_rcu()
- * function so that the memory allocated to a domain list entry can be released
- * safely.
- *
- */
-static void cipso_v4_doi_domhsh_free(struct rcu_head *entry)
-{
-	struct cipso_v4_domhsh_entry *ptr;
-
-	ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu);
-	kfree(ptr->domain);
-	kfree(ptr);
-}
-
-/**
  * cipso_v4_cache_entry_free - Frees a cache entry
  * @entry: the entry to free
  *
@@ -457,7 +441,7 @@ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
 	struct cipso_v4_doi *iter;
 
 	list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
-		if (iter->doi == doi && iter->valid)
+		if (iter->doi == doi && atomic_read(&iter->refcount))
 			return iter;
 	return NULL;
 }
@@ -496,14 +480,17 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
 			if (doi_def->type != CIPSO_V4_MAP_PASS)
 				return -EINVAL;
 			break;
+		case CIPSO_V4_TAG_LOCAL:
+			if (doi_def->type != CIPSO_V4_MAP_LOCAL)
+				return -EINVAL;
+			break;
 		default:
 			return -EINVAL;
 		}
 	}
 
-	doi_def->valid = 1;
+	atomic_set(&doi_def->refcount, 1);
 	INIT_RCU_HEAD(&doi_def->rcu);
-	INIT_LIST_HEAD(&doi_def->dom_list);
 
 	spin_lock(&cipso_v4_doi_list_lock);
 	if (cipso_v4_doi_search(doi_def->doi) != NULL)
@@ -519,59 +506,129 @@ doi_add_failure:
 }
 
 /**
+ * cipso_v4_doi_free - Frees a DOI definition
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function frees all of the memory associated with a DOI definition.
+ *
+ */
+void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
+{
+	if (doi_def == NULL)
+		return;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_TRANS:
+		kfree(doi_def->map.std->lvl.cipso);
+		kfree(doi_def->map.std->lvl.local);
+		kfree(doi_def->map.std->cat.cipso);
+		kfree(doi_def->map.std->cat.local);
+		break;
+	}
+	kfree(doi_def);
+}
+
+/**
+ * cipso_v4_doi_free_rcu - Frees a DOI definition via the RCU pointer
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to the DOI definition can be released
+ * safely.
+ *
+ */
+static void cipso_v4_doi_free_rcu(struct rcu_head *entry)
+{
+	struct cipso_v4_doi *doi_def;
+
+	doi_def = container_of(entry, struct cipso_v4_doi, rcu);
+	cipso_v4_doi_free(doi_def);
+}
+
+/**
  * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine
  * @doi: the DOI value
  * @audit_secid: the LSM secid to use in the audit message
- * @callback: the DOI cleanup/free callback
  *
  * Description:
- * Removes a DOI definition from the CIPSO engine, @callback is called to
- * free any memory.  The NetLabel routines will be called to release their own
- * LSM domain mappings as well as our own domain list.  Returns zero on
- * success and negative values on failure.
+ * Removes a DOI definition from the CIPSO engine.  The NetLabel routines will
+ * be called to release their own LSM domain mappings as well as our own
+ * domain list.  Returns zero on success and negative values on failure.
  *
  */
-int cipso_v4_doi_remove(u32 doi,
-			struct netlbl_audit *audit_info,
-			void (*callback) (struct rcu_head * head))
+int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info)
 {
 	struct cipso_v4_doi *doi_def;
-	struct cipso_v4_domhsh_entry *dom_iter;
 
 	spin_lock(&cipso_v4_doi_list_lock);
 	doi_def = cipso_v4_doi_search(doi);
-	if (doi_def != NULL) {
-		doi_def->valid = 0;
-		list_del_rcu(&doi_def->list);
+	if (doi_def == NULL) {
 		spin_unlock(&cipso_v4_doi_list_lock);
-		rcu_read_lock();
-		list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
-			if (dom_iter->valid)
-				netlbl_cfg_map_del(dom_iter->domain,
-						   audit_info);
-		rcu_read_unlock();
-		cipso_v4_cache_invalidate();
-		call_rcu(&doi_def->rcu, callback);
-		return 0;
+		return -ENOENT;
+	}
+	if (!atomic_dec_and_test(&doi_def->refcount)) {
+		spin_unlock(&cipso_v4_doi_list_lock);
+		return -EBUSY;
 	}
+	list_del_rcu(&doi_def->list);
 	spin_unlock(&cipso_v4_doi_list_lock);
 
-	return -ENOENT;
+	cipso_v4_cache_invalidate();
+	call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
+
+	return 0;
 }
 
 /**
- * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition
+ * cipso_v4_doi_getdef - Returns a reference to a valid DOI definition
  * @doi: the DOI value
  *
  * Description:
  * Searches for a valid DOI definition and if one is found it is returned to
  * the caller.  Otherwise NULL is returned.  The caller must ensure that
- * rcu_read_lock() is held while accessing the returned definition.
+ * rcu_read_lock() is held while accessing the returned definition and the DOI
+ * definition reference count is decremented when the caller is done.
  *
  */
 struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
 {
-	return cipso_v4_doi_search(doi);
+	struct cipso_v4_doi *doi_def;
+
+	rcu_read_lock();
+	doi_def = cipso_v4_doi_search(doi);
+	if (doi_def == NULL)
+		goto doi_getdef_return;
+	if (!atomic_inc_not_zero(&doi_def->refcount))
+		doi_def = NULL;
+
+doi_getdef_return:
+	rcu_read_unlock();
+	return doi_def;
+}
+
+/**
+ * cipso_v4_doi_putdef - Releases a reference for the given DOI definition
+ * @doi_def: the DOI definition
+ *
+ * Description:
+ * Releases a DOI definition reference obtained from cipso_v4_doi_getdef().
+ *
+ */
+void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def)
+{
+	if (doi_def == NULL)
+		return;
+
+	if (!atomic_dec_and_test(&doi_def->refcount))
+		return;
+	spin_lock(&cipso_v4_doi_list_lock);
+	list_del_rcu(&doi_def->list);
+	spin_unlock(&cipso_v4_doi_list_lock);
+
+	cipso_v4_cache_invalidate();
+	call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
 }
 
 /**
@@ -597,7 +654,7 @@ int cipso_v4_doi_walk(u32 *skip_cnt,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list)
-		if (iter_doi->valid) {
+		if (atomic_read(&iter_doi->refcount) > 0) {
 			if (doi_cnt++ < *skip_cnt)
 				continue;
 			ret_val = callback(iter_doi, cb_arg);
@@ -613,85 +670,6 @@ doi_walk_return:
 	return ret_val;
 }
 
-/**
- * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition
- * @doi_def: the DOI definition
- * @domain: the domain to add
- *
- * Description:
- * Adds the @domain to the DOI specified by @doi_def, this function
- * should only be called by external functions (i.e. NetLabel).  This function
- * does allocate memory.  Returns zero on success, negative values on failure.
- *
- */
-int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain)
-{
-	struct cipso_v4_domhsh_entry *iter;
-	struct cipso_v4_domhsh_entry *new_dom;
-
-	new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL);
-	if (new_dom == NULL)
-		return -ENOMEM;
-	if (domain) {
-		new_dom->domain = kstrdup(domain, GFP_KERNEL);
-		if (new_dom->domain == NULL) {
-			kfree(new_dom);
-			return -ENOMEM;
-		}
-	}
-	new_dom->valid = 1;
-	INIT_RCU_HEAD(&new_dom->rcu);
-
-	spin_lock(&cipso_v4_doi_list_lock);
-	list_for_each_entry(iter, &doi_def->dom_list, list)
-		if (iter->valid &&
-		    ((domain != NULL && iter->domain != NULL &&
-		      strcmp(iter->domain, domain) == 0) ||
-		     (domain == NULL && iter->domain == NULL))) {
-			spin_unlock(&cipso_v4_doi_list_lock);
-			kfree(new_dom->domain);
-			kfree(new_dom);
-			return -EEXIST;
-		}
-	list_add_tail_rcu(&new_dom->list, &doi_def->dom_list);
-	spin_unlock(&cipso_v4_doi_list_lock);
-
-	return 0;
-}
-
-/**
- * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition
- * @doi_def: the DOI definition
- * @domain: the domain to remove
- *
- * Description:
- * Removes the @domain from the DOI specified by @doi_def, this function
- * should only be called by external functions (i.e. NetLabel).   Returns zero
- * on success and negative values on error.
- *
- */
-int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
-			       const char *domain)
-{
-	struct cipso_v4_domhsh_entry *iter;
-
-	spin_lock(&cipso_v4_doi_list_lock);
-	list_for_each_entry(iter, &doi_def->dom_list, list)
-		if (iter->valid &&
-		    ((domain != NULL && iter->domain != NULL &&
-		      strcmp(iter->domain, domain) == 0) ||
-		     (domain == NULL && iter->domain == NULL))) {
-			iter->valid = 0;
-			list_del_rcu(&iter->list);
-			spin_unlock(&cipso_v4_doi_list_lock);
-			call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free);
-			return 0;
-		}
-	spin_unlock(&cipso_v4_doi_list_lock);
-
-	return -ENOENT;
-}
-
 /*
  * Label Mapping Functions
  */
@@ -712,7 +690,7 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level)
 	switch (doi_def->type) {
 	case CIPSO_V4_MAP_PASS:
 		return 0;
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
 			return 0;
 		break;
@@ -741,7 +719,7 @@ static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def,
 	case CIPSO_V4_MAP_PASS:
 		*net_lvl = host_lvl;
 		return 0;
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		if (host_lvl < doi_def->map.std->lvl.local_size &&
 		    doi_def->map.std->lvl.local[host_lvl] < CIPSO_V4_INV_LVL) {
 			*net_lvl = doi_def->map.std->lvl.local[host_lvl];
@@ -775,7 +753,7 @@ static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def,
 	case CIPSO_V4_MAP_PASS:
 		*host_lvl = net_lvl;
 		return 0;
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		map_tbl = doi_def->map.std;
 		if (net_lvl < map_tbl->lvl.cipso_size &&
 		    map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) {
@@ -812,7 +790,7 @@ static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def,
 	switch (doi_def->type) {
 	case CIPSO_V4_MAP_PASS:
 		return 0;
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		cipso_cat_size = doi_def->map.std->cat.cipso_size;
 		cipso_array = doi_def->map.std->cat.cipso;
 		for (;;) {
@@ -860,7 +838,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
 	u32 host_cat_size = 0;
 	u32 *host_cat_array = NULL;
 
-	if (doi_def->type == CIPSO_V4_MAP_STD) {
+	if (doi_def->type == CIPSO_V4_MAP_TRANS) {
 		host_cat_size = doi_def->map.std->cat.local_size;
 		host_cat_array = doi_def->map.std->cat.local;
 	}
@@ -875,7 +853,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
 		case CIPSO_V4_MAP_PASS:
 			net_spot = host_spot;
 			break;
-		case CIPSO_V4_MAP_STD:
+		case CIPSO_V4_MAP_TRANS:
 			if (host_spot >= host_cat_size)
 				return -EPERM;
 			net_spot = host_cat_array[host_spot];
@@ -921,7 +899,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
 	u32 net_cat_size = 0;
 	u32 *net_cat_array = NULL;
 
-	if (doi_def->type == CIPSO_V4_MAP_STD) {
+	if (doi_def->type == CIPSO_V4_MAP_TRANS) {
 		net_cat_size = doi_def->map.std->cat.cipso_size;
 		net_cat_array = doi_def->map.std->cat.cipso;
 	}
@@ -941,7 +919,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
 		case CIPSO_V4_MAP_PASS:
 			host_spot = net_spot;
 			break;
-		case CIPSO_V4_MAP_STD:
+		case CIPSO_V4_MAP_TRANS:
 			if (net_spot >= net_cat_size)
 				return -EPERM;
 			host_spot = net_cat_array[net_spot];
@@ -1277,7 +1255,7 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
 	} else
 		tag_len = 4;
 
-	buffer[0] = 0x01;
+	buffer[0] = CIPSO_V4_TAG_RBITMAP;
 	buffer[1] = tag_len;
 	buffer[3] = level;
 
@@ -1373,7 +1351,7 @@ static int cipso_v4_gentag_enum(const struct cipso_v4_doi *doi_def,
 	} else
 		tag_len = 4;
 
-	buffer[0] = 0x02;
+	buffer[0] = CIPSO_V4_TAG_ENUM;
 	buffer[1] = tag_len;
 	buffer[3] = level;
 
@@ -1469,7 +1447,7 @@ static int cipso_v4_gentag_rng(const struct cipso_v4_doi *doi_def,
 	} else
 		tag_len = 4;
 
-	buffer[0] = 0x05;
+	buffer[0] = CIPSO_V4_TAG_RANGE;
 	buffer[1] = tag_len;
 	buffer[3] = level;
 
@@ -1523,6 +1501,54 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
 }
 
 /**
+ * cipso_v4_gentag_loc - Generate a CIPSO local tag (non-standard)
+ * @doi_def: the DOI definition
+ * @secattr: the security attributes
+ * @buffer: the option buffer
+ * @buffer_len: length of buffer in bytes
+ *
+ * Description:
+ * Generate a CIPSO option using the local tag.  Returns the size of the tag
+ * on success, negative values on failure.
+ *
+ */
+static int cipso_v4_gentag_loc(const struct cipso_v4_doi *doi_def,
+			       const struct netlbl_lsm_secattr *secattr,
+			       unsigned char *buffer,
+			       u32 buffer_len)
+{
+	if (!(secattr->flags & NETLBL_SECATTR_SECID))
+		return -EPERM;
+
+	buffer[0] = CIPSO_V4_TAG_LOCAL;
+	buffer[1] = CIPSO_V4_TAG_LOC_BLEN;
+	*(u32 *)&buffer[2] = secattr->attr.secid;
+
+	return CIPSO_V4_TAG_LOC_BLEN;
+}
+
+/**
+ * cipso_v4_parsetag_loc - Parse a CIPSO local tag
+ * @doi_def: the DOI definition
+ * @tag: the CIPSO tag
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse a CIPSO local tag and return the security attributes in @secattr.
+ * Return zero on success, negatives values on failure.
+ *
+ */
+static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def,
+				 const unsigned char *tag,
+				 struct netlbl_lsm_secattr *secattr)
+{
+	secattr->attr.secid = *(u32 *)&tag[2];
+	secattr->flags |= NETLBL_SECATTR_SECID;
+
+	return 0;
+}
+
+/**
  * cipso_v4_validate - Validate a CIPSO option
  * @option: the start of the option, on error it is set to point to the error
  *
@@ -1541,7 +1567,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
  *   that is unrecognized."
  *
  */
-int cipso_v4_validate(unsigned char **option)
+int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
 {
 	unsigned char *opt = *option;
 	unsigned char *tag;
@@ -1566,7 +1592,7 @@ int cipso_v4_validate(unsigned char **option)
 		goto validate_return_locked;
 	}
 
-	opt_iter = 6;
+	opt_iter = CIPSO_V4_HDR_LEN;
 	tag = opt + opt_iter;
 	while (opt_iter < opt_len) {
 		for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];)
@@ -1584,7 +1610,7 @@ int cipso_v4_validate(unsigned char **option)
 
 		switch (tag[0]) {
 		case CIPSO_V4_TAG_RBITMAP:
-			if (tag_len < 4) {
+			if (tag_len < CIPSO_V4_TAG_RBM_BLEN) {
 				err_offset = opt_iter + 1;
 				goto validate_return_locked;
 			}
@@ -1602,7 +1628,7 @@ int cipso_v4_validate(unsigned char **option)
 					err_offset = opt_iter + 3;
 					goto validate_return_locked;
 				}
-				if (tag_len > 4 &&
+				if (tag_len > CIPSO_V4_TAG_RBM_BLEN &&
 				    cipso_v4_map_cat_rbm_valid(doi_def,
 							    &tag[4],
 							    tag_len - 4) < 0) {
@@ -1612,7 +1638,7 @@ int cipso_v4_validate(unsigned char **option)
 			}
 			break;
 		case CIPSO_V4_TAG_ENUM:
-			if (tag_len < 4) {
+			if (tag_len < CIPSO_V4_TAG_ENUM_BLEN) {
 				err_offset = opt_iter + 1;
 				goto validate_return_locked;
 			}
@@ -1622,7 +1648,7 @@ int cipso_v4_validate(unsigned char **option)
 				err_offset = opt_iter + 3;
 				goto validate_return_locked;
 			}
-			if (tag_len > 4 &&
+			if (tag_len > CIPSO_V4_TAG_ENUM_BLEN &&
 			    cipso_v4_map_cat_enum_valid(doi_def,
 							&tag[4],
 							tag_len - 4) < 0) {
@@ -1631,7 +1657,7 @@ int cipso_v4_validate(unsigned char **option)
 			}
 			break;
 		case CIPSO_V4_TAG_RANGE:
-			if (tag_len < 4) {
+			if (tag_len < CIPSO_V4_TAG_RNG_BLEN) {
 				err_offset = opt_iter + 1;
 				goto validate_return_locked;
 			}
@@ -1641,7 +1667,7 @@ int cipso_v4_validate(unsigned char **option)
 				err_offset = opt_iter + 3;
 				goto validate_return_locked;
 			}
-			if (tag_len > 4 &&
+			if (tag_len > CIPSO_V4_TAG_RNG_BLEN &&
 			    cipso_v4_map_cat_rng_valid(doi_def,
 						       &tag[4],
 						       tag_len - 4) < 0) {
@@ -1649,6 +1675,19 @@ int cipso_v4_validate(unsigned char **option)
 				goto validate_return_locked;
 			}
 			break;
+		case CIPSO_V4_TAG_LOCAL:
+			/* This is a non-standard tag that we only allow for
+			 * local connections, so if the incoming interface is
+			 * not the loopback device drop the packet. */
+			if (!(skb->dev->flags & IFF_LOOPBACK)) {
+				err_offset = opt_iter;
+				goto validate_return_locked;
+			}
+			if (tag_len != CIPSO_V4_TAG_LOC_BLEN) {
+				err_offset = opt_iter + 1;
+				goto validate_return_locked;
+			}
+			break;
 		default:
 			err_offset = opt_iter;
 			goto validate_return_locked;
@@ -1704,48 +1743,27 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
 }
 
 /**
- * cipso_v4_sock_setattr - Add a CIPSO option to a socket
- * @sk: the socket
+ * cipso_v4_genopt - Generate a CIPSO option
+ * @buf: the option buffer
+ * @buf_len: the size of opt_buf
  * @doi_def: the CIPSO DOI to use
- * @secattr: the specific security attributes of the socket
+ * @secattr: the security attributes
  *
  * Description:
- * Set the CIPSO option on the given socket using the DOI definition and
- * security attributes passed to the function.  This function requires
- * exclusive access to @sk, which means it either needs to be in the
- * process of being created or locked.  Returns zero on success and negative
- * values on failure.
+ * Generate a CIPSO option using the DOI definition and security attributes
+ * passed to the function.  Returns the length of the option on success and
+ * negative values on failure.
  *
  */
-int cipso_v4_sock_setattr(struct sock *sk,
-			  const struct cipso_v4_doi *doi_def,
-			  const struct netlbl_lsm_secattr *secattr)
+static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
+			   const struct cipso_v4_doi *doi_def,
+			   const struct netlbl_lsm_secattr *secattr)
 {
-	int ret_val = -EPERM;
+	int ret_val;
 	u32 iter;
-	unsigned char *buf;
-	u32 buf_len = 0;
-	u32 opt_len;
-	struct ip_options *opt = NULL;
-	struct inet_sock *sk_inet;
-	struct inet_connection_sock *sk_conn;
 
-	/* In the case of sock_create_lite(), the sock->sk field is not
-	 * defined yet but it is not a problem as the only users of these
-	 * "lite" PF_INET sockets are functions which do an accept() call
-	 * afterwards so we will label the socket as part of the accept(). */
-	if (sk == NULL)
-		return 0;
-
-	/* We allocate the maximum CIPSO option size here so we are probably
-	 * being a little wasteful, but it makes our life _much_ easier later
-	 * on and after all we are only talking about 40 bytes. */
-	buf_len = CIPSO_V4_OPT_LEN_MAX;
-	buf = kmalloc(buf_len, GFP_ATOMIC);
-	if (buf == NULL) {
-		ret_val = -ENOMEM;
-		goto socket_setattr_failure;
-	}
+	if (buf_len <= CIPSO_V4_HDR_LEN)
+		return -ENOSPC;
 
 	/* XXX - This code assumes only one tag per CIPSO option which isn't
 	 * really a good assumption to make but since we only support the MAC
@@ -1772,9 +1790,14 @@ int cipso_v4_sock_setattr(struct sock *sk,
 						   &buf[CIPSO_V4_HDR_LEN],
 						   buf_len - CIPSO_V4_HDR_LEN);
 			break;
+		case CIPSO_V4_TAG_LOCAL:
+			ret_val = cipso_v4_gentag_loc(doi_def,
+						   secattr,
+						   &buf[CIPSO_V4_HDR_LEN],
+						   buf_len - CIPSO_V4_HDR_LEN);
+			break;
 		default:
-			ret_val = -EPERM;
-			goto socket_setattr_failure;
+			return -EPERM;
 		}
 
 		iter++;
@@ -1782,9 +1805,58 @@ int cipso_v4_sock_setattr(struct sock *sk,
 		 iter < CIPSO_V4_TAG_MAXCNT &&
 		 doi_def->tags[iter] != CIPSO_V4_TAG_INVALID);
 	if (ret_val < 0)
-		goto socket_setattr_failure;
+		return ret_val;
 	cipso_v4_gentag_hdr(doi_def, buf, ret_val);
-	buf_len = CIPSO_V4_HDR_LEN + ret_val;
+	return CIPSO_V4_HDR_LEN + ret_val;
+}
+
+/**
+ * cipso_v4_sock_setattr - Add a CIPSO option to a socket
+ * @sk: the socket
+ * @doi_def: the CIPSO DOI to use
+ * @secattr: the specific security attributes of the socket
+ *
+ * Description:
+ * Set the CIPSO option on the given socket using the DOI definition and
+ * security attributes passed to the function.  This function requires
+ * exclusive access to @sk, which means it either needs to be in the
+ * process of being created or locked.  Returns zero on success and negative
+ * values on failure.
+ *
+ */
+int cipso_v4_sock_setattr(struct sock *sk,
+			  const struct cipso_v4_doi *doi_def,
+			  const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -EPERM;
+	unsigned char *buf = NULL;
+	u32 buf_len;
+	u32 opt_len;
+	struct ip_options *opt = NULL;
+	struct inet_sock *sk_inet;
+	struct inet_connection_sock *sk_conn;
+
+	/* In the case of sock_create_lite(), the sock->sk field is not
+	 * defined yet but it is not a problem as the only users of these
+	 * "lite" PF_INET sockets are functions which do an accept() call
+	 * afterwards so we will label the socket as part of the accept(). */
+	if (sk == NULL)
+		return 0;
+
+	/* We allocate the maximum CIPSO option size here so we are probably
+	 * being a little wasteful, but it makes our life _much_ easier later
+	 * on and after all we are only talking about 40 bytes. */
+	buf_len = CIPSO_V4_OPT_LEN_MAX;
+	buf = kmalloc(buf_len, GFP_ATOMIC);
+	if (buf == NULL) {
+		ret_val = -ENOMEM;
+		goto socket_setattr_failure;
+	}
+
+	ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
+	if (ret_val < 0)
+		goto socket_setattr_failure;
+	buf_len = ret_val;
 
 	/* We can't use ip_options_get() directly because it makes a call to
 	 * ip_options_get_alloc() which allocates memory with GFP_KERNEL and
@@ -1822,6 +1894,80 @@ socket_setattr_failure:
 }
 
 /**
+ * cipso_v4_sock_delattr - Delete the CIPSO option from a socket
+ * @sk: the socket
+ *
+ * Description:
+ * Removes the CIPSO option from a socket, if present.
+ *
+ */
+void cipso_v4_sock_delattr(struct sock *sk)
+{
+	u8 hdr_delta;
+	struct ip_options *opt;
+	struct inet_sock *sk_inet;
+
+	sk_inet = inet_sk(sk);
+	opt = sk_inet->opt;
+	if (opt == NULL || opt->cipso == 0)
+		return;
+
+	if (opt->srr || opt->rr || opt->ts || opt->router_alert) {
+		u8 cipso_len;
+		u8 cipso_off;
+		unsigned char *cipso_ptr;
+		int iter;
+		int optlen_new;
+
+		cipso_off = opt->cipso - sizeof(struct iphdr);
+		cipso_ptr = &opt->__data[cipso_off];
+		cipso_len = cipso_ptr[1];
+
+		if (opt->srr > opt->cipso)
+			opt->srr -= cipso_len;
+		if (opt->rr > opt->cipso)
+			opt->rr -= cipso_len;
+		if (opt->ts > opt->cipso)
+			opt->ts -= cipso_len;
+		if (opt->router_alert > opt->cipso)
+			opt->router_alert -= cipso_len;
+		opt->cipso = 0;
+
+		memmove(cipso_ptr, cipso_ptr + cipso_len,
+			opt->optlen - cipso_off - cipso_len);
+
+		/* determining the new total option length is tricky because of
+		 * the padding necessary, the only thing i can think to do at
+		 * this point is walk the options one-by-one, skipping the
+		 * padding at the end to determine the actual option size and
+		 * from there we can determine the new total option length */
+		iter = 0;
+		optlen_new = 0;
+		while (iter < opt->optlen)
+			if (opt->__data[iter] != IPOPT_NOP) {
+				iter += opt->__data[iter + 1];
+				optlen_new = iter;
+			} else
+				iter++;
+		hdr_delta = opt->optlen;
+		opt->optlen = (optlen_new + 3) & ~3;
+		hdr_delta -= opt->optlen;
+	} else {
+		/* only the cipso option was present on the socket so we can
+		 * remove the entire option struct */
+		sk_inet->opt = NULL;
+		hdr_delta = opt->optlen;
+		kfree(opt);
+	}
+
+	if (sk_inet->is_icsk && hdr_delta > 0) {
+		struct inet_connection_sock *sk_conn = inet_csk(sk);
+		sk_conn->icsk_ext_hdr_len -= hdr_delta;
+		sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
+	}
+}
+
+/**
  * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions
  * @cipso: the CIPSO v4 option
  * @secattr: the security attributes
@@ -1859,6 +2005,9 @@ static int cipso_v4_getattr(const unsigned char *cipso,
 	case CIPSO_V4_TAG_RANGE:
 		ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr);
 		break;
+	case CIPSO_V4_TAG_LOCAL:
+		ret_val = cipso_v4_parsetag_loc(doi_def, &cipso[6], secattr);
+		break;
 	}
 	if (ret_val == 0)
 		secattr->type = NETLBL_NLTYPE_CIPSOV4;
@@ -1893,6 +2042,123 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
 }
 
 /**
+ * cipso_v4_skbuff_setattr - Set the CIPSO option on a packet
+ * @skb: the packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Set the CIPSO option on the given packet based on the security attributes.
+ * Returns a pointer to the IP header on success and NULL on failure.
+ *
+ */
+int cipso_v4_skbuff_setattr(struct sk_buff *skb,
+			    const struct cipso_v4_doi *doi_def,
+			    const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+	struct iphdr *iph;
+	struct ip_options *opt = &IPCB(skb)->opt;
+	unsigned char buf[CIPSO_V4_OPT_LEN_MAX];
+	u32 buf_len = CIPSO_V4_OPT_LEN_MAX;
+	u32 opt_len;
+	int len_delta;
+
+	buf_len = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
+	if (buf_len < 0)
+		return buf_len;
+	opt_len = (buf_len + 3) & ~3;
+
+	/* we overwrite any existing options to ensure that we have enough
+	 * room for the CIPSO option, the reason is that we _need_ to guarantee
+	 * that the security label is applied to the packet - we do the same
+	 * thing when using the socket options and it hasn't caused a problem,
+	 * if we need to we can always revisit this choice later */
+
+	len_delta = opt_len - opt->optlen;
+	/* if we don't ensure enough headroom we could panic on the skb_push()
+	 * call below so make sure we have enough, we are also "mangling" the
+	 * packet so we should probably do a copy-on-write call anyway */
+	ret_val = skb_cow(skb, skb_headroom(skb) + len_delta);
+	if (ret_val < 0)
+		return ret_val;
+
+	if (len_delta > 0) {
+		/* we assume that the header + opt->optlen have already been
+		 * "pushed" in ip_options_build() or similar */
+		iph = ip_hdr(skb);
+		skb_push(skb, len_delta);
+		memmove((char *)iph - len_delta, iph, iph->ihl << 2);
+		skb_reset_network_header(skb);
+		iph = ip_hdr(skb);
+	} else if (len_delta < 0) {
+		iph = ip_hdr(skb);
+		memset(iph + 1, IPOPT_NOP, opt->optlen);
+	} else
+		iph = ip_hdr(skb);
+
+	if (opt->optlen > 0)
+		memset(opt, 0, sizeof(*opt));
+	opt->optlen = opt_len;
+	opt->cipso = sizeof(struct iphdr);
+	opt->is_changed = 1;
+
+	/* we have to do the following because we are being called from a
+	 * netfilter hook which means the packet already has had the header
+	 * fields populated and the checksum calculated - yes this means we
+	 * are doing more work than needed but we do it to keep the core
+	 * stack clean and tidy */
+	memcpy(iph + 1, buf, buf_len);
+	if (opt_len > buf_len)
+		memset((char *)(iph + 1) + buf_len, 0, opt_len - buf_len);
+	if (len_delta != 0) {
+		iph->ihl = 5 + (opt_len >> 2);
+		iph->tot_len = htons(skb->len);
+	}
+	ip_send_check(iph);
+
+	return 0;
+}
+
+/**
+ * cipso_v4_skbuff_delattr - Delete any CIPSO options from a packet
+ * @skb: the packet
+ *
+ * Description:
+ * Removes any and all CIPSO options from the given packet.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+int cipso_v4_skbuff_delattr(struct sk_buff *skb)
+{
+	int ret_val;
+	struct iphdr *iph;
+	struct ip_options *opt = &IPCB(skb)->opt;
+	unsigned char *cipso_ptr;
+
+	if (opt->cipso == 0)
+		return 0;
+
+	/* since we are changing the packet we should make a copy */
+	ret_val = skb_cow(skb, skb_headroom(skb));
+	if (ret_val < 0)
+		return ret_val;
+
+	/* the easiest thing to do is just replace the cipso option with noop
+	 * options since we don't change the size of the packet, although we
+	 * still need to recalculate the checksum */
+
+	iph = ip_hdr(skb);
+	cipso_ptr = (unsigned char *)iph + opt->cipso;
+	memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]);
+	opt->cipso = 0;
+	opt->is_changed = 1;
+
+	ip_send_check(iph);
+
+	return 0;
+}
+
+/**
  * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
  * @skb: the packet
  * @secattr: the security attributes
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index b12dae2b0b2d..56fce3ab6c55 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -613,9 +613,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	if (colon)
 		*colon = 0;
 
-#ifdef CONFIG_KMOD
 	dev_load(net, ifr.ifr_name);
-#endif
 
 	switch (cmd) {
 	case SIOCGIFADDR:	/* Get interface address */
@@ -1283,7 +1281,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
 	return ret;
 }
 
-static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
+static int devinet_conf_sysctl(ctl_table *table,
 			       void __user *oldval, size_t __user *oldlenp,
 			       void __user *newval, size_t newlen)
 {
@@ -1379,12 +1377,11 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
 	return ret;
 }
 
-int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
+int ipv4_doint_and_flush_strategy(ctl_table *table,
 				  void __user *oldval, size_t __user *oldlenp,
 				  void __user *newval, size_t newlen)
 {
-	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
-				      newval, newlen);
+	int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen);
 	struct net *net = table->extra2;
 
 	if (ret == 1)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 55c355e63234..72b2de76f1cd 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1,7 +1,7 @@
 /*
  *	NET3:	Implementation of the ICMP protocol layer.
  *
- *		Alan Cox, <alan@redhat.com>
+ *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 7f9e337e3908..a0d86455c53e 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -9,7 +9,7 @@
  *	seems to fall out with gcc 2.6.2.
  *
  *	Authors:
- *		Alan Cox <Alan.Cox@linux.org>
+ *		Alan Cox <alan@lxorguk.ukuu.org.uk>
  *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 89cb047ab314..564230dabcb8 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -53,11 +53,9 @@ static DEFINE_MUTEX(inet_diag_table_mutex);
 
 static const struct inet_diag_handler *inet_diag_lock_handler(int type)
 {
-#ifdef CONFIG_KMOD
 	if (!inet_diag_table[type])
 		request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
 			       NETLINK_INET_DIAG, type);
-#endif
 
 	mutex_lock(&inet_diag_table_mutex);
 	if (!inet_diag_table[type])
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2152d222b954..e4f81f54befe 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -6,7 +6,7 @@
  *		The IP fragmentation functionality.
  *
  * Authors:	Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
- *		Alan Cox <Alan.Cox@linux.org>
+ *		Alan Cox <alan@lxorguk.ukuu.org.uk>
  *
  * Fixes:
  *		Alan Cox	:	Split from ip.c , see ip_input.c for history.
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e0bed56c51f1..861978a4f1a8 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -8,7 +8,7 @@
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Donald Becker, <becker@super.org>
- *		Alan Cox, <Alan.Cox@linux.org>
+ *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *		Richard Underwood
  *		Stefan Becker, <stefanb@yello.ping.de>
  *		Jorge Cwik, <jorge@laser.satlink.net>
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index be3f18a7a40e..2c88da6e7862 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -438,7 +438,7 @@ int ip_options_compile(struct net *net,
 				goto error;
 			}
 			opt->cipso = optptr - iph;
-			if (cipso_v4_validate(&optptr)) {
+			if (cipso_v4_validate(skb, &optptr)) {
 				pp_ptr = optptr;
 				goto error;
 			}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 4c6d2caf9203..29609d29df76 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -41,7 +41,7 @@
 		Made the tunnels use dev->name not tunnel: when error reporting.
 		Added tx_dropped stat
 
-		-Alan Cox	(Alan.Cox@linux.org) 21 March 95
+		-Alan Cox	(alan@lxorguk.ukuu.org.uk) 21 March 95
 
 	Reworked:
 		Changed to tunnel to destination gateway in addition to the
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c519b8d30eee..b42e082cc170 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1,7 +1,7 @@
 /*
  *	IP multicast routing support for mrouted 3.6/3.8
  *
- *		(c) 1995 Alan Cox, <alan@redhat.com>
+ *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *	  Linux Consultancy and Custom Driver Development
  *
  *	This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index aa2c50a180f7..fa2d6b6fc3e5 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -41,12 +41,13 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 					  int (*okfn)(struct sk_buff *))
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
 	/* Previously seen (loopback)?  Ignore.  Do this before
 	   fragment check. */
 	if (skb->nfct)
 		return NF_ACCEPT;
 #endif
-
+#endif
 	/* Gather fragments. */
 	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 		if (nf_ct_ipv4_gather_frags(skb,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 2ac9eaf1a8c9..a65cf692359f 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -584,6 +584,98 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
 	.flags		= NF_CT_EXT_F_PREALLOC,
 };
 
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
+	[CTA_PROTONAT_PORT_MIN]	= { .type = NLA_U16 },
+	[CTA_PROTONAT_PORT_MAX]	= { .type = NLA_U16 },
+};
+
+static int nfnetlink_parse_nat_proto(struct nlattr *attr,
+				     const struct nf_conn *ct,
+				     struct nf_nat_range *range)
+{
+	struct nlattr *tb[CTA_PROTONAT_MAX+1];
+	const struct nf_nat_protocol *npt;
+	int err;
+
+	err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
+	if (err < 0)
+		return err;
+
+	npt = nf_nat_proto_find_get(nf_ct_protonum(ct));
+	if (npt->nlattr_to_range)
+		err = npt->nlattr_to_range(tb, range);
+	nf_nat_proto_put(npt);
+	return err;
+}
+
+static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
+	[CTA_NAT_MINIP]		= { .type = NLA_U32 },
+	[CTA_NAT_MAXIP]		= { .type = NLA_U32 },
+};
+
+static int
+nfnetlink_parse_nat(struct nlattr *nat,
+		    const struct nf_conn *ct, struct nf_nat_range *range)
+{
+	struct nlattr *tb[CTA_NAT_MAX+1];
+	int err;
+
+	memset(range, 0, sizeof(*range));
+
+	err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[CTA_NAT_MINIP])
+		range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
+
+	if (!tb[CTA_NAT_MAXIP])
+		range->max_ip = range->min_ip;
+	else
+		range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
+
+	if (range->min_ip)
+		range->flags |= IP_NAT_RANGE_MAP_IPS;
+
+	if (!tb[CTA_NAT_PROTO])
+		return 0;
+
+	err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int
+nfnetlink_parse_nat_setup(struct nf_conn *ct,
+			  enum nf_nat_manip_type manip,
+			  struct nlattr *attr)
+{
+	struct nf_nat_range range;
+
+	if (nfnetlink_parse_nat(attr, ct, &range) < 0)
+		return -EINVAL;
+	if (nf_nat_initialized(ct, manip))
+		return -EEXIST;
+
+	return nf_nat_setup_info(ct, &range, manip);
+}
+#else
+static int
+nfnetlink_parse_nat_setup(struct nf_conn *ct,
+			  enum nf_nat_manip_type manip,
+			  struct nlattr *attr)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
 static int __net_init nf_nat_net_init(struct net *net)
 {
 	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
@@ -654,6 +746,9 @@ static int __init nf_nat_init(void)
 
 	BUG_ON(nf_nat_seq_adjust_hook != NULL);
 	rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
+	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
+	rcu_assign_pointer(nfnetlink_parse_nat_setup_hook,
+			   nfnetlink_parse_nat_setup);
 	return 0;
 
  cleanup_extend:
@@ -667,10 +762,12 @@ static void __exit nf_nat_cleanup(void)
 	nf_ct_l3proto_put(l3proto);
 	nf_ct_extend_unregister(&nat_extend);
 	rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
+	rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL);
 	synchronize_net();
 }
 
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-ipv4");
 
 module_init(nf_nat_init);
 module_exit(nf_nat_cleanup);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ffeaffc3fffe..8303e4b406c0 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -742,6 +742,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 			*obj = kmalloc(sizeof(struct snmp_object) + len,
 				       GFP_ATOMIC);
 			if (*obj == NULL) {
+				kfree(p);
 				kfree(id);
 				if (net_ratelimit())
 					printk("OOM in bsalg (%d)\n", __LINE__);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a6d7c584f53b..2ea6dcc3e2cc 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1109,7 +1109,12 @@ restart:
 		printk("\n");
 	}
 #endif
-	rt_hash_table[hash].chain = rt;
+	/*
+	 * Since lookup is lockfree, we must make sure
+	 * previous writes to rt are comitted to memory
+	 * before making rt visible to other CPUS.
+	 */
+	rcu_assign_pointer(rt_hash_table[hash].chain, rt);
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 	*rp = rt;
 	return 0;
@@ -2908,8 +2913,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
 }
 
 static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
-						int __user *name,
-						int nlen,
 						void __user *oldval,
 						size_t __user *oldlenp,
 						void __user *newval,
@@ -2972,16 +2975,13 @@ static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
 }
 
 static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table,
-						   int __user *name,
-						   int nlen,
 						   void __user *oldval,
 						   size_t __user *oldlenp,
 						   void __user *newval,
 						   size_t newlen)
 {
 	int old = ip_rt_secret_interval;
-	int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval,
-				 newlen);
+	int ret = sysctl_jiffies(table, oldval, oldlenp, newval, newlen);
 
 	rt_secret_reschedule(old);
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 276d047fb85a..1bb10df8ce7d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -64,8 +64,8 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
 }
 
 /* Validate changes from sysctl interface. */
-static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
-					 int nlen, void __user *oldval,
+static int ipv4_sysctl_local_port_range(ctl_table *table,
+					 void __user *oldval,
 					 size_t __user *oldlenp,
 					void __user *newval, size_t newlen)
 {
@@ -80,7 +80,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
 	};
 
 	inet_get_local_port_range(range, range + 1);
-	ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen);
+	ret = sysctl_intvec(&tmp, oldval, oldlenp, newval, newlen);
 	if (ret == 0 && newval && newlen) {
 		if (range[1] < range[0])
 			ret = -EINVAL;
@@ -109,8 +109,8 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file *
 	return ret;
 }
 
-static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
-					 int nlen, void __user *oldval,
+static int sysctl_tcp_congestion_control(ctl_table *table,
+					 void __user *oldval,
 					 size_t __user *oldlenp,
 					 void __user *newval, size_t newlen)
 {
@@ -122,7 +122,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
 	int ret;
 
 	tcp_get_default_congestion_control(val);
-	ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen);
+	ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen);
 	if (ret == 1 && newval && newlen)
 		ret = tcp_set_default_congestion_control(val);
 	return ret;
@@ -165,8 +165,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
 	return ret;
 }
 
-static int strategy_allowed_congestion_control(ctl_table *table, int __user *name,
-					       int nlen, void __user *oldval,
+static int strategy_allowed_congestion_control(ctl_table *table,
+					       void __user *oldval,
 					       size_t __user *oldlenp,
 					       void __user *newval,
 					       size_t newlen)
@@ -179,7 +179,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam
 		return -ENOMEM;
 
 	tcp_get_available_congestion_control(tbl.data, tbl.maxlen);
-	ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen);
+	ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen);
 	if (ret == 1 && newval && newlen)
 		ret = tcp_set_allowed_congestion_control(tbl.data);
 	kfree(tbl.data);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 6a250828b767..4ec5b4e97c4e 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -115,7 +115,7 @@ int tcp_set_default_congestion_control(const char *name)
 
 	spin_lock(&tcp_cong_list_lock);
 	ca = tcp_ca_find(name);
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 	if (!ca && capable(CAP_SYS_MODULE)) {
 		spin_unlock(&tcp_cong_list_lock);
 
@@ -244,7 +244,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 	if (ca == icsk->icsk_ca_ops)
 		goto out;
 
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 	/* not found attempt to autoload module */
 	if (!ca && capable(CAP_SYS_MODULE)) {
 		rcu_read_unlock();
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index eacf4cfef146..2095abc3caba 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -8,7 +8,7 @@
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- *		Alan Cox, <Alan.Cox@linux.org>
+ *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *		Hirokazu Takahashi, <taka@valinux.co.jp>
  *
  * Fixes:
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7b6a584b62dd..eea9542728ca 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3982,7 +3982,6 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
 }
 
 static int addrconf_sysctl_forward_strategy(ctl_table *table,
-					    int __user *name, int nlen,
 					    void __user *oldval,
 					    size_t __user *oldlenp,
 					    void __user *newval, size_t newlen)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 050e14b7f701..01edac888510 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -834,7 +834,7 @@ static void __net_exit ipv6_cleanup_mibs(struct net *net)
 	snmp_mib_free((void **)net->mib.icmpv6msg_statistics);
 }
 
-static int inet6_net_init(struct net *net)
+static int __net_init inet6_net_init(struct net *net)
 {
 	int err = 0;
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 840b15780a36..172438320eec 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1199,7 +1199,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		}
 		neigh->flags |= NTF_ROUTER;
 	} else if (rt) {
-		rt->rt6i_flags |= (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
+		rt->rt6i_flags = (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 	}
 
 	if (rt)
@@ -1730,9 +1730,8 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
 	return ret;
 }
 
-int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
-				 int nlen, void __user *oldval,
-				 size_t __user *oldlenp,
+int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl,
+				 void __user *oldval, size_t __user *oldlenp,
 				 void __user *newval, size_t newlen)
 {
 	struct net_device *dev = ctl->extra1;
@@ -1745,13 +1744,11 @@ int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
 
 	switch (ctl->ctl_name) {
 	case NET_NEIGH_REACHABLE_TIME:
-		ret = sysctl_jiffies(ctl, name, nlen,
-				     oldval, oldlenp, newval, newlen);
+		ret = sysctl_jiffies(ctl, oldval, oldlenp, newval, newlen);
 		break;
 	case NET_NEIGH_RETRANS_TIME_MS:
 	case NET_NEIGH_REACHABLE_TIME_MS:
-		 ret = sysctl_ms_jiffies(ctl, name, nlen,
-					 oldval, oldlenp, newval, newlen);
+		 ret = sysctl_ms_jiffies(ctl, oldval, oldlenp, newval, newlen);
 		 break;
 	default:
 		ret = 0;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 6b29b03925f1..fd5b3a4e3329 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -12,6 +12,7 @@
 
 int ip6_route_me_harder(struct sk_buff *skb)
 {
+	struct net *net = dev_net(skb->dst->dev);
 	struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct dst_entry *dst;
 	struct flowi fl = {
@@ -23,7 +24,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
 		    .saddr = iph->saddr, } },
 	};
 
-	dst = ip6_route_output(dev_net(skb->dst->dev), skb->sk, &fl);
+	dst = ip6_route_output(net, skb->sk, &fl);
 
 #ifdef CONFIG_XFRM
 	if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
@@ -33,8 +34,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
 #endif
 
 	if (dst->error) {
-		IP6_INC_STATS(&init_net, ip6_dst_idev(dst),
-			      IPSTATS_MIB_OUTNOROUTES);
+		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 		LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
 		dst_release(dst);
 		return -EINVAL;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index ec394cf5a19b..676c80b5b14b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -204,6 +204,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	req->mss = mss;
 	ireq->rmt_port = th->source;
+	ireq->loc_port = th->dest;
 	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
 	if (ipv6_opt_accepted(sk, skb) ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e5310c9b84dc..b6b356b7912a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -476,7 +476,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
 	fl.fl6_flowlabel = 0;
 	fl.oif = treq->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-	fl.fl_ip_sport = inet_sk(sk)->sport;
+	fl.fl_ip_sport = inet_rsk(req)->loc_port;
 	security_req_classify_flow(req, &fl);
 
 	opt = np->opt;
@@ -1309,7 +1309,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-		fl.fl_ip_sport = inet_sk(sk)->sport;
+		fl.fl_ip_sport = inet_rsk(req)->loc_port;
 		security_req_classify_flow(req, &fl);
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
@@ -1865,7 +1865,7 @@ static void get_openreq6(struct seq_file *seq,
 		   i,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3],
-		   ntohs(inet_sk(sk)->sport),
+		   ntohs(inet_rsk(req)->loc_port),
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3],
 		   ntohs(inet_rsk(req)->rmt_port),
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 2a4515623776..2ad504fc3414 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -545,8 +545,12 @@ static int netdev_notify(struct notifier_block *nb,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	sprintf(buf, "netdev:%s", dev->name);
 	dir = sdata->debugfsdir;
+
+	if (!dir)
+		return 0;
+
+	sprintf(buf, "netdev:%s", dev->name);
 	if (!debugfs_rename(dir->d_parent, dir, dir->d_parent, buf))
 		printk(KERN_ERR "mac80211: debugfs: failed to rename debugfs "
 		       "dir to %s\n", buf);
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index b9902e425f09..189d0bafa91a 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -249,11 +249,22 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
 	DECLARE_MAC_BUF(mbuf);
 	u8 *mac;
 
+	sta->debugfs.add_has_run = true;
+
 	if (!stations_dir)
 		return;
 
 	mac = print_mac(mbuf, sta->sta.addr);
 
+	/*
+	 * This might fail due to a race condition:
+	 * When mac80211 unlinks a station, the debugfs entries
+	 * remain, but it is already possible to link a new
+	 * station with the same address which triggers adding
+	 * it to debugfs; therefore, if the old station isn't
+	 * destroyed quickly enough the old station's debugfs
+	 * dir might still be around.
+	 */
 	sta->debugfs.dir = debugfs_create_dir(mac, stations_dir);
 	if (!sta->debugfs.dir)
 		return;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8025b294588b..156e42a003ae 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -816,8 +816,8 @@ struct ieee802_11_elems {
 	u8 *ext_supp_rates;
 	u8 *wmm_info;
 	u8 *wmm_param;
-	u8 *ht_cap_elem;
-	u8 *ht_info_elem;
+	struct ieee80211_ht_cap *ht_cap_elem;
+	struct ieee80211_ht_addt_info *ht_info_elem;
 	u8 *mesh_config;
 	u8 *mesh_id;
 	u8 *peer_link;
@@ -844,8 +844,6 @@ struct ieee802_11_elems {
 	u8 ext_supp_rates_len;
 	u8 wmm_info_len;
 	u8 wmm_param_len;
-	u8 ht_cap_elem_len;
-	u8 ht_info_elem_len;
 	u8 mesh_config_len;
 	u8 mesh_id_len;
 	u8 peer_link_len;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 49f86fa56bff..87665d7bb4f9 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1348,10 +1348,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	    (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
 		struct ieee80211_ht_bss_info bss_info;
 		ieee80211_ht_cap_ie_to_ht_info(
-				(struct ieee80211_ht_cap *)
 				elems.ht_cap_elem, &sta->sta.ht_info);
 		ieee80211_ht_addt_info_ie_to_ht_bss_info(
-				(struct ieee80211_ht_addt_info *)
 				elems.ht_info_elem, &bss_info);
 		ieee80211_handle_ht(local, 1, &sta->sta.ht_info, &bss_info);
 	}
@@ -1709,7 +1707,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 		struct ieee80211_ht_bss_info bss_info;
 
 		ieee80211_ht_addt_info_ie_to_ht_bss_info(
-				(struct ieee80211_ht_addt_info *)
 				elems.ht_info_elem, &bss_info);
 		changed |= ieee80211_handle_ht(local, 1, &conf->ht_conf,
 					       &bss_info);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 77e7b014872b..cf6b121e1bbf 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1379,6 +1379,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 	return RX_QUEUED;
 }
 
+#ifdef CONFIG_MAC80211_MESH
 static ieee80211_rx_result
 ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 {
@@ -1453,7 +1454,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 	else
 		return RX_DROP_MONITOR;
 }
-
+#endif
 
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
@@ -1780,8 +1781,10 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
 	/* must be after MMIC verify so header is counted in MPDU mic */
 	CALL_RXH(ieee80211_rx_h_remove_qos_control)
 	CALL_RXH(ieee80211_rx_h_amsdu)
+#ifdef CONFIG_MAC80211_MESH
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		CALL_RXH(ieee80211_rx_h_mesh_fwding);
+#endif
 	CALL_RXH(ieee80211_rx_h_data)
 	CALL_RXH(ieee80211_rx_h_ctrl)
 	CALL_RXH(ieee80211_rx_h_action)
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 8e6685e7ae85..416bb41099f3 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -388,7 +388,8 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 	bss = ieee80211_bss_info_update(sdata->local, rx_status,
 					mgmt, skb->len, &elems,
 					freq, beacon);
-	ieee80211_rx_bss_put(sdata->local, bss);
+	if (bss)
+		ieee80211_rx_bss_put(sdata->local, bss);
 
 	dev_kfree_skb(skb);
 	return RX_QUEUED;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 9b72d15bc8dc..7fef8ea1f5ec 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -635,7 +635,12 @@ static void sta_info_debugfs_add_work(struct work_struct *work)
 
 		spin_lock_irqsave(&local->sta_lock, flags);
 		list_for_each_entry(tmp, &local->sta_list, list) {
-			if (!tmp->debugfs.dir) {
+			/*
+			 * debugfs.add_has_run will be set by
+			 * ieee80211_sta_debugfs_add regardless
+			 * of what else it does.
+			 */
+			if (!tmp->debugfs.add_has_run) {
 				sta = tmp;
 				__sta_info_pin(sta);
 				break;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index a6b51862a89d..168a39a298bd 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -300,6 +300,7 @@ struct sta_info {
 		struct dentry *inactive_ms;
 		struct dentry *last_seq_ctrl;
 		struct dentry *agg_status;
+		bool add_has_run;
 	} debugfs;
 #endif
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index f32561ec224c..cee4884b9d06 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -529,12 +529,12 @@ void ieee802_11_parse_elems(u8 *start, size_t len,
 			elems->ext_supp_rates_len = elen;
 			break;
 		case WLAN_EID_HT_CAPABILITY:
-			elems->ht_cap_elem = pos;
-			elems->ht_cap_elem_len = elen;
+			if (elen >= sizeof(struct ieee80211_ht_cap))
+				elems->ht_cap_elem = (void *)pos;
 			break;
 		case WLAN_EID_HT_EXTRA_INFO:
-			elems->ht_info_elem = pos;
-			elems->ht_info_elem_len = elen;
+			if (elen >= sizeof(struct ieee80211_ht_addt_info))
+				elems->ht_info_elem = (void *)pos;
 			break;
 		case WLAN_EID_MESH_ID:
 			elems->mesh_id = pos;
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 7e0d53abde24..742f811ca416 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -775,7 +775,7 @@ static int ieee80211_ioctl_siwfrag(struct net_device *dev,
 	 * configure it here */
 
 	if (local->ops->set_frag_threshold)
-		local->ops->set_frag_threshold(
+		return local->ops->set_frag_threshold(
 			local_to_hw(local),
 			local->fragmentation_threshold);
 
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 78892cf2b021..25dcef9f2194 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -271,7 +271,6 @@ config NF_CONNTRACK_TFTP
 config NF_CT_NETLINK
 	tristate 'Connection tracking netlink interface'
 	select NETFILTER_NETLINK
-	depends on NF_NAT=n || NF_NAT
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables support for a netlink-based userspace interface
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 05048e403266..79a698052218 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -25,11 +25,13 @@ menuconfig IP_VS
 if IP_VS
 
 config	IP_VS_IPV6
-	bool "IPv6 support for IPVS (DANGEROUS)"
+	bool "IPv6 support for IPVS"
 	depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
 	---help---
 	  Add IPv6 support to IPVS. This is incomplete and might be dangerous.
 
+	  See http://www.mindbasket.com/ipvs for more information.
+
 	  Say N if unsure.
 
 config	IP_VS_DEBUG
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 03591d37b9cc..b92df5c1dfcf 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -115,7 +115,7 @@ int nf_conntrack_acct_init(struct net *net)
 
 	if (net_eq(net, &init_net)) {
 #ifdef CONFIG_NF_CT_ACCT
-		printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n");
+	printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use\n");
 		printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n");
 		printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
 #endif
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 27de3c7b006e..622d7c671cb7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -38,9 +38,16 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_nat.h>
 
 #define NF_CONNTRACK_VERSION	"0.5.0"
 
+unsigned int
+(*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
+				  enum nf_nat_manip_type manip,
+				  struct nlattr *attr) __read_mostly;
+EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
+
 DEFINE_SPINLOCK(nf_conntrack_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index cadfd15b44f6..a040d46f85d6 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -689,71 +689,6 @@ ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple,
 	return 0;
 }
 
-#ifdef CONFIG_NF_NAT_NEEDED
-static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
-	[CTA_PROTONAT_PORT_MIN]	= { .type = NLA_U16 },
-	[CTA_PROTONAT_PORT_MAX]	= { .type = NLA_U16 },
-};
-
-static int nfnetlink_parse_nat_proto(struct nlattr *attr,
-				     const struct nf_conn *ct,
-				     struct nf_nat_range *range)
-{
-	struct nlattr *tb[CTA_PROTONAT_MAX+1];
-	const struct nf_nat_protocol *npt;
-	int err;
-
-	err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
-	if (err < 0)
-		return err;
-
-	npt = nf_nat_proto_find_get(nf_ct_protonum(ct));
-	if (npt->nlattr_to_range)
-		err = npt->nlattr_to_range(tb, range);
-	nf_nat_proto_put(npt);
-	return err;
-}
-
-static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
-	[CTA_NAT_MINIP]		= { .type = NLA_U32 },
-	[CTA_NAT_MAXIP]		= { .type = NLA_U32 },
-};
-
-static inline int
-nfnetlink_parse_nat(struct nlattr *nat,
-		    const struct nf_conn *ct, struct nf_nat_range *range)
-{
-	struct nlattr *tb[CTA_NAT_MAX+1];
-	int err;
-
-	memset(range, 0, sizeof(*range));
-
-	err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
-	if (err < 0)
-		return err;
-
-	if (tb[CTA_NAT_MINIP])
-		range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
-
-	if (!tb[CTA_NAT_MAXIP])
-		range->max_ip = range->min_ip;
-	else
-		range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
-
-	if (range->min_ip)
-		range->flags |= IP_NAT_RANGE_MAP_IPS;
-
-	if (!tb[CTA_NAT_PROTO])
-		return 0;
-
-	err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
-#endif
-
 static inline int
 ctnetlink_parse_help(struct nlattr *attr, char **helper_name)
 {
@@ -878,6 +813,36 @@ out:
 	return err;
 }
 
+#ifdef CONFIG_NF_NAT_NEEDED
+static int
+ctnetlink_parse_nat_setup(struct nf_conn *ct,
+			  enum nf_nat_manip_type manip,
+			  struct nlattr *attr)
+{
+	typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup;
+
+	parse_nat_setup = rcu_dereference(nfnetlink_parse_nat_setup_hook);
+	if (!parse_nat_setup) {
+#ifdef CONFIG_MODULES
+		rcu_read_unlock();
+		nfnl_unlock();
+		if (request_module("nf-nat-ipv4") < 0) {
+			nfnl_lock();
+			rcu_read_lock();
+			return -EOPNOTSUPP;
+		}
+		nfnl_lock();
+		rcu_read_lock();
+		if (nfnetlink_parse_nat_setup_hook)
+			return -EAGAIN;
+#endif
+		return -EOPNOTSUPP;
+	}
+
+	return parse_nat_setup(ct, manip, attr);
+}
+#endif
+
 static int
 ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
 {
@@ -897,31 +862,6 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
 		/* ASSURED bit can only be set */
 		return -EBUSY;
 
-	if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
-#ifndef CONFIG_NF_NAT_NEEDED
-		return -EOPNOTSUPP;
-#else
-		struct nf_nat_range range;
-
-		if (cda[CTA_NAT_DST]) {
-			if (nfnetlink_parse_nat(cda[CTA_NAT_DST], ct,
-						&range) < 0)
-				return -EINVAL;
-			if (nf_nat_initialized(ct, IP_NAT_MANIP_DST))
-				return -EEXIST;
-			nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
-		}
-		if (cda[CTA_NAT_SRC]) {
-			if (nfnetlink_parse_nat(cda[CTA_NAT_SRC], ct,
-						&range) < 0)
-				return -EINVAL;
-			if (nf_nat_initialized(ct, IP_NAT_MANIP_SRC))
-				return -EEXIST;
-			nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
-		}
-#endif
-	}
-
 	/* Be careful here, modifying NAT bits can screw up things,
 	 * so don't let users modify them directly if they don't pass
 	 * nf_nat_range. */
@@ -929,6 +869,31 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
 	return 0;
 }
 
+static int
+ctnetlink_change_nat(struct nf_conn *ct, struct nlattr *cda[])
+{
+#ifdef CONFIG_NF_NAT_NEEDED
+	int ret;
+
+	if (cda[CTA_NAT_DST]) {
+		ret = ctnetlink_parse_nat_setup(ct,
+						IP_NAT_MANIP_DST,
+						cda[CTA_NAT_DST]);
+		if (ret < 0)
+			return ret;
+	}
+	if (cda[CTA_NAT_SRC]) {
+		ret = ctnetlink_parse_nat_setup(ct,
+						IP_NAT_MANIP_SRC,
+						cda[CTA_NAT_SRC]);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+#else
+	return -EOPNOTSUPP;
+#endif
+}
 
 static inline int
 ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
@@ -1157,6 +1122,14 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 		}
 	}
 
+	if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
+		err = ctnetlink_change_nat(ct, cda);
+		if (err < 0) {
+			rcu_read_unlock();
+			goto err;
+		}
+	}
+
 	if (cda[CTA_PROTOINFO]) {
 		err = ctnetlink_change_protoinfo(ct, cda);
 		if (err < 0) {
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 373e51e91ce5..1bc3001d1827 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -65,7 +65,7 @@ void
 			     struct nf_conntrack_expect *exp) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn);
 
-#ifdef DEBUG
+#if defined(DEBUG) || defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
 /* PptpControlMessageType names */
 const char *const pptp_msg_name[] = {
 	"UNKNOWN_MESSAGE",
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
index fe34f4bf74cc..cdc97f3105a3 100644
--- a/net/netfilter/nf_tproxy_core.c
+++ b/net/netfilter/nf_tproxy_core.c
@@ -10,7 +10,6 @@
  *
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 
 #include <linux/net.h>
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index b75c9c4a995d..9c0ba17a1ddb 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -44,15 +44,17 @@ static struct sock *nfnl = NULL;
 static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
 static DEFINE_MUTEX(nfnl_mutex);
 
-static inline void nfnl_lock(void)
+void nfnl_lock(void)
 {
 	mutex_lock(&nfnl_mutex);
 }
+EXPORT_SYMBOL_GPL(nfnl_lock);
 
-static inline void nfnl_unlock(void)
+void nfnl_unlock(void)
 {
 	mutex_unlock(&nfnl_mutex);
 }
+EXPORT_SYMBOL_GPL(nfnl_unlock);
 
 int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
 {
@@ -132,9 +134,10 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		return 0;
 
 	type = nlh->nlmsg_type;
+replay:
 	ss = nfnetlink_get_subsys(type);
 	if (!ss) {
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 		nfnl_unlock();
 		request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
 		nfnl_lock();
@@ -165,7 +168,10 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		} else
 			return -EINVAL;
 
-		return nc->call(nfnl, skb, nlh, cda);
+		err = nc->call(nfnl, skb, nlh, cda);
+		if (err == -EAGAIN)
+			goto replay;
+		return err;
 	}
 }
 
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 2cc1fff49307..f9977b3311f7 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -48,7 +48,7 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "NFQUEUE",
-		.family		= NF_ARP,
+		.family		= NFPROTO_ARP,
 		.target		= nfqueue_tg,
 		.targetsize	= sizeof(struct xt_NFQ_info),
 		.me		= THIS_MODULE,
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 6f62c36948d9..7ac54eab0b00 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -61,7 +61,7 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->flags & IPRANGE_SRC) {
 		m  = ntohl(iph->saddr) < ntohl(info->src_min.ip);
 		m |= ntohl(iph->saddr) > ntohl(info->src_max.ip);
-		m ^= info->flags & IPRANGE_SRC_INV;
+		m ^= !!(info->flags & IPRANGE_SRC_INV);
 		if (m) {
 			pr_debug("src IP " NIPQUAD_FMT " NOT in range %s"
 			         NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
@@ -75,7 +75,7 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->flags & IPRANGE_DST) {
 		m  = ntohl(iph->daddr) < ntohl(info->dst_min.ip);
 		m |= ntohl(iph->daddr) > ntohl(info->dst_max.ip);
-		m ^= info->flags & IPRANGE_DST_INV;
+		m ^= !!(info->flags & IPRANGE_DST_INV);
 		if (m) {
 			pr_debug("dst IP " NIPQUAD_FMT " NOT in range %s"
 			         NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
@@ -114,14 +114,14 @@ iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->flags & IPRANGE_SRC) {
 		m  = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0;
 		m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0;
-		m ^= info->flags & IPRANGE_SRC_INV;
+		m ^= !!(info->flags & IPRANGE_SRC_INV);
 		if (m)
 			return false;
 	}
 	if (info->flags & IPRANGE_DST) {
 		m  = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0;
 		m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0;
-		m ^= info->flags & IPRANGE_DST_INV;
+		m ^= !!(info->flags & IPRANGE_DST_INV);
 		if (m)
 			return false;
 	}
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 4ebd4ca9a991..280c471bcdf4 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -318,15 +318,15 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 	for (i = 0; i < ip_list_hash_size; i++)
 		INIT_LIST_HEAD(&t->iphash[i]);
 #ifdef CONFIG_PROC_FS
-	t->proc = proc_create(t->name, ip_list_perms, recent_proc_dir,
-		  &recent_mt_fops);
+	t->proc = proc_create_data(t->name, ip_list_perms, recent_proc_dir,
+		  &recent_mt_fops, t);
 	if (t->proc == NULL) {
 		kfree(t);
 		goto out;
 	}
 #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-	t->proc_old = proc_create(t->name, ip_list_perms, proc_old_dir,
-		      &recent_old_fops);
+	t->proc_old = proc_create_data(t->name, ip_list_perms, proc_old_dir,
+		      &recent_old_fops, t);
 	if (t->proc_old == NULL) {
 		remove_proc_entry(t->name, proc_old_dir);
 		kfree(t);
@@ -334,11 +334,9 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 	}
 	t->proc_old->uid   = ip_list_uid;
 	t->proc_old->gid   = ip_list_gid;
-	t->proc_old->data  = t;
 #endif
 	t->proc->uid       = ip_list_uid;
 	t->proc->gid       = ip_list_gid;
-	t->proc->data      = t;
 #endif
 	spin_lock_bh(&recent_lock);
 	list_add_tail(&t->list, &tables);
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
index 8af18c0a47d9..ea750e9df65f 100644
--- a/net/netlabel/Makefile
+++ b/net/netlabel/Makefile
@@ -5,7 +5,8 @@
 #
 
 # base objects
-obj-y	:= netlabel_user.o netlabel_kapi.o netlabel_domainhash.o
+obj-y	:= netlabel_user.o netlabel_kapi.o
+obj-y	+= netlabel_domainhash.o netlabel_addrlist.o
 
 # management objects
 obj-y	+= netlabel_mgmt.o
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
new file mode 100644
index 000000000000..b0925a303353
--- /dev/null
+++ b/net/netlabel/netlabel_addrlist.c
@@ -0,0 +1,388 @@
+/*
+ * NetLabel Network Address Lists
+ *
+ * This file contains network address list functions used to manage ordered
+ * lists of network addresses for use by the NetLabel subsystem.  The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2008
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <linux/audit.h>
+
+#include "netlabel_addrlist.h"
+
+/*
+ * Address List Functions
+ */
+
+/**
+ * netlbl_af4list_search - Search for a matching IPv4 address entry
+ * @addr: IPv4 address
+ * @head: the list head
+ *
+ * Description:
+ * Searches the IPv4 address list given by @head.  If a matching address entry
+ * is found it is returned, otherwise NULL is returned.  The caller is
+ * responsible for calling the rcu_read_[un]lock() functions.
+ *
+ */
+struct netlbl_af4list *netlbl_af4list_search(__be32 addr,
+					     struct list_head *head)
+{
+	struct netlbl_af4list *iter;
+
+	list_for_each_entry_rcu(iter, head, list)
+		if (iter->valid && (addr & iter->mask) == iter->addr)
+			return iter;
+
+	return NULL;
+}
+
+/**
+ * netlbl_af4list_search_exact - Search for an exact IPv4 address entry
+ * @addr: IPv4 address
+ * @mask: IPv4 address mask
+ * @head: the list head
+ *
+ * Description:
+ * Searches the IPv4 address list given by @head.  If an exact match if found
+ * it is returned, otherwise NULL is returned.  The caller is responsible for
+ * calling the rcu_read_[un]lock() functions.
+ *
+ */
+struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr,
+						   __be32 mask,
+						   struct list_head *head)
+{
+	struct netlbl_af4list *iter;
+
+	list_for_each_entry_rcu(iter, head, list)
+		if (iter->valid && iter->addr == addr && iter->mask == mask)
+			return iter;
+
+	return NULL;
+}
+
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_af6list_search - Search for a matching IPv6 address entry
+ * @addr: IPv6 address
+ * @head: the list head
+ *
+ * Description:
+ * Searches the IPv6 address list given by @head.  If a matching address entry
+ * is found it is returned, otherwise NULL is returned.  The caller is
+ * responsible for calling the rcu_read_[un]lock() functions.
+ *
+ */
+struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr,
+					     struct list_head *head)
+{
+	struct netlbl_af6list *iter;
+
+	list_for_each_entry_rcu(iter, head, list)
+		if (iter->valid &&
+		    ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0)
+			return iter;
+
+	return NULL;
+}
+
+/**
+ * netlbl_af6list_search_exact - Search for an exact IPv6 address entry
+ * @addr: IPv6 address
+ * @mask: IPv6 address mask
+ * @head: the list head
+ *
+ * Description:
+ * Searches the IPv6 address list given by @head.  If an exact match if found
+ * it is returned, otherwise NULL is returned.  The caller is responsible for
+ * calling the rcu_read_[un]lock() functions.
+ *
+ */
+struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr,
+						   const struct in6_addr *mask,
+						   struct list_head *head)
+{
+	struct netlbl_af6list *iter;
+
+	list_for_each_entry_rcu(iter, head, list)
+		if (iter->valid &&
+		    ipv6_addr_equal(&iter->addr, addr) &&
+		    ipv6_addr_equal(&iter->mask, mask))
+			return iter;
+
+	return NULL;
+}
+#endif /* IPv6 */
+
+/**
+ * netlbl_af4list_add - Add a new IPv4 address entry to a list
+ * @entry: address entry
+ * @head: the list head
+ *
+ * Description:
+ * Add a new address entry to the list pointed to by @head.  On success zero is
+ * returned, otherwise a negative value is returned.  The caller is responsible
+ * for calling the necessary locking functions.
+ *
+ */
+int netlbl_af4list_add(struct netlbl_af4list *entry, struct list_head *head)
+{
+	struct netlbl_af4list *iter;
+
+	iter = netlbl_af4list_search(entry->addr, head);
+	if (iter != NULL &&
+	    iter->addr == entry->addr && iter->mask == entry->mask)
+		return -EEXIST;
+
+	/* in order to speed up address searches through the list (the common
+	 * case) we need to keep the list in order based on the size of the
+	 * address mask such that the entry with the widest mask (smallest
+	 * numerical value) appears first in the list */
+	list_for_each_entry_rcu(iter, head, list)
+		if (iter->valid &&
+		    ntohl(entry->mask) > ntohl(iter->mask)) {
+			__list_add_rcu(&entry->list,
+				       iter->list.prev,
+				       &iter->list);
+			return 0;
+		}
+	list_add_tail_rcu(&entry->list, head);
+	return 0;
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_af6list_add - Add a new IPv6 address entry to a list
+ * @entry: address entry
+ * @head: the list head
+ *
+ * Description:
+ * Add a new address entry to the list pointed to by @head.  On success zero is
+ * returned, otherwise a negative value is returned.  The caller is responsible
+ * for calling the necessary locking functions.
+ *
+ */
+int netlbl_af6list_add(struct netlbl_af6list *entry, struct list_head *head)
+{
+	struct netlbl_af6list *iter;
+
+	iter = netlbl_af6list_search(&entry->addr, head);
+	if (iter != NULL &&
+	    ipv6_addr_equal(&iter->addr, &entry->addr) &&
+	    ipv6_addr_equal(&iter->mask, &entry->mask))
+		return -EEXIST;
+
+	/* in order to speed up address searches through the list (the common
+	 * case) we need to keep the list in order based on the size of the
+	 * address mask such that the entry with the widest mask (smallest
+	 * numerical value) appears first in the list */
+	list_for_each_entry_rcu(iter, head, list)
+		if (iter->valid &&
+		    ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) {
+			__list_add_rcu(&entry->list,
+				       iter->list.prev,
+				       &iter->list);
+			return 0;
+		}
+	list_add_tail_rcu(&entry->list, head);
+	return 0;
+}
+#endif /* IPv6 */
+
+/**
+ * netlbl_af4list_remove_entry - Remove an IPv4 address entry
+ * @entry: address entry
+ *
+ * Description:
+ * Remove the specified IP address entry.  The caller is responsible for
+ * calling the necessary locking functions.
+ *
+ */
+void netlbl_af4list_remove_entry(struct netlbl_af4list *entry)
+{
+	entry->valid = 0;
+	list_del_rcu(&entry->list);
+}
+
+/**
+ * netlbl_af4list_remove - Remove an IPv4 address entry
+ * @addr: IP address
+ * @mask: IP address mask
+ * @head: the list head
+ *
+ * Description:
+ * Remove an IP address entry from the list pointed to by @head.  Returns the
+ * entry on success, NULL on failure.  The caller is responsible for calling
+ * the necessary locking functions.
+ *
+ */
+struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask,
+					     struct list_head *head)
+{
+	struct netlbl_af4list *entry;
+
+	entry = netlbl_af4list_search(addr, head);
+	if (entry != NULL && entry->addr == addr && entry->mask == mask) {
+		netlbl_af4list_remove_entry(entry);
+		return entry;
+	}
+
+	return NULL;
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_af6list_remove_entry - Remove an IPv6 address entry
+ * @entry: address entry
+ *
+ * Description:
+ * Remove the specified IP address entry.  The caller is responsible for
+ * calling the necessary locking functions.
+ *
+ */
+void netlbl_af6list_remove_entry(struct netlbl_af6list *entry)
+{
+	entry->valid = 0;
+	list_del_rcu(&entry->list);
+}
+
+/**
+ * netlbl_af6list_remove - Remove an IPv6 address entry
+ * @addr: IP address
+ * @mask: IP address mask
+ * @head: the list head
+ *
+ * Description:
+ * Remove an IP address entry from the list pointed to by @head.  Returns the
+ * entry on success, NULL on failure.  The caller is responsible for calling
+ * the necessary locking functions.
+ *
+ */
+struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr,
+					     const struct in6_addr *mask,
+					     struct list_head *head)
+{
+	struct netlbl_af6list *entry;
+
+	entry = netlbl_af6list_search(addr, head);
+	if (entry != NULL &&
+	    ipv6_addr_equal(&entry->addr, addr) &&
+	    ipv6_addr_equal(&entry->mask, mask)) {
+		netlbl_af6list_remove_entry(entry);
+		return entry;
+	}
+
+	return NULL;
+}
+#endif /* IPv6 */
+
+/*
+ * Audit Helper Functions
+ */
+
+/**
+ * netlbl_af4list_audit_addr - Audit an IPv4 address
+ * @audit_buf: audit buffer
+ * @src: true if source address, false if destination
+ * @dev: network interface
+ * @addr: IP address
+ * @mask: IP address mask
+ *
+ * Description:
+ * Write the IPv4 address and address mask, if necessary, to @audit_buf.
+ *
+ */
+void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
+					int src, const char *dev,
+					__be32 addr, __be32 mask)
+{
+	u32 mask_val = ntohl(mask);
+	char *dir = (src ? "src" : "dst");
+
+	if (dev != NULL)
+		audit_log_format(audit_buf, " netif=%s", dev);
+	audit_log_format(audit_buf, " %s=" NIPQUAD_FMT, dir, NIPQUAD(addr));
+	if (mask_val != 0xffffffff) {
+		u32 mask_len = 0;
+		while (mask_val > 0) {
+			mask_val <<= 1;
+			mask_len++;
+		}
+		audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len);
+	}
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_af6list_audit_addr - Audit an IPv6 address
+ * @audit_buf: audit buffer
+ * @src: true if source address, false if destination
+ * @dev: network interface
+ * @addr: IP address
+ * @mask: IP address mask
+ *
+ * Description:
+ * Write the IPv6 address and address mask, if necessary, to @audit_buf.
+ *
+ */
+void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf,
+				 int src,
+				 const char *dev,
+				 const struct in6_addr *addr,
+				 const struct in6_addr *mask)
+{
+	char *dir = (src ? "src" : "dst");
+
+	if (dev != NULL)
+		audit_log_format(audit_buf, " netif=%s", dev);
+	audit_log_format(audit_buf, " %s=" NIP6_FMT, dir, NIP6(*addr));
+	if (ntohl(mask->s6_addr32[3]) != 0xffffffff) {
+		u32 mask_len = 0;
+		u32 mask_val;
+		int iter = -1;
+		while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff)
+			mask_len += 32;
+		mask_val = ntohl(mask->s6_addr32[iter]);
+		while (mask_val > 0) {
+			mask_val <<= 1;
+			mask_len++;
+		}
+		audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len);
+	}
+}
+#endif /* IPv6 */
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
new file mode 100644
index 000000000000..0242bead405f
--- /dev/null
+++ b/net/netlabel/netlabel_addrlist.h
@@ -0,0 +1,189 @@
+/*
+ * NetLabel Network Address Lists
+ *
+ * This file contains network address list functions used to manage ordered
+ * lists of network addresses for use by the NetLabel subsystem.  The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2008
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_ADDRLIST_H
+#define _NETLABEL_ADDRLIST_H
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/in6.h>
+#include <linux/audit.h>
+
+/**
+ * struct netlbl_af4list - NetLabel IPv4 address list
+ * @addr: IPv4 address
+ * @mask: IPv4 address mask
+ * @valid: valid flag
+ * @list: list structure, used internally
+ */
+struct netlbl_af4list {
+	__be32 addr;
+	__be32 mask;
+
+	u32 valid;
+	struct list_head list;
+};
+
+/**
+ * struct netlbl_af6list - NetLabel IPv6 address list
+ * @addr: IPv6 address
+ * @mask: IPv6 address mask
+ * @valid: valid flag
+ * @list: list structure, used internally
+ */
+struct netlbl_af6list {
+	struct in6_addr addr;
+	struct in6_addr mask;
+
+	u32 valid;
+	struct list_head list;
+};
+
+#define __af4list_entry(ptr) container_of(ptr, struct netlbl_af4list, list)
+
+static inline struct netlbl_af4list *__af4list_valid(struct list_head *s,
+						     struct list_head *h)
+{
+	struct list_head *i = s;
+	struct netlbl_af4list *n = __af4list_entry(s);
+	while (i != h && !n->valid) {
+		i = i->next;
+		n = __af4list_entry(i);
+	}
+	return n;
+}
+
+static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s,
+							 struct list_head *h)
+{
+	struct list_head *i = s;
+	struct netlbl_af4list *n = __af4list_entry(s);
+	while (i != h && !n->valid) {
+		i = rcu_dereference(i->next);
+		n = __af4list_entry(i);
+	}
+	return n;
+}
+
+#define netlbl_af4list_foreach(iter, head)				\
+	for (iter = __af4list_valid((head)->next, head);		\
+	     prefetch(iter->list.next), &iter->list != (head);		\
+	     iter = __af4list_valid(iter->list.next, head))
+
+#define netlbl_af4list_foreach_rcu(iter, head)				\
+	for (iter = __af4list_valid_rcu((head)->next, head);		\
+	     prefetch(iter->list.next),	&iter->list != (head);		\
+	     iter = __af4list_valid_rcu(iter->list.next, head))
+
+#define netlbl_af4list_foreach_safe(iter, tmp, head)			\
+	for (iter = __af4list_valid((head)->next, head),		\
+		     tmp = __af4list_valid(iter->list.next, head);	\
+	     &iter->list != (head);					\
+	     iter = tmp, tmp = __af4list_valid(iter->list.next, head))
+
+int netlbl_af4list_add(struct netlbl_af4list *entry,
+		       struct list_head *head);
+struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask,
+					     struct list_head *head);
+void netlbl_af4list_remove_entry(struct netlbl_af4list *entry);
+struct netlbl_af4list *netlbl_af4list_search(__be32 addr,
+					     struct list_head *head);
+struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr,
+						   __be32 mask,
+						   struct list_head *head);
+void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
+			       int src, const char *dev,
+			       __be32 addr, __be32 mask);
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
+#define __af6list_entry(ptr) container_of(ptr, struct netlbl_af6list, list)
+
+static inline struct netlbl_af6list *__af6list_valid(struct list_head *s,
+						     struct list_head *h)
+{
+	struct list_head *i = s;
+	struct netlbl_af6list *n = __af6list_entry(s);
+	while (i != h && !n->valid) {
+		i = i->next;
+		n = __af6list_entry(i);
+	}
+	return n;
+}
+
+static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s,
+							 struct list_head *h)
+{
+	struct list_head *i = s;
+	struct netlbl_af6list *n = __af6list_entry(s);
+	while (i != h && !n->valid) {
+		i = rcu_dereference(i->next);
+		n = __af6list_entry(i);
+	}
+	return n;
+}
+
+#define netlbl_af6list_foreach(iter, head)				\
+	for (iter = __af6list_valid((head)->next, head);		\
+	     prefetch(iter->list.next),	&iter->list != (head);		\
+	     iter = __af6list_valid(iter->list.next, head))
+
+#define netlbl_af6list_foreach_rcu(iter, head)				\
+	for (iter = __af6list_valid_rcu((head)->next, head);		\
+	     prefetch(iter->list.next),	&iter->list != (head);		\
+	     iter = __af6list_valid_rcu(iter->list.next, head))
+
+#define netlbl_af6list_foreach_safe(iter, tmp, head)			\
+	for (iter = __af6list_valid((head)->next, head),		\
+		     tmp = __af6list_valid(iter->list.next, head);	\
+	     &iter->list != (head);					\
+	     iter = tmp, tmp = __af6list_valid(iter->list.next, head))
+
+int netlbl_af6list_add(struct netlbl_af6list *entry,
+		       struct list_head *head);
+struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr,
+					     const struct in6_addr *mask,
+					     struct list_head *head);
+void netlbl_af6list_remove_entry(struct netlbl_af6list *entry);
+struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr,
+					     struct list_head *head);
+struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr,
+						   const struct in6_addr *mask,
+						   struct list_head *head);
+void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf,
+			       int src,
+			       const char *dev,
+			       const struct in6_addr *addr,
+			       const struct in6_addr *mask);
+#endif /* IPV6 */
+
+#endif
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 0aec318bf0ef..fff32b70efa9 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -43,6 +43,7 @@
 #include "netlabel_user.h"
 #include "netlabel_cipso_v4.h"
 #include "netlabel_mgmt.h"
+#include "netlabel_domainhash.h"
 
 /* Argument struct for cipso_v4_doi_walk() */
 struct netlbl_cipsov4_doiwalk_arg {
@@ -51,6 +52,12 @@ struct netlbl_cipsov4_doiwalk_arg {
 	u32 seq;
 };
 
+/* Argument struct for netlbl_domhsh_walk() */
+struct netlbl_domhsh_walk_arg {
+	struct netlbl_audit *audit_info;
+	u32 doi;
+};
+
 /* NetLabel Generic NETLINK CIPSOv4 family */
 static struct genl_family netlbl_cipsov4_gnl_family = {
 	.id = GENL_ID_GENERATE,
@@ -81,32 +88,6 @@ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1
  */
 
 /**
- * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition
- * @entry: the entry's RCU field
- *
- * Description:
- * This function is designed to be used as a callback to the call_rcu()
- * function so that the memory allocated to the DOI definition can be released
- * safely.
- *
- */
-void netlbl_cipsov4_doi_free(struct rcu_head *entry)
-{
-	struct cipso_v4_doi *ptr;
-
-	ptr = container_of(entry, struct cipso_v4_doi, rcu);
-	switch (ptr->type) {
-	case CIPSO_V4_MAP_STD:
-		kfree(ptr->map.std->lvl.cipso);
-		kfree(ptr->map.std->lvl.local);
-		kfree(ptr->map.std->cat.cipso);
-		kfree(ptr->map.std->cat.local);
-		break;
-	}
-	kfree(ptr);
-}
-
-/**
  * netlbl_cipsov4_add_common - Parse the common sections of a ADD message
  * @info: the Generic NETLINK info block
  * @doi_def: the CIPSO V4 DOI definition
@@ -151,9 +132,9 @@ static int netlbl_cipsov4_add_common(struct genl_info *info,
  * @info: the Generic NETLINK info block
  *
  * Description:
- * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message
- * and add it to the CIPSO V4 engine.  Return zero on success and non-zero on
- * error.
+ * Create a new CIPSO_V4_MAP_TRANS DOI definition based on the given ADD
+ * message and add it to the CIPSO V4 engine.  Return zero on success and
+ * non-zero on error.
  *
  */
 static int netlbl_cipsov4_add_std(struct genl_info *info)
@@ -183,7 +164,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info)
 		ret_val = -ENOMEM;
 		goto add_std_failure;
 	}
-	doi_def->type = CIPSO_V4_MAP_STD;
+	doi_def->type = CIPSO_V4_MAP_TRANS;
 
 	ret_val = netlbl_cipsov4_add_common(info, doi_def);
 	if (ret_val != 0)
@@ -342,7 +323,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info)
 
 add_std_failure:
 	if (doi_def)
-		netlbl_cipsov4_doi_free(&doi_def->rcu);
+		cipso_v4_doi_free(doi_def);
 	return ret_val;
 }
 
@@ -379,7 +360,44 @@ static int netlbl_cipsov4_add_pass(struct genl_info *info)
 	return 0;
 
 add_pass_failure:
-	netlbl_cipsov4_doi_free(&doi_def->rcu);
+	cipso_v4_doi_free(doi_def);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_add_local - Adds a CIPSO V4 DOI definition
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Create a new CIPSO_V4_MAP_LOCAL DOI definition based on the given ADD
+ * message and add it to the CIPSO V4 engine.  Return zero on success and
+ * non-zero on error.
+ *
+ */
+static int netlbl_cipsov4_add_local(struct genl_info *info)
+{
+	int ret_val;
+	struct cipso_v4_doi *doi_def = NULL;
+
+	if (!info->attrs[NLBL_CIPSOV4_A_TAGLST])
+		return -EINVAL;
+
+	doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
+	if (doi_def == NULL)
+		return -ENOMEM;
+	doi_def->type = CIPSO_V4_MAP_LOCAL;
+
+	ret_val = netlbl_cipsov4_add_common(info, doi_def);
+	if (ret_val != 0)
+		goto add_local_failure;
+
+	ret_val = cipso_v4_doi_add(doi_def);
+	if (ret_val != 0)
+		goto add_local_failure;
+	return 0;
+
+add_local_failure:
+	cipso_v4_doi_free(doi_def);
 	return ret_val;
 }
 
@@ -412,14 +430,18 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
 
 	type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]);
 	switch (type) {
-	case CIPSO_V4_MAP_STD:
-		type_str = "std";
+	case CIPSO_V4_MAP_TRANS:
+		type_str = "trans";
 		ret_val = netlbl_cipsov4_add_std(info);
 		break;
 	case CIPSO_V4_MAP_PASS:
 		type_str = "pass";
 		ret_val = netlbl_cipsov4_add_pass(info);
 		break;
+	case CIPSO_V4_MAP_LOCAL:
+		type_str = "local";
+		ret_val = netlbl_cipsov4_add_local(info);
+		break;
 	}
 	if (ret_val == 0)
 		atomic_inc(&netlabel_mgmt_protocount);
@@ -491,7 +513,7 @@ list_start:
 	doi_def = cipso_v4_doi_getdef(doi);
 	if (doi_def == NULL) {
 		ret_val = -EINVAL;
-		goto list_failure;
+		goto list_failure_lock;
 	}
 
 	ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type);
@@ -516,7 +538,7 @@ list_start:
 	nla_nest_end(ans_skb, nla_a);
 
 	switch (doi_def->type) {
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST);
 		if (nla_a == NULL) {
 			ret_val = -ENOMEM;
@@ -655,7 +677,7 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb,
 				  struct netlink_callback *cb)
 {
 	struct netlbl_cipsov4_doiwalk_arg cb_arg;
-	int doi_skip = cb->args[0];
+	u32 doi_skip = cb->args[0];
 
 	cb_arg.nl_cb = cb;
 	cb_arg.skb = skb;
@@ -668,6 +690,29 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb,
 }
 
 /**
+ * netlbl_cipsov4_remove_cb - netlbl_cipsov4_remove() callback for REMOVE
+ * @entry: LSM domain mapping entry
+ * @arg: the netlbl_domhsh_walk_arg structure
+ *
+ * Description:
+ * This function is intended for use by netlbl_cipsov4_remove() as the callback
+ * for the netlbl_domhsh_walk() function; it removes LSM domain map entries
+ * which are associated with the CIPSO DOI specified in @arg.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg)
+{
+	struct netlbl_domhsh_walk_arg *cb_arg = arg;
+
+	if (entry->type == NETLBL_NLTYPE_CIPSOV4 &&
+	    entry->type_def.cipsov4->doi == cb_arg->doi)
+		return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info);
+
+	return 0;
+}
+
+/**
  * netlbl_cipsov4_remove - Handle a REMOVE message
  * @skb: the NETLINK buffer
  * @info: the Generic NETLINK info block
@@ -681,8 +726,11 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
 {
 	int ret_val = -EINVAL;
 	u32 doi = 0;
+	struct netlbl_domhsh_walk_arg cb_arg;
 	struct audit_buffer *audit_buf;
 	struct netlbl_audit audit_info;
+	u32 skip_bkt = 0;
+	u32 skip_chain = 0;
 
 	if (!info->attrs[NLBL_CIPSOV4_A_DOI])
 		return -EINVAL;
@@ -690,11 +738,15 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
 	doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
 	netlbl_netlink_auditinfo(skb, &audit_info);
 
-	ret_val = cipso_v4_doi_remove(doi,
-				      &audit_info,
-				      netlbl_cipsov4_doi_free);
-	if (ret_val == 0)
-		atomic_dec(&netlabel_mgmt_protocount);
+	cb_arg.doi = doi;
+	cb_arg.audit_info = &audit_info;
+	ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain,
+				     netlbl_cipsov4_remove_cb, &cb_arg);
+	if (ret_val == 0 || ret_val == -ENOENT) {
+		ret_val = cipso_v4_doi_remove(doi, &audit_info);
+		if (ret_val == 0)
+			atomic_dec(&netlabel_mgmt_protocount);
+	}
 
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL,
 					      &audit_info);
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index 220cb9d06b49..c8a4079261f0 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -45,12 +45,13 @@
  *     NLBL_CIPSOV4_A_MTYPE
  *     NLBL_CIPSOV4_A_TAGLST
  *
- *   If using CIPSO_V4_MAP_STD the following attributes are required:
+ *   If using CIPSO_V4_MAP_TRANS the following attributes are required:
  *
  *     NLBL_CIPSOV4_A_MLSLVLLST
  *     NLBL_CIPSOV4_A_MLSCATLST
  *
- *   If using CIPSO_V4_MAP_PASS no additional attributes are required.
+ *   If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes
+ *   are required.
  *
  * o REMOVE:
  *   Sent by an application to remove a specific DOI mapping table from the
@@ -76,12 +77,13 @@
  *     NLBL_CIPSOV4_A_MTYPE
  *     NLBL_CIPSOV4_A_TAGLST
  *
- *   If using CIPSO_V4_MAP_STD the following attributes are required:
+ *   If using CIPSO_V4_MAP_TRANS the following attributes are required:
  *
  *     NLBL_CIPSOV4_A_MLSLVLLST
  *     NLBL_CIPSOV4_A_MLSCATLST
  *
- *   If using CIPSO_V4_MAP_PASS no additional attributes are required.
+ *   If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes
+ *   are required.
  *
  * o LISTALL:
  *   This message is sent by an application to list the valid DOIs on the
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 643c032a3a57..5fadf10e5ddf 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -11,7 +11,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -40,6 +40,7 @@
 #include <asm/bug.h>
 
 #include "netlabel_mgmt.h"
+#include "netlabel_addrlist.h"
 #include "netlabel_domainhash.h"
 #include "netlabel_user.h"
 
@@ -72,8 +73,28 @@ static struct netlbl_dom_map *netlbl_domhsh_def = NULL;
 static void netlbl_domhsh_free_entry(struct rcu_head *entry)
 {
 	struct netlbl_dom_map *ptr;
+	struct netlbl_af4list *iter4;
+	struct netlbl_af4list *tmp4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct netlbl_af6list *iter6;
+	struct netlbl_af6list *tmp6;
+#endif /* IPv6 */
 
 	ptr = container_of(entry, struct netlbl_dom_map, rcu);
+	if (ptr->type == NETLBL_NLTYPE_ADDRSELECT) {
+		netlbl_af4list_foreach_safe(iter4, tmp4,
+					    &ptr->type_def.addrsel->list4) {
+			netlbl_af4list_remove_entry(iter4);
+			kfree(netlbl_domhsh_addr4_entry(iter4));
+		}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		netlbl_af6list_foreach_safe(iter6, tmp6,
+					    &ptr->type_def.addrsel->list6) {
+			netlbl_af6list_remove_entry(iter6);
+			kfree(netlbl_domhsh_addr6_entry(iter6));
+		}
+#endif /* IPv6 */
+	}
 	kfree(ptr->domain);
 	kfree(ptr);
 }
@@ -115,13 +136,13 @@ static u32 netlbl_domhsh_hash(const char *key)
 static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain)
 {
 	u32 bkt;
+	struct list_head *bkt_list;
 	struct netlbl_dom_map *iter;
 
 	if (domain != NULL) {
 		bkt = netlbl_domhsh_hash(domain);
-		list_for_each_entry_rcu(iter,
-				     &rcu_dereference(netlbl_domhsh)->tbl[bkt],
-				     list)
+		bkt_list = &rcu_dereference(netlbl_domhsh)->tbl[bkt];
+		list_for_each_entry_rcu(iter, bkt_list, list)
 			if (iter->valid && strcmp(iter->domain, domain) == 0)
 				return iter;
 	}
@@ -156,6 +177,69 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain)
 	return entry;
 }
 
+/**
+ * netlbl_domhsh_audit_add - Generate an audit entry for an add event
+ * @entry: the entry being added
+ * @addr4: the IPv4 address information
+ * @addr6: the IPv6 address information
+ * @result: the result code
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Generate an audit record for adding a new NetLabel/LSM mapping entry with
+ * the given information.  Caller is responsibile for holding the necessary
+ * locks.
+ *
+ */
+static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry,
+				    struct netlbl_af4list *addr4,
+				    struct netlbl_af6list *addr6,
+				    int result,
+				    struct netlbl_audit *audit_info)
+{
+	struct audit_buffer *audit_buf;
+	struct cipso_v4_doi *cipsov4 = NULL;
+	u32 type;
+
+	audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info);
+	if (audit_buf != NULL) {
+		audit_log_format(audit_buf, " nlbl_domain=%s",
+				 entry->domain ? entry->domain : "(default)");
+		if (addr4 != NULL) {
+			struct netlbl_domaddr4_map *map4;
+			map4 = netlbl_domhsh_addr4_entry(addr4);
+			type = map4->type;
+			cipsov4 = map4->type_def.cipsov4;
+			netlbl_af4list_audit_addr(audit_buf, 0, NULL,
+						  addr4->addr, addr4->mask);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		} else if (addr6 != NULL) {
+			struct netlbl_domaddr6_map *map6;
+			map6 = netlbl_domhsh_addr6_entry(addr6);
+			type = map6->type;
+			netlbl_af6list_audit_addr(audit_buf, 0, NULL,
+						  &addr6->addr, &addr6->mask);
+#endif /* IPv6 */
+		} else {
+			type = entry->type;
+			cipsov4 = entry->type_def.cipsov4;
+		}
+		switch (type) {
+		case NETLBL_NLTYPE_UNLABELED:
+			audit_log_format(audit_buf, " nlbl_protocol=unlbl");
+			break;
+		case NETLBL_NLTYPE_CIPSOV4:
+			BUG_ON(cipsov4 == NULL);
+			audit_log_format(audit_buf,
+					 " nlbl_protocol=cipsov4 cipso_doi=%u",
+					 cipsov4->doi);
+			break;
+		}
+		audit_log_format(audit_buf, " res=%u", result == 0 ? 1 : 0);
+		audit_log_end(audit_buf);
+	}
+}
+
 /*
  * Domain Hash Table Functions
  */
@@ -213,74 +297,106 @@ int __init netlbl_domhsh_init(u32 size)
 int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 		      struct netlbl_audit *audit_info)
 {
-	int ret_val;
-	u32 bkt;
-	struct audit_buffer *audit_buf;
-
-	switch (entry->type) {
-	case NETLBL_NLTYPE_UNLABELED:
-		ret_val = 0;
-		break;
-	case NETLBL_NLTYPE_CIPSOV4:
-		ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4,
-						  entry->domain);
-		break;
-	default:
-		return -EINVAL;
-	}
-	if (ret_val != 0)
-		return ret_val;
-
-	entry->valid = 1;
-	INIT_RCU_HEAD(&entry->rcu);
+	int ret_val = 0;
+	struct netlbl_dom_map *entry_old;
+	struct netlbl_af4list *iter4;
+	struct netlbl_af4list *tmp4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct netlbl_af6list *iter6;
+	struct netlbl_af6list *tmp6;
+#endif /* IPv6 */
 
 	rcu_read_lock();
+
 	spin_lock(&netlbl_domhsh_lock);
-	if (entry->domain != NULL) {
-		bkt = netlbl_domhsh_hash(entry->domain);
-		if (netlbl_domhsh_search(entry->domain) == NULL)
+	if (entry->domain != NULL)
+		entry_old = netlbl_domhsh_search(entry->domain);
+	else
+		entry_old = netlbl_domhsh_search_def(entry->domain);
+	if (entry_old == NULL) {
+		entry->valid = 1;
+		INIT_RCU_HEAD(&entry->rcu);
+
+		if (entry->domain != NULL) {
+			u32 bkt = netlbl_domhsh_hash(entry->domain);
 			list_add_tail_rcu(&entry->list,
 				    &rcu_dereference(netlbl_domhsh)->tbl[bkt]);
-		else
-			ret_val = -EEXIST;
-	} else {
-		INIT_LIST_HEAD(&entry->list);
-		if (rcu_dereference(netlbl_domhsh_def) == NULL)
+		} else {
+			INIT_LIST_HEAD(&entry->list);
 			rcu_assign_pointer(netlbl_domhsh_def, entry);
-		else
-			ret_val = -EEXIST;
-	}
-	spin_unlock(&netlbl_domhsh_lock);
-	audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info);
-	if (audit_buf != NULL) {
-		audit_log_format(audit_buf,
-				 " nlbl_domain=%s",
-				 entry->domain ? entry->domain : "(default)");
-		switch (entry->type) {
-		case NETLBL_NLTYPE_UNLABELED:
-			audit_log_format(audit_buf, " nlbl_protocol=unlbl");
-			break;
-		case NETLBL_NLTYPE_CIPSOV4:
-			audit_log_format(audit_buf,
-					 " nlbl_protocol=cipsov4 cipso_doi=%u",
-					 entry->type_def.cipsov4->doi);
-			break;
 		}
-		audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0);
-		audit_log_end(audit_buf);
-	}
-	rcu_read_unlock();
 
-	if (ret_val != 0) {
-		switch (entry->type) {
-		case NETLBL_NLTYPE_CIPSOV4:
-			if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
-						       entry->domain) != 0)
-				BUG();
-			break;
+		if (entry->type == NETLBL_NLTYPE_ADDRSELECT) {
+			netlbl_af4list_foreach_rcu(iter4,
+					       &entry->type_def.addrsel->list4)
+				netlbl_domhsh_audit_add(entry, iter4, NULL,
+							ret_val, audit_info);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+			netlbl_af6list_foreach_rcu(iter6,
+					       &entry->type_def.addrsel->list6)
+				netlbl_domhsh_audit_add(entry, NULL, iter6,
+							ret_val, audit_info);
+#endif /* IPv6 */
+		} else
+			netlbl_domhsh_audit_add(entry, NULL, NULL,
+						ret_val, audit_info);
+	} else if (entry_old->type == NETLBL_NLTYPE_ADDRSELECT &&
+		   entry->type == NETLBL_NLTYPE_ADDRSELECT) {
+		struct list_head *old_list4;
+		struct list_head *old_list6;
+
+		old_list4 = &entry_old->type_def.addrsel->list4;
+		old_list6 = &entry_old->type_def.addrsel->list6;
+
+		/* we only allow the addition of address selectors if all of
+		 * the selectors do not exist in the existing domain map */
+		netlbl_af4list_foreach_rcu(iter4,
+					   &entry->type_def.addrsel->list4)
+			if (netlbl_af4list_search_exact(iter4->addr,
+							iter4->mask,
+							old_list4)) {
+				ret_val = -EEXIST;
+				goto add_return;
+			}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		netlbl_af6list_foreach_rcu(iter6,
+					   &entry->type_def.addrsel->list6)
+			if (netlbl_af6list_search_exact(&iter6->addr,
+							&iter6->mask,
+							old_list6)) {
+				ret_val = -EEXIST;
+				goto add_return;
+			}
+#endif /* IPv6 */
+
+		netlbl_af4list_foreach_safe(iter4, tmp4,
+					    &entry->type_def.addrsel->list4) {
+			netlbl_af4list_remove_entry(iter4);
+			iter4->valid = 1;
+			ret_val = netlbl_af4list_add(iter4, old_list4);
+			netlbl_domhsh_audit_add(entry_old, iter4, NULL,
+						ret_val, audit_info);
+			if (ret_val != 0)
+				goto add_return;
 		}
-	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		netlbl_af6list_foreach_safe(iter6, tmp6,
+					    &entry->type_def.addrsel->list6) {
+			netlbl_af6list_remove_entry(iter6);
+			iter6->valid = 1;
+			ret_val = netlbl_af6list_add(iter6, old_list6);
+			netlbl_domhsh_audit_add(entry_old, NULL, iter6,
+						ret_val, audit_info);
+			if (ret_val != 0)
+				goto add_return;
+		}
+#endif /* IPv6 */
+	} else
+		ret_val = -EINVAL;
 
+add_return:
+	spin_unlock(&netlbl_domhsh_lock);
+	rcu_read_unlock();
 	return ret_val;
 }
 
@@ -302,35 +418,26 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
 }
 
 /**
- * netlbl_domhsh_remove - Removes an entry from the domain hash table
- * @domain: the domain to remove
+ * netlbl_domhsh_remove_entry - Removes a given entry from the domain table
+ * @entry: the entry to remove
  * @audit_info: NetLabel audit information
  *
  * Description:
  * Removes an entry from the domain hash table and handles any updates to the
- * lower level protocol handler (i.e. CIPSO).  Returns zero on success,
- * negative on failure.
+ * lower level protocol handler (i.e. CIPSO).  Caller is responsible for
+ * ensuring that the RCU read lock is held.  Returns zero on success, negative
+ * on failure.
  *
  */
-int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
+int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
+			       struct netlbl_audit *audit_info)
 {
-	int ret_val = -ENOENT;
-	struct netlbl_dom_map *entry;
+	int ret_val = 0;
 	struct audit_buffer *audit_buf;
 
-	rcu_read_lock();
-	if (domain)
-		entry = netlbl_domhsh_search(domain);
-	else
-		entry = netlbl_domhsh_search_def(domain);
 	if (entry == NULL)
-		goto remove_return;
-	switch (entry->type) {
-	case NETLBL_NLTYPE_CIPSOV4:
-		cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
-					   entry->domain);
-		break;
-	}
+		return -ENOENT;
+
 	spin_lock(&netlbl_domhsh_lock);
 	if (entry->valid) {
 		entry->valid = 0;
@@ -338,8 +445,8 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
 			list_del_rcu(&entry->list);
 		else
 			rcu_assign_pointer(netlbl_domhsh_def, NULL);
-		ret_val = 0;
-	}
+	} else
+		ret_val = -ENOENT;
 	spin_unlock(&netlbl_domhsh_lock);
 
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info);
@@ -351,10 +458,54 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
 		audit_log_end(audit_buf);
 	}
 
-remove_return:
-	rcu_read_unlock();
-	if (ret_val == 0)
+	if (ret_val == 0) {
+		struct netlbl_af4list *iter4;
+		struct netlbl_domaddr4_map *map4;
+
+		switch (entry->type) {
+		case NETLBL_NLTYPE_ADDRSELECT:
+			netlbl_af4list_foreach_rcu(iter4,
+					     &entry->type_def.addrsel->list4) {
+				map4 = netlbl_domhsh_addr4_entry(iter4);
+				cipso_v4_doi_putdef(map4->type_def.cipsov4);
+			}
+			/* no need to check the IPv6 list since we currently
+			 * support only unlabeled protocols for IPv6 */
+			break;
+		case NETLBL_NLTYPE_CIPSOV4:
+			cipso_v4_doi_putdef(entry->type_def.cipsov4);
+			break;
+		}
 		call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
+	}
+
+	return ret_val;
+}
+
+/**
+ * netlbl_domhsh_remove - Removes an entry from the domain hash table
+ * @domain: the domain to remove
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Removes an entry from the domain hash table and handles any updates to the
+ * lower level protocol handler (i.e. CIPSO).  Returns zero on success,
+ * negative on failure.
+ *
+ */
+int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
+{
+	int ret_val;
+	struct netlbl_dom_map *entry;
+
+	rcu_read_lock();
+	if (domain)
+		entry = netlbl_domhsh_search(domain);
+	else
+		entry = netlbl_domhsh_search_def(domain);
+	ret_val = netlbl_domhsh_remove_entry(entry, audit_info);
+	rcu_read_unlock();
+
 	return ret_val;
 }
 
@@ -389,6 +540,70 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain)
 }
 
 /**
+ * netlbl_domhsh_getentry_af4 - Get an entry from the domain hash table
+ * @domain: the domain name to search for
+ * @addr: the IP address to search for
+ *
+ * Description:
+ * Look through the domain hash table searching for an entry to match @domain
+ * and @addr, return a pointer to a copy of the entry or NULL.  The caller is
+ * responsible for ensuring that rcu_read_[un]lock() is called.
+ *
+ */
+struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
+						       __be32 addr)
+{
+	struct netlbl_dom_map *dom_iter;
+	struct netlbl_af4list *addr_iter;
+
+	dom_iter = netlbl_domhsh_search_def(domain);
+	if (dom_iter == NULL)
+		return NULL;
+	if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT)
+		return NULL;
+
+	addr_iter = netlbl_af4list_search(addr,
+					  &dom_iter->type_def.addrsel->list4);
+	if (addr_iter == NULL)
+		return NULL;
+
+	return netlbl_domhsh_addr4_entry(addr_iter);
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_domhsh_getentry_af6 - Get an entry from the domain hash table
+ * @domain: the domain name to search for
+ * @addr: the IP address to search for
+ *
+ * Description:
+ * Look through the domain hash table searching for an entry to match @domain
+ * and @addr, return a pointer to a copy of the entry or NULL.  The caller is
+ * responsible for ensuring that rcu_read_[un]lock() is called.
+ *
+ */
+struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
+						   const struct in6_addr *addr)
+{
+	struct netlbl_dom_map *dom_iter;
+	struct netlbl_af6list *addr_iter;
+
+	dom_iter = netlbl_domhsh_search_def(domain);
+	if (dom_iter == NULL)
+		return NULL;
+	if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT)
+		return NULL;
+
+	addr_iter = netlbl_af6list_search(addr,
+					  &dom_iter->type_def.addrsel->list6);
+	if (addr_iter == NULL)
+		return NULL;
+
+	return netlbl_domhsh_addr6_entry(addr_iter);
+}
+#endif /* IPv6 */
+
+/**
  * netlbl_domhsh_walk - Iterate through the domain mapping hash table
  * @skip_bkt: the number of buckets to skip at the start
  * @skip_chain: the number of entries to skip in the first iterated bucket
@@ -410,6 +625,7 @@ int netlbl_domhsh_walk(u32 *skip_bkt,
 {
 	int ret_val = -ENOENT;
 	u32 iter_bkt;
+	struct list_head *iter_list;
 	struct netlbl_dom_map *iter_entry;
 	u32 chain_cnt = 0;
 
@@ -417,9 +633,8 @@ int netlbl_domhsh_walk(u32 *skip_bkt,
 	for (iter_bkt = *skip_bkt;
 	     iter_bkt < rcu_dereference(netlbl_domhsh)->size;
 	     iter_bkt++, chain_cnt = 0) {
-		list_for_each_entry_rcu(iter_entry,
-				&rcu_dereference(netlbl_domhsh)->tbl[iter_bkt],
-				list)
+		iter_list = &rcu_dereference(netlbl_domhsh)->tbl[iter_bkt];
+		list_for_each_entry_rcu(iter_entry, iter_list, list)
 			if (iter_entry->valid) {
 				if (chain_cnt++ < *skip_chain)
 					continue;
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 8220990ceb96..bfcb6763a1a1 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -11,7 +11,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -36,16 +36,43 @@
 #include <linux/rcupdate.h>
 #include <linux/list.h>
 
+#include "netlabel_addrlist.h"
+
 /* Domain hash table size */
 /* XXX - currently this number is an uneducated guess */
 #define NETLBL_DOMHSH_BITSIZE       7
 
-/* Domain mapping definition struct */
+/* Domain mapping definition structures */
+#define netlbl_domhsh_addr4_entry(iter) \
+	container_of(iter, struct netlbl_domaddr4_map, list)
+struct netlbl_domaddr4_map {
+	u32 type;
+	union {
+		struct cipso_v4_doi *cipsov4;
+	} type_def;
+
+	struct netlbl_af4list list;
+};
+#define netlbl_domhsh_addr6_entry(iter) \
+	container_of(iter, struct netlbl_domaddr6_map, list)
+struct netlbl_domaddr6_map {
+	u32 type;
+
+	/* NOTE: no 'type_def' union needed at present since we don't currently
+	 *       support any IPv6 labeling protocols */
+
+	struct netlbl_af6list list;
+};
+struct netlbl_domaddr_map {
+	struct list_head list4;
+	struct list_head list6;
+};
 struct netlbl_dom_map {
 	char *domain;
 	u32 type;
 	union {
 		struct cipso_v4_doi *cipsov4;
+		struct netlbl_domaddr_map *addrsel;
 	} type_def;
 
 	u32 valid;
@@ -61,12 +88,21 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 		      struct netlbl_audit *audit_info);
 int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
 			      struct netlbl_audit *audit_info);
+int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
+			       struct netlbl_audit *audit_info);
 int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info);
 int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info);
 struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
+struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
+						       __be32 addr);
 int netlbl_domhsh_walk(u32 *skip_bkt,
 		     u32 *skip_chain,
 		     int (*callback) (struct netlbl_dom_map *entry, void *arg),
 		     void *cb_arg);
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
+						  const struct in6_addr *addr);
+#endif /* IPv6 */
+
 #endif
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 39793a1a93aa..b32eceb3ab0d 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -10,7 +10,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -82,7 +82,7 @@ int netlbl_cfg_unlbl_add_map(const char *domain,
 
 	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 	if (entry == NULL)
-		goto cfg_unlbl_add_map_failure;
+		return -ENOMEM;
 	if (domain != NULL) {
 		entry->domain = kstrdup(domain, GFP_ATOMIC);
 		if (entry->domain == NULL)
@@ -104,49 +104,6 @@ cfg_unlbl_add_map_failure:
 }
 
 /**
- * netlbl_cfg_cipsov4_add - Add a new CIPSOv4 DOI definition
- * @doi_def: the DOI definition
- * @audit_info: NetLabel audit information
- *
- * Description:
- * Add a new CIPSOv4 DOI definition to the NetLabel subsystem.  Returns zero on
- * success, negative values on failure.
- *
- */
-int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def,
-			   struct netlbl_audit *audit_info)
-{
-	int ret_val;
-	const char *type_str;
-	struct audit_buffer *audit_buf;
-
-	ret_val = cipso_v4_doi_add(doi_def);
-
-	audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
-					      audit_info);
-	if (audit_buf != NULL) {
-		switch (doi_def->type) {
-		case CIPSO_V4_MAP_STD:
-			type_str = "std";
-			break;
-		case CIPSO_V4_MAP_PASS:
-			type_str = "pass";
-			break;
-		default:
-			type_str = "(unknown)";
-		}
-		audit_log_format(audit_buf,
-				 " cipso_doi=%u cipso_type=%s res=%u",
-				 doi_def->doi,
-				 type_str,
-				 ret_val == 0 ? 1 : 0);
-		audit_log_end(audit_buf);
-	}
-
-	return ret_val;
-}
-
-/**
  * netlbl_cfg_cipsov4_add_map - Add a new CIPSOv4 DOI definition and mapping
  * @doi_def: the DOI definition
  * @domain: the domain mapping to add
@@ -164,58 +121,71 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def,
 			       struct netlbl_audit *audit_info)
 {
 	int ret_val = -ENOMEM;
+	u32 doi;
+	u32 doi_type;
 	struct netlbl_dom_map *entry;
+	const char *type_str;
+	struct audit_buffer *audit_buf;
+
+	doi = doi_def->doi;
+	doi_type = doi_def->type;
 
 	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 	if (entry == NULL)
-		goto cfg_cipsov4_add_map_failure;
+		return -ENOMEM;
 	if (domain != NULL) {
 		entry->domain = kstrdup(domain, GFP_ATOMIC);
 		if (entry->domain == NULL)
 			goto cfg_cipsov4_add_map_failure;
 	}
-	entry->type = NETLBL_NLTYPE_CIPSOV4;
-	entry->type_def.cipsov4 = doi_def;
-
-	/* Grab a RCU read lock here so nothing happens to the doi_def variable
-	 * between adding it to the CIPSOv4 protocol engine and adding a
-	 * domain mapping for it. */
 
-	rcu_read_lock();
-	ret_val = netlbl_cfg_cipsov4_add(doi_def, audit_info);
+	ret_val = cipso_v4_doi_add(doi_def);
 	if (ret_val != 0)
-		goto cfg_cipsov4_add_map_failure_unlock;
+		goto cfg_cipsov4_add_map_failure_remove_doi;
+	entry->type = NETLBL_NLTYPE_CIPSOV4;
+	entry->type_def.cipsov4 = cipso_v4_doi_getdef(doi);
+	if (entry->type_def.cipsov4 == NULL) {
+		ret_val = -ENOENT;
+		goto cfg_cipsov4_add_map_failure_remove_doi;
+	}
 	ret_val = netlbl_domhsh_add(entry, audit_info);
 	if (ret_val != 0)
-		goto cfg_cipsov4_add_map_failure_remove_doi;
-	rcu_read_unlock();
+		goto cfg_cipsov4_add_map_failure_release_doi;
 
-	return 0;
+cfg_cipsov4_add_map_return:
+	audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
+					      audit_info);
+	if (audit_buf != NULL) {
+		switch (doi_type) {
+		case CIPSO_V4_MAP_TRANS:
+			type_str = "trans";
+			break;
+		case CIPSO_V4_MAP_PASS:
+			type_str = "pass";
+			break;
+		case CIPSO_V4_MAP_LOCAL:
+			type_str = "local";
+			break;
+		default:
+			type_str = "(unknown)";
+		}
+		audit_log_format(audit_buf,
+				 " cipso_doi=%u cipso_type=%s res=%u",
+				 doi, type_str, ret_val == 0 ? 1 : 0);
+		audit_log_end(audit_buf);
+	}
 
+	return ret_val;
+
+cfg_cipsov4_add_map_failure_release_doi:
+	cipso_v4_doi_putdef(doi_def);
 cfg_cipsov4_add_map_failure_remove_doi:
-	cipso_v4_doi_remove(doi_def->doi, audit_info, netlbl_cipsov4_doi_free);
-cfg_cipsov4_add_map_failure_unlock:
-	rcu_read_unlock();
+	cipso_v4_doi_remove(doi, audit_info);
 cfg_cipsov4_add_map_failure:
 	if (entry != NULL)
 		kfree(entry->domain);
 	kfree(entry);
-	return ret_val;
-}
-
-/**
- * netlbl_cfg_cipsov4_del - Removean existing CIPSOv4 DOI definition
- * @doi: the CIPSO DOI value
- * @audit_info: NetLabel audit information
- *
- * Description:
- * Removes an existing CIPSOv4 DOI definition from the NetLabel subsystem.
- * Returns zero on success, negative values on failure.
- *
- */
-int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info)
-{
-	return cipso_v4_doi_remove(doi, audit_info, netlbl_cipsov4_doi_free);
+	goto cfg_cipsov4_add_map_return;
 }
 
 /*
@@ -452,7 +422,9 @@ int netlbl_enabled(void)
  * Attach the correct label to the given socket using the security attributes
  * specified in @secattr.  This function requires exclusive access to @sk,
  * which means it either needs to be in the process of being created or locked.
- * Returns zero on success, negative values on failure.
+ * Returns zero on success, -EDESTADDRREQ if the domain is configured to use
+ * network address selectors (can't blindly label the socket), and negative
+ * values on all other failures.
  *
  */
 int netlbl_sock_setattr(struct sock *sk,
@@ -466,6 +438,9 @@ int netlbl_sock_setattr(struct sock *sk,
 	if (dom_entry == NULL)
 		goto socket_setattr_return;
 	switch (dom_entry->type) {
+	case NETLBL_NLTYPE_ADDRSELECT:
+		ret_val = -EDESTADDRREQ;
+		break;
 	case NETLBL_NLTYPE_CIPSOV4:
 		ret_val = cipso_v4_sock_setattr(sk,
 						dom_entry->type_def.cipsov4,
@@ -484,6 +459,20 @@ socket_setattr_return:
 }
 
 /**
+ * netlbl_sock_delattr - Delete all the NetLabel labels on a socket
+ * @sk: the socket
+ *
+ * Description:
+ * Remove all the NetLabel labeling from @sk.  The caller is responsible for
+ * ensuring that @sk is locked.
+ *
+ */
+void netlbl_sock_delattr(struct sock *sk)
+{
+	cipso_v4_sock_delattr(sk);
+}
+
+/**
  * netlbl_sock_getattr - Determine the security attributes of a sock
  * @sk: the sock
  * @secattr: the security attributes
@@ -501,6 +490,128 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
 }
 
 /**
+ * netlbl_conn_setattr - Label a connected socket using the correct protocol
+ * @sk: the socket to label
+ * @addr: the destination address
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Attach the correct label to the given connected socket using the security
+ * attributes specified in @secattr.  The caller is responsible for ensuring
+ * that @sk is locked.  Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_conn_setattr(struct sock *sk,
+			struct sockaddr *addr,
+			const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+	struct sockaddr_in *addr4;
+	struct netlbl_domaddr4_map *af4_entry;
+
+	rcu_read_lock();
+	switch (addr->sa_family) {
+	case AF_INET:
+		addr4 = (struct sockaddr_in *)addr;
+		af4_entry = netlbl_domhsh_getentry_af4(secattr->domain,
+						       addr4->sin_addr.s_addr);
+		if (af4_entry == NULL) {
+			ret_val = -ENOENT;
+			goto conn_setattr_return;
+		}
+		switch (af4_entry->type) {
+		case NETLBL_NLTYPE_CIPSOV4:
+			ret_val = cipso_v4_sock_setattr(sk,
+						   af4_entry->type_def.cipsov4,
+						   secattr);
+			break;
+		case NETLBL_NLTYPE_UNLABELED:
+			/* just delete the protocols we support for right now
+			 * but we could remove other protocols if needed */
+			cipso_v4_sock_delattr(sk);
+			ret_val = 0;
+			break;
+		default:
+			ret_val = -ENOENT;
+		}
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		/* since we don't support any IPv6 labeling protocols right
+		 * now we can optimize everything away until we do */
+		ret_val = 0;
+		break;
+#endif /* IPv6 */
+	default:
+		ret_val = 0;
+	}
+
+conn_setattr_return:
+	rcu_read_unlock();
+	return ret_val;
+}
+
+/**
+ * netlbl_skbuff_setattr - Label a packet using the correct protocol
+ * @skb: the packet
+ * @family: protocol family
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Attach the correct label to the given packet using the security attributes
+ * specified in @secattr.  Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_skbuff_setattr(struct sk_buff *skb,
+			  u16 family,
+			  const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+	struct iphdr *hdr4;
+	struct netlbl_domaddr4_map *af4_entry;
+
+	rcu_read_lock();
+	switch (family) {
+	case AF_INET:
+		hdr4 = ip_hdr(skb);
+		af4_entry = netlbl_domhsh_getentry_af4(secattr->domain,
+						       hdr4->daddr);
+		if (af4_entry == NULL) {
+			ret_val = -ENOENT;
+			goto skbuff_setattr_return;
+		}
+		switch (af4_entry->type) {
+		case NETLBL_NLTYPE_CIPSOV4:
+			ret_val = cipso_v4_skbuff_setattr(skb,
+						   af4_entry->type_def.cipsov4,
+						   secattr);
+			break;
+		case NETLBL_NLTYPE_UNLABELED:
+			/* just delete the protocols we support for right now
+			 * but we could remove other protocols if needed */
+			ret_val = cipso_v4_skbuff_delattr(skb);
+			break;
+		default:
+			ret_val = -ENOENT;
+		}
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		/* since we don't support any IPv6 labeling protocols right
+		 * now we can optimize everything away until we do */
+		ret_val = 0;
+		break;
+#endif /* IPv6 */
+	default:
+		ret_val = 0;
+	}
+
+skbuff_setattr_return:
+	rcu_read_unlock();
+	return ret_val;
+}
+
+/**
  * netlbl_skbuff_getattr - Determine the security attributes of a packet
  * @skb: the packet
  * @family: protocol family
@@ -528,6 +639,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
  * netlbl_skbuff_err - Handle a LSM error on a sk_buff
  * @skb: the packet
  * @error: the error code
+ * @gateway: true if host is acting as a gateway, false otherwise
  *
  * Description:
  * Deal with a LSM problem when handling the packet in @skb, typically this is
@@ -535,10 +647,10 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
  * according to the packet's labeling protocol.
  *
  */
-void netlbl_skbuff_err(struct sk_buff *skb, int error)
+void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway)
 {
 	if (CIPSO_V4_OPTEXIST(skb))
-		cipso_v4_error(skb, error, 0);
+		cipso_v4_error(skb, error, gateway);
 }
 
 /**
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 44be5d5261f4..ee769ecaa13c 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -10,7 +10,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -32,9 +32,13 @@
 #include <linux/socket.h>
 #include <linux/string.h>
 #include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/in6.h>
 #include <net/sock.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
 #include <net/netlabel.h>
 #include <net/cipso_ipv4.h>
 #include <asm/atomic.h>
@@ -71,86 +75,337 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
 };
 
 /*
- * NetLabel Command Handlers
+ * Helper Functions
  */
 
 /**
  * netlbl_mgmt_add - Handle an ADD message
- * @skb: the NETLINK buffer
  * @info: the Generic NETLINK info block
+ * @audit_info: NetLabel audit information
  *
  * Description:
- * Process a user generated ADD message and add the domains from the message
- * to the hash table.  See netlabel.h for a description of the message format.
- * Returns zero on success, negative values on failure.
+ * Helper function for the ADD and ADDDEF messages to add the domain mappings
+ * from the message to the hash table.  See netlabel.h for a description of the
+ * message format.  Returns zero on success, negative values on failure.
  *
  */
-static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
+static int netlbl_mgmt_add_common(struct genl_info *info,
+				  struct netlbl_audit *audit_info)
 {
 	int ret_val = -EINVAL;
 	struct netlbl_dom_map *entry = NULL;
-	size_t tmp_size;
+	struct netlbl_domaddr_map *addrmap = NULL;
+	struct cipso_v4_doi *cipsov4 = NULL;
 	u32 tmp_val;
-	struct netlbl_audit audit_info;
-
-	if (!info->attrs[NLBL_MGMT_A_DOMAIN] ||
-	    !info->attrs[NLBL_MGMT_A_PROTOCOL])
-		goto add_failure;
-
-	netlbl_netlink_auditinfo(skb, &audit_info);
 
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (entry == NULL) {
 		ret_val = -ENOMEM;
 		goto add_failure;
 	}
-	tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]);
-	entry->domain = kmalloc(tmp_size, GFP_KERNEL);
-	if (entry->domain == NULL) {
-		ret_val = -ENOMEM;
-		goto add_failure;
-	}
 	entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
-	nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size);
+	if (info->attrs[NLBL_MGMT_A_DOMAIN]) {
+		size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]);
+		entry->domain = kmalloc(tmp_size, GFP_KERNEL);
+		if (entry->domain == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		nla_strlcpy(entry->domain,
+			    info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size);
+	}
+
+	/* NOTE: internally we allow/use a entry->type value of
+	 *       NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users
+	 *       to pass that as a protocol value because we need to know the
+	 *       "real" protocol */
 
 	switch (entry->type) {
 	case NETLBL_NLTYPE_UNLABELED:
-		ret_val = netlbl_domhsh_add(entry, &audit_info);
 		break;
 	case NETLBL_NLTYPE_CIPSOV4:
 		if (!info->attrs[NLBL_MGMT_A_CV4DOI])
 			goto add_failure;
 
 		tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
-		/* We should be holding a rcu_read_lock() here while we hold
-		 * the result but since the entry will always be deleted when
-		 * the CIPSO DOI is deleted we aren't going to keep the
-		 * lock. */
-		rcu_read_lock();
-		entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
-		if (entry->type_def.cipsov4 == NULL) {
-			rcu_read_unlock();
+		cipsov4 = cipso_v4_doi_getdef(tmp_val);
+		if (cipsov4 == NULL)
 			goto add_failure;
-		}
-		ret_val = netlbl_domhsh_add(entry, &audit_info);
-		rcu_read_unlock();
+		entry->type_def.cipsov4 = cipsov4;
 		break;
 	default:
 		goto add_failure;
 	}
+
+	if (info->attrs[NLBL_MGMT_A_IPV4ADDR]) {
+		struct in_addr *addr;
+		struct in_addr *mask;
+		struct netlbl_domaddr4_map *map;
+
+		addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL);
+		if (addrmap == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		INIT_LIST_HEAD(&addrmap->list4);
+		INIT_LIST_HEAD(&addrmap->list6);
+
+		if (nla_len(info->attrs[NLBL_MGMT_A_IPV4ADDR]) !=
+		    sizeof(struct in_addr)) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		if (nla_len(info->attrs[NLBL_MGMT_A_IPV4MASK]) !=
+		    sizeof(struct in_addr)) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		addr = nla_data(info->attrs[NLBL_MGMT_A_IPV4ADDR]);
+		mask = nla_data(info->attrs[NLBL_MGMT_A_IPV4MASK]);
+
+		map = kzalloc(sizeof(*map), GFP_KERNEL);
+		if (map == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		map->list.addr = addr->s_addr & mask->s_addr;
+		map->list.mask = mask->s_addr;
+		map->list.valid = 1;
+		map->type = entry->type;
+		if (cipsov4)
+			map->type_def.cipsov4 = cipsov4;
+
+		ret_val = netlbl_af4list_add(&map->list, &addrmap->list4);
+		if (ret_val != 0) {
+			kfree(map);
+			goto add_failure;
+		}
+
+		entry->type = NETLBL_NLTYPE_ADDRSELECT;
+		entry->type_def.addrsel = addrmap;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	} else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) {
+		struct in6_addr *addr;
+		struct in6_addr *mask;
+		struct netlbl_domaddr6_map *map;
+
+		addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL);
+		if (addrmap == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		INIT_LIST_HEAD(&addrmap->list4);
+		INIT_LIST_HEAD(&addrmap->list6);
+
+		if (nla_len(info->attrs[NLBL_MGMT_A_IPV6ADDR]) !=
+		    sizeof(struct in6_addr)) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		if (nla_len(info->attrs[NLBL_MGMT_A_IPV6MASK]) !=
+		    sizeof(struct in6_addr)) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		addr = nla_data(info->attrs[NLBL_MGMT_A_IPV6ADDR]);
+		mask = nla_data(info->attrs[NLBL_MGMT_A_IPV6MASK]);
+
+		map = kzalloc(sizeof(*map), GFP_KERNEL);
+		if (map == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		ipv6_addr_copy(&map->list.addr, addr);
+		map->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
+		map->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
+		map->list.addr.s6_addr32[2] &= mask->s6_addr32[2];
+		map->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
+		ipv6_addr_copy(&map->list.mask, mask);
+		map->list.valid = 1;
+		map->type = entry->type;
+
+		ret_val = netlbl_af6list_add(&map->list, &addrmap->list6);
+		if (ret_val != 0) {
+			kfree(map);
+			goto add_failure;
+		}
+
+		entry->type = NETLBL_NLTYPE_ADDRSELECT;
+		entry->type_def.addrsel = addrmap;
+#endif /* IPv6 */
+	}
+
+	ret_val = netlbl_domhsh_add(entry, audit_info);
 	if (ret_val != 0)
 		goto add_failure;
 
 	return 0;
 
 add_failure:
+	if (cipsov4)
+		cipso_v4_doi_putdef(cipsov4);
 	if (entry)
 		kfree(entry->domain);
+	kfree(addrmap);
 	kfree(entry);
 	return ret_val;
 }
 
 /**
+ * netlbl_mgmt_listentry - List a NetLabel/LSM domain map entry
+ * @skb: the NETLINK buffer
+ * @entry: the map entry
+ *
+ * Description:
+ * This function is a helper function used by the LISTALL and LISTDEF command
+ * handlers.  The caller is responsibile for ensuring that the RCU read lock
+ * is held.  Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_listentry(struct sk_buff *skb,
+				 struct netlbl_dom_map *entry)
+{
+	int ret_val;
+	struct nlattr *nla_a;
+	struct nlattr *nla_b;
+	struct netlbl_af4list *iter4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct netlbl_af6list *iter6;
+#endif
+
+	if (entry->domain != NULL) {
+		ret_val = nla_put_string(skb,
+					 NLBL_MGMT_A_DOMAIN, entry->domain);
+		if (ret_val != 0)
+			return ret_val;
+	}
+
+	switch (entry->type) {
+	case NETLBL_NLTYPE_ADDRSELECT:
+		nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST);
+		if (nla_a == NULL)
+			return -ENOMEM;
+
+		netlbl_af4list_foreach_rcu(iter4,
+					   &entry->type_def.addrsel->list4) {
+			struct netlbl_domaddr4_map *map4;
+			struct in_addr addr_struct;
+
+			nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR);
+			if (nla_b == NULL)
+				return -ENOMEM;
+
+			addr_struct.s_addr = iter4->addr;
+			ret_val = nla_put(skb, NLBL_MGMT_A_IPV4ADDR,
+					  sizeof(struct in_addr),
+					  &addr_struct);
+			if (ret_val != 0)
+				return ret_val;
+			addr_struct.s_addr = iter4->mask;
+			ret_val = nla_put(skb, NLBL_MGMT_A_IPV4MASK,
+					  sizeof(struct in_addr),
+					  &addr_struct);
+			if (ret_val != 0)
+				return ret_val;
+			map4 = netlbl_domhsh_addr4_entry(iter4);
+			ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL,
+					      map4->type);
+			if (ret_val != 0)
+				return ret_val;
+			switch (map4->type) {
+			case NETLBL_NLTYPE_CIPSOV4:
+				ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI,
+						  map4->type_def.cipsov4->doi);
+				if (ret_val != 0)
+					return ret_val;
+				break;
+			}
+
+			nla_nest_end(skb, nla_b);
+		}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		netlbl_af6list_foreach_rcu(iter6,
+					   &entry->type_def.addrsel->list6) {
+			struct netlbl_domaddr6_map *map6;
+
+			nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR);
+			if (nla_b == NULL)
+				return -ENOMEM;
+
+			ret_val = nla_put(skb, NLBL_MGMT_A_IPV6ADDR,
+					  sizeof(struct in6_addr),
+					  &iter6->addr);
+			if (ret_val != 0)
+				return ret_val;
+			ret_val = nla_put(skb, NLBL_MGMT_A_IPV6MASK,
+					  sizeof(struct in6_addr),
+					  &iter6->mask);
+			if (ret_val != 0)
+				return ret_val;
+			map6 = netlbl_domhsh_addr6_entry(iter6);
+			ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL,
+					      map6->type);
+			if (ret_val != 0)
+				return ret_val;
+
+			nla_nest_end(skb, nla_b);
+		}
+#endif /* IPv6 */
+
+		nla_nest_end(skb, nla_a);
+		break;
+	case NETLBL_NLTYPE_UNLABELED:
+		ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type);
+		break;
+	case NETLBL_NLTYPE_CIPSOV4:
+		ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type);
+		if (ret_val != 0)
+			return ret_val;
+		ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI,
+				      entry->type_def.cipsov4->doi);
+		break;
+	}
+
+	return ret_val;
+}
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_mgmt_add - Handle an ADD message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ADD message and add the domains from the message
+ * to the hash table.  See netlabel.h for a description of the message format.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
+{
+	struct netlbl_audit audit_info;
+
+	if ((!info->attrs[NLBL_MGMT_A_DOMAIN]) ||
+	    (!info->attrs[NLBL_MGMT_A_PROTOCOL]) ||
+	    (info->attrs[NLBL_MGMT_A_IPV4ADDR] &&
+	     info->attrs[NLBL_MGMT_A_IPV6ADDR]) ||
+	    (info->attrs[NLBL_MGMT_A_IPV4MASK] &&
+	     info->attrs[NLBL_MGMT_A_IPV6MASK]) ||
+	    ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^
+	     (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) ||
+	    ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^
+	     (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL)))
+		return -EINVAL;
+
+	netlbl_netlink_auditinfo(skb, &audit_info);
+
+	return netlbl_mgmt_add_common(info, &audit_info);
+}
+
+/**
  * netlbl_mgmt_remove - Handle a REMOVE message
  * @skb: the NETLINK buffer
  * @info: the Generic NETLINK info block
@@ -198,23 +453,9 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg)
 	if (data == NULL)
 		goto listall_cb_failure;
 
-	ret_val = nla_put_string(cb_arg->skb,
-				 NLBL_MGMT_A_DOMAIN,
-				 entry->domain);
+	ret_val = netlbl_mgmt_listentry(cb_arg->skb, entry);
 	if (ret_val != 0)
 		goto listall_cb_failure;
-	ret_val = nla_put_u32(cb_arg->skb, NLBL_MGMT_A_PROTOCOL, entry->type);
-	if (ret_val != 0)
-		goto listall_cb_failure;
-	switch (entry->type) {
-	case NETLBL_NLTYPE_CIPSOV4:
-		ret_val = nla_put_u32(cb_arg->skb,
-				      NLBL_MGMT_A_CV4DOI,
-				      entry->type_def.cipsov4->doi);
-		if (ret_val != 0)
-			goto listall_cb_failure;
-		break;
-	}
 
 	cb_arg->seq++;
 	return genlmsg_end(cb_arg->skb, data);
@@ -268,56 +509,22 @@ static int netlbl_mgmt_listall(struct sk_buff *skb,
  */
 static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
 {
-	int ret_val = -EINVAL;
-	struct netlbl_dom_map *entry = NULL;
-	u32 tmp_val;
 	struct netlbl_audit audit_info;
 
-	if (!info->attrs[NLBL_MGMT_A_PROTOCOL])
-		goto adddef_failure;
+	if ((!info->attrs[NLBL_MGMT_A_PROTOCOL]) ||
+	    (info->attrs[NLBL_MGMT_A_IPV4ADDR] &&
+	     info->attrs[NLBL_MGMT_A_IPV6ADDR]) ||
+	    (info->attrs[NLBL_MGMT_A_IPV4MASK] &&
+	     info->attrs[NLBL_MGMT_A_IPV6MASK]) ||
+	    ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^
+	     (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) ||
+	    ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^
+	     (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL)))
+		return -EINVAL;
 
 	netlbl_netlink_auditinfo(skb, &audit_info);
 
-	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
-	if (entry == NULL) {
-		ret_val = -ENOMEM;
-		goto adddef_failure;
-	}
-	entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
-
-	switch (entry->type) {
-	case NETLBL_NLTYPE_UNLABELED:
-		ret_val = netlbl_domhsh_add_default(entry, &audit_info);
-		break;
-	case NETLBL_NLTYPE_CIPSOV4:
-		if (!info->attrs[NLBL_MGMT_A_CV4DOI])
-			goto adddef_failure;
-
-		tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
-		/* We should be holding a rcu_read_lock() here while we hold
-		 * the result but since the entry will always be deleted when
-		 * the CIPSO DOI is deleted we aren't going to keep the
-		 * lock. */
-		rcu_read_lock();
-		entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
-		if (entry->type_def.cipsov4 == NULL) {
-			rcu_read_unlock();
-			goto adddef_failure;
-		}
-		ret_val = netlbl_domhsh_add_default(entry, &audit_info);
-		rcu_read_unlock();
-		break;
-	default:
-		goto adddef_failure;
-	}
-	if (ret_val != 0)
-		goto adddef_failure;
-
-	return 0;
-
-adddef_failure:
-	kfree(entry);
-	return ret_val;
+	return netlbl_mgmt_add_common(info, &audit_info);
 }
 
 /**
@@ -371,19 +578,10 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
 		ret_val = -ENOENT;
 		goto listdef_failure_lock;
 	}
-	ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_PROTOCOL, entry->type);
-	if (ret_val != 0)
-		goto listdef_failure_lock;
-	switch (entry->type) {
-	case NETLBL_NLTYPE_CIPSOV4:
-		ret_val = nla_put_u32(ans_skb,
-				      NLBL_MGMT_A_CV4DOI,
-				      entry->type_def.cipsov4->doi);
-		if (ret_val != 0)
-			goto listdef_failure_lock;
-		break;
-	}
+	ret_val = netlbl_mgmt_listentry(ans_skb, entry);
 	rcu_read_unlock();
+	if (ret_val != 0)
+		goto listdef_failure;
 
 	genlmsg_end(ans_skb, data);
 	return genlmsg_reply(ans_skb, info);
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
index a43bff169d6b..05d96431f819 100644
--- a/net/netlabel/netlabel_mgmt.h
+++ b/net/netlabel/netlabel_mgmt.h
@@ -45,6 +45,16 @@
  *     NLBL_MGMT_A_DOMAIN
  *     NLBL_MGMT_A_PROTOCOL
  *
+ *   If IPv4 is specified the following attributes are required:
+ *
+ *     NLBL_MGMT_A_IPV4ADDR
+ *     NLBL_MGMT_A_IPV4MASK
+ *
+ *   If IPv6 is specified the following attributes are required:
+ *
+ *     NLBL_MGMT_A_IPV6ADDR
+ *     NLBL_MGMT_A_IPV6MASK
+ *
  *   If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
  *
  *     NLBL_MGMT_A_CV4DOI
@@ -68,13 +78,24 @@
  *   Required attributes:
  *
  *     NLBL_MGMT_A_DOMAIN
+ *
+ *   If the IP address selectors are not used the following attribute is
+ *   required:
+ *
  *     NLBL_MGMT_A_PROTOCOL
  *
- *   If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
+ *   If the IP address selectors are used then the following attritbute is
+ *   required:
+ *
+ *     NLBL_MGMT_A_SELECTORLIST
+ *
+ *   If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following
+ *   attributes are required:
  *
  *     NLBL_MGMT_A_CV4DOI
  *
- *   If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
+ *   If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other
+ *   attributes are required.
  *
  * o ADDDEF:
  *   Sent by an application to set the default domain mapping for the NetLabel
@@ -100,15 +121,23 @@
  *   application there is no payload.  On success the kernel should send a
  *   response using the following format.
  *
- *   Required attributes:
+ *   If the IP address selectors are not used the following attribute is
+ *   required:
  *
  *     NLBL_MGMT_A_PROTOCOL
  *
- *   If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
+ *   If the IP address selectors are used then the following attritbute is
+ *   required:
+ *
+ *     NLBL_MGMT_A_SELECTORLIST
+ *
+ *   If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following
+ *   attributes are required:
  *
  *     NLBL_MGMT_A_CV4DOI
  *
- *   If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
+ *   If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other
+ *   attributes are required.
  *
  * o PROTOCOLS:
  *   Sent by an application to request a list of configured NetLabel protocols
@@ -162,6 +191,26 @@ enum {
 	NLBL_MGMT_A_CV4DOI,
 	/* (NLA_U32)
 	 * the CIPSOv4 DOI value */
+	NLBL_MGMT_A_IPV6ADDR,
+	/* (NLA_BINARY, struct in6_addr)
+	 * an IPv6 address */
+	NLBL_MGMT_A_IPV6MASK,
+	/* (NLA_BINARY, struct in6_addr)
+	 * an IPv6 address mask */
+	NLBL_MGMT_A_IPV4ADDR,
+	/* (NLA_BINARY, struct in_addr)
+	 * an IPv4 address */
+	NLBL_MGMT_A_IPV4MASK,
+	/* (NLA_BINARY, struct in_addr)
+	 * and IPv4 address mask */
+	NLBL_MGMT_A_ADDRSELECTOR,
+	/* (NLA_NESTED)
+	 * an IP address selector, must contain an address, mask, and protocol
+	 * attribute plus any protocol specific attributes */
+	NLBL_MGMT_A_SELECTORLIST,
+	/* (NLA_NESTED)
+	 * the selector list, there must be at least one
+	 * NLBL_MGMT_A_ADDRSELECTOR attribute */
 	__NLBL_MGMT_A_MAX,
 };
 #define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1)
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 921c118ead89..e8a5c32b0f10 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -10,7 +10,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2007
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -54,6 +54,7 @@
 #include <asm/atomic.h>
 
 #include "netlabel_user.h"
+#include "netlabel_addrlist.h"
 #include "netlabel_domainhash.h"
 #include "netlabel_unlabeled.h"
 #include "netlabel_mgmt.h"
@@ -76,22 +77,20 @@ struct netlbl_unlhsh_tbl {
 	struct list_head *tbl;
 	u32 size;
 };
+#define netlbl_unlhsh_addr4_entry(iter) \
+	container_of(iter, struct netlbl_unlhsh_addr4, list)
 struct netlbl_unlhsh_addr4 {
-	__be32 addr;
-	__be32 mask;
 	u32 secid;
 
-	u32 valid;
-	struct list_head list;
+	struct netlbl_af4list list;
 	struct rcu_head rcu;
 };
+#define netlbl_unlhsh_addr6_entry(iter) \
+	container_of(iter, struct netlbl_unlhsh_addr6, list)
 struct netlbl_unlhsh_addr6 {
-	struct in6_addr addr;
-	struct in6_addr mask;
 	u32 secid;
 
-	u32 valid;
-	struct list_head list;
+	struct netlbl_af6list list;
 	struct rcu_head rcu;
 };
 struct netlbl_unlhsh_iface {
@@ -147,76 +146,6 @@ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1
 };
 
 /*
- * Audit Helper Functions
- */
-
-/**
- * netlbl_unlabel_audit_addr4 - Audit an IPv4 address
- * @audit_buf: audit buffer
- * @dev: network interface
- * @addr: IP address
- * @mask: IP address mask
- *
- * Description:
- * Write the IPv4 address and address mask, if necessary, to @audit_buf.
- *
- */
-static void netlbl_unlabel_audit_addr4(struct audit_buffer *audit_buf,
-				     const char *dev,
-				     __be32 addr, __be32 mask)
-{
-	u32 mask_val = ntohl(mask);
-
-	if (dev != NULL)
-		audit_log_format(audit_buf, " netif=%s", dev);
-	audit_log_format(audit_buf, " src=" NIPQUAD_FMT, NIPQUAD(addr));
-	if (mask_val != 0xffffffff) {
-		u32 mask_len = 0;
-		while (mask_val > 0) {
-			mask_val <<= 1;
-			mask_len++;
-		}
-		audit_log_format(audit_buf, " src_prefixlen=%d", mask_len);
-	}
-}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-/**
- * netlbl_unlabel_audit_addr6 - Audit an IPv6 address
- * @audit_buf: audit buffer
- * @dev: network interface
- * @addr: IP address
- * @mask: IP address mask
- *
- * Description:
- * Write the IPv6 address and address mask, if necessary, to @audit_buf.
- *
- */
-static void netlbl_unlabel_audit_addr6(struct audit_buffer *audit_buf,
-				     const char *dev,
-				     const struct in6_addr *addr,
-				     const struct in6_addr *mask)
-{
-	if (dev != NULL)
-		audit_log_format(audit_buf, " netif=%s", dev);
-	audit_log_format(audit_buf, " src=" NIP6_FMT, NIP6(*addr));
-	if (ntohl(mask->s6_addr32[3]) != 0xffffffff) {
-		u32 mask_len = 0;
-		u32 mask_val;
-		int iter = -1;
-		while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff)
-			mask_len += 32;
-		mask_val = ntohl(mask->s6_addr32[iter]);
-		while (mask_val > 0) {
-			mask_val <<= 1;
-			mask_len++;
-		}
-		audit_log_format(audit_buf, " src_prefixlen=%d", mask_len);
-	}
-}
-#endif /* IPv6 */
-
-/*
  * Unlabeled Connection Hash Table Functions
  */
 
@@ -274,26 +203,28 @@ static void netlbl_unlhsh_free_addr6(struct rcu_head *entry)
 static void netlbl_unlhsh_free_iface(struct rcu_head *entry)
 {
 	struct netlbl_unlhsh_iface *iface;
-	struct netlbl_unlhsh_addr4 *iter4;
-	struct netlbl_unlhsh_addr4 *tmp4;
-	struct netlbl_unlhsh_addr6 *iter6;
-	struct netlbl_unlhsh_addr6 *tmp6;
+	struct netlbl_af4list *iter4;
+	struct netlbl_af4list *tmp4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct netlbl_af6list *iter6;
+	struct netlbl_af6list *tmp6;
+#endif /* IPv6 */
 
 	iface = container_of(entry, struct netlbl_unlhsh_iface, rcu);
 
 	/* no need for locks here since we are the only one with access to this
 	 * structure */
 
-	list_for_each_entry_safe(iter4, tmp4, &iface->addr4_list, list)
-		if (iter4->valid) {
-			list_del_rcu(&iter4->list);
-			kfree(iter4);
-		}
-	list_for_each_entry_safe(iter6, tmp6, &iface->addr6_list, list)
-		if (iter6->valid) {
-			list_del_rcu(&iter6->list);
-			kfree(iter6);
-		}
+	netlbl_af4list_foreach_safe(iter4, tmp4, &iface->addr4_list) {
+		netlbl_af4list_remove_entry(iter4);
+		kfree(netlbl_unlhsh_addr4_entry(iter4));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	netlbl_af6list_foreach_safe(iter6, tmp6, &iface->addr6_list) {
+		netlbl_af6list_remove_entry(iter6);
+		kfree(netlbl_unlhsh_addr6_entry(iter6));
+	}
+#endif /* IPv6 */
 	kfree(iface);
 }
 
@@ -316,59 +247,6 @@ static u32 netlbl_unlhsh_hash(int ifindex)
 }
 
 /**
- * netlbl_unlhsh_search_addr4 - Search for a matching IPv4 address entry
- * @addr: IPv4 address
- * @iface: the network interface entry
- *
- * Description:
- * Searches the IPv4 address list of the network interface specified by @iface.
- * If a matching address entry is found it is returned, otherwise NULL is
- * returned.  The caller is responsible for calling the rcu_read_[un]lock()
- * functions.
- *
- */
-static struct netlbl_unlhsh_addr4 *netlbl_unlhsh_search_addr4(
-	                               __be32 addr,
-	                               const struct netlbl_unlhsh_iface *iface)
-{
-	struct netlbl_unlhsh_addr4 *iter;
-
-	list_for_each_entry_rcu(iter, &iface->addr4_list, list)
-		if (iter->valid && (addr & iter->mask) == iter->addr)
-			return iter;
-
-	return NULL;
-}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-/**
- * netlbl_unlhsh_search_addr6 - Search for a matching IPv6 address entry
- * @addr: IPv6 address
- * @iface: the network interface entry
- *
- * Description:
- * Searches the IPv6 address list of the network interface specified by @iface.
- * If a matching address entry is found it is returned, otherwise NULL is
- * returned.  The caller is responsible for calling the rcu_read_[un]lock()
- * functions.
- *
- */
-static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6(
-	                               const struct in6_addr *addr,
-	                               const struct netlbl_unlhsh_iface *iface)
-{
-	struct netlbl_unlhsh_addr6 *iter;
-
-	list_for_each_entry_rcu(iter, &iface->addr6_list, list)
-		if (iter->valid &&
-		    ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0)
-		return iter;
-
-	return NULL;
-}
-#endif /* IPv6 */
-
-/**
  * netlbl_unlhsh_search_iface - Search for a matching interface entry
  * @ifindex: the network interface
  *
@@ -381,12 +259,12 @@ static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6(
 static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex)
 {
 	u32 bkt;
+	struct list_head *bkt_list;
 	struct netlbl_unlhsh_iface *iter;
 
 	bkt = netlbl_unlhsh_hash(ifindex);
-	list_for_each_entry_rcu(iter,
-				&rcu_dereference(netlbl_unlhsh)->tbl[bkt],
-				list)
+	bkt_list = &rcu_dereference(netlbl_unlhsh)->tbl[bkt];
+	list_for_each_entry_rcu(iter, bkt_list, list)
 		if (iter->valid && iter->ifindex == ifindex)
 			return iter;
 
@@ -439,43 +317,26 @@ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface,
 				   const struct in_addr *mask,
 				   u32 secid)
 {
+	int ret_val;
 	struct netlbl_unlhsh_addr4 *entry;
-	struct netlbl_unlhsh_addr4 *iter;
 
 	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 	if (entry == NULL)
 		return -ENOMEM;
 
-	entry->addr = addr->s_addr & mask->s_addr;
-	entry->mask = mask->s_addr;
-	entry->secid = secid;
-	entry->valid = 1;
+	entry->list.addr = addr->s_addr & mask->s_addr;
+	entry->list.mask = mask->s_addr;
+	entry->list.valid = 1;
 	INIT_RCU_HEAD(&entry->rcu);
+	entry->secid = secid;
 
 	spin_lock(&netlbl_unlhsh_lock);
-	iter = netlbl_unlhsh_search_addr4(entry->addr, iface);
-	if (iter != NULL &&
-	    iter->addr == addr->s_addr && iter->mask == mask->s_addr) {
-		spin_unlock(&netlbl_unlhsh_lock);
-		kfree(entry);
-		return -EEXIST;
-	}
-	/* in order to speed up address searches through the list (the common
-	 * case) we need to keep the list in order based on the size of the
-	 * address mask such that the entry with the widest mask (smallest
-	 * numerical value) appears first in the list */
-	list_for_each_entry_rcu(iter, &iface->addr4_list, list)
-		if (iter->valid &&
-		    ntohl(entry->mask) > ntohl(iter->mask)) {
-			__list_add_rcu(&entry->list,
-				       iter->list.prev,
-				       &iter->list);
-			spin_unlock(&netlbl_unlhsh_lock);
-			return 0;
-		}
-	list_add_tail_rcu(&entry->list, &iface->addr4_list);
+	ret_val = netlbl_af4list_add(&entry->list, &iface->addr4_list);
 	spin_unlock(&netlbl_unlhsh_lock);
-	return 0;
+
+	if (ret_val != 0)
+		kfree(entry);
+	return ret_val;
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -498,47 +359,29 @@ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface,
 				   const struct in6_addr *mask,
 				   u32 secid)
 {
+	int ret_val;
 	struct netlbl_unlhsh_addr6 *entry;
-	struct netlbl_unlhsh_addr6 *iter;
 
 	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 	if (entry == NULL)
 		return -ENOMEM;
 
-	ipv6_addr_copy(&entry->addr, addr);
-	entry->addr.s6_addr32[0] &= mask->s6_addr32[0];
-	entry->addr.s6_addr32[1] &= mask->s6_addr32[1];
-	entry->addr.s6_addr32[2] &= mask->s6_addr32[2];
-	entry->addr.s6_addr32[3] &= mask->s6_addr32[3];
-	ipv6_addr_copy(&entry->mask, mask);
-	entry->secid = secid;
-	entry->valid = 1;
+	ipv6_addr_copy(&entry->list.addr, addr);
+	entry->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
+	entry->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
+	entry->list.addr.s6_addr32[2] &= mask->s6_addr32[2];
+	entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
+	ipv6_addr_copy(&entry->list.mask, mask);
+	entry->list.valid = 1;
 	INIT_RCU_HEAD(&entry->rcu);
+	entry->secid = secid;
 
 	spin_lock(&netlbl_unlhsh_lock);
-	iter = netlbl_unlhsh_search_addr6(&entry->addr, iface);
-	if (iter != NULL &&
-	    (ipv6_addr_equal(&iter->addr, addr) &&
-	     ipv6_addr_equal(&iter->mask, mask))) {
-		spin_unlock(&netlbl_unlhsh_lock);
-		kfree(entry);
-		return -EEXIST;
-	}
-	/* in order to speed up address searches through the list (the common
-	 * case) we need to keep the list in order based on the size of the
-	 * address mask such that the entry with the widest mask (smallest
-	 * numerical value) appears first in the list */
-	list_for_each_entry_rcu(iter, &iface->addr6_list, list)
-		if (iter->valid &&
-		    ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) {
-			__list_add_rcu(&entry->list,
-				       iter->list.prev,
-				       &iter->list);
-			spin_unlock(&netlbl_unlhsh_lock);
-			return 0;
-		}
-	list_add_tail_rcu(&entry->list, &iface->addr6_list);
+	ret_val = netlbl_af6list_add(&entry->list, &iface->addr6_list);
 	spin_unlock(&netlbl_unlhsh_lock);
+
+	if (ret_val != 0)
+		kfree(entry);
 	return 0;
 }
 #endif /* IPv6 */
@@ -658,10 +501,10 @@ static int netlbl_unlhsh_add(struct net *net,
 		mask4 = (struct in_addr *)mask;
 		ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid);
 		if (audit_buf != NULL)
-			netlbl_unlabel_audit_addr4(audit_buf,
-						   dev_name,
-						   addr4->s_addr,
-						   mask4->s_addr);
+			netlbl_af4list_audit_addr(audit_buf, 1,
+						  dev_name,
+						  addr4->s_addr,
+						  mask4->s_addr);
 		break;
 	}
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -672,9 +515,9 @@ static int netlbl_unlhsh_add(struct net *net,
 		mask6 = (struct in6_addr *)mask;
 		ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid);
 		if (audit_buf != NULL)
-			netlbl_unlabel_audit_addr6(audit_buf,
-						   dev_name,
-						   addr6, mask6);
+			netlbl_af6list_audit_addr(audit_buf, 1,
+						  dev_name,
+						  addr6, mask6);
 		break;
 	}
 #endif /* IPv6 */
@@ -719,35 +562,34 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
 				      const struct in_addr *mask,
 				      struct netlbl_audit *audit_info)
 {
-	int ret_val = -ENOENT;
+	int ret_val = 0;
+	struct netlbl_af4list *list_entry;
 	struct netlbl_unlhsh_addr4 *entry;
-	struct audit_buffer *audit_buf = NULL;
+	struct audit_buffer *audit_buf;
 	struct net_device *dev;
-	char *secctx = NULL;
+	char *secctx;
 	u32 secctx_len;
 
 	spin_lock(&netlbl_unlhsh_lock);
-	entry = netlbl_unlhsh_search_addr4(addr->s_addr, iface);
-	if (entry != NULL &&
-	    entry->addr == addr->s_addr && entry->mask == mask->s_addr) {
-		entry->valid = 0;
-		list_del_rcu(&entry->list);
-		ret_val = 0;
-	}
+	list_entry = netlbl_af4list_remove(addr->s_addr, mask->s_addr,
+					   &iface->addr4_list);
 	spin_unlock(&netlbl_unlhsh_lock);
+	if (list_entry == NULL)
+		ret_val = -ENOENT;
+	entry = netlbl_unlhsh_addr4_entry(list_entry);
 
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL,
 					      audit_info);
 	if (audit_buf != NULL) {
 		dev = dev_get_by_index(net, iface->ifindex);
-		netlbl_unlabel_audit_addr4(audit_buf,
-					   (dev != NULL ? dev->name : NULL),
-					   entry->addr, entry->mask);
+		netlbl_af4list_audit_addr(audit_buf, 1,
+					  (dev != NULL ? dev->name : NULL),
+					  addr->s_addr, mask->s_addr);
 		if (dev != NULL)
 			dev_put(dev);
-		if (security_secid_to_secctx(entry->secid,
-					     &secctx,
-					     &secctx_len) == 0) {
+		if (entry && security_secid_to_secctx(entry->secid,
+						      &secctx,
+						      &secctx_len) == 0) {
 			audit_log_format(audit_buf, " sec_obj=%s", secctx);
 			security_release_secctx(secctx, secctx_len);
 		}
@@ -781,36 +623,33 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
 				      const struct in6_addr *mask,
 				      struct netlbl_audit *audit_info)
 {
-	int ret_val = -ENOENT;
+	int ret_val = 0;
+	struct netlbl_af6list *list_entry;
 	struct netlbl_unlhsh_addr6 *entry;
-	struct audit_buffer *audit_buf = NULL;
+	struct audit_buffer *audit_buf;
 	struct net_device *dev;
-	char *secctx = NULL;
+	char *secctx;
 	u32 secctx_len;
 
 	spin_lock(&netlbl_unlhsh_lock);
-	entry = netlbl_unlhsh_search_addr6(addr, iface);
-	if (entry != NULL &&
-	    (ipv6_addr_equal(&entry->addr, addr) &&
-	     ipv6_addr_equal(&entry->mask, mask))) {
-		entry->valid = 0;
-		list_del_rcu(&entry->list);
-		ret_val = 0;
-	}
+	list_entry = netlbl_af6list_remove(addr, mask, &iface->addr6_list);
 	spin_unlock(&netlbl_unlhsh_lock);
+	if (list_entry == NULL)
+		ret_val = -ENOENT;
+	entry = netlbl_unlhsh_addr6_entry(list_entry);
 
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL,
 					      audit_info);
 	if (audit_buf != NULL) {
 		dev = dev_get_by_index(net, iface->ifindex);
-		netlbl_unlabel_audit_addr6(audit_buf,
-					   (dev != NULL ? dev->name : NULL),
-					   addr, mask);
+		netlbl_af6list_audit_addr(audit_buf, 1,
+					  (dev != NULL ? dev->name : NULL),
+					  addr, mask);
 		if (dev != NULL)
 			dev_put(dev);
-		if (security_secid_to_secctx(entry->secid,
-					     &secctx,
-					     &secctx_len) == 0) {
+		if (entry && security_secid_to_secctx(entry->secid,
+						      &secctx,
+						      &secctx_len) == 0) {
 			audit_log_format(audit_buf, " sec_obj=%s", secctx);
 			security_release_secctx(secctx, secctx_len);
 		}
@@ -836,16 +675,18 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
  */
 static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface)
 {
-	struct netlbl_unlhsh_addr4 *iter4;
-	struct netlbl_unlhsh_addr6 *iter6;
+	struct netlbl_af4list *iter4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct netlbl_af6list *iter6;
+#endif /* IPv6 */
 
 	spin_lock(&netlbl_unlhsh_lock);
-	list_for_each_entry_rcu(iter4, &iface->addr4_list, list)
-		if (iter4->valid)
-			goto unlhsh_condremove_failure;
-	list_for_each_entry_rcu(iter6, &iface->addr6_list, list)
-		if (iter6->valid)
-			goto unlhsh_condremove_failure;
+	netlbl_af4list_foreach_rcu(iter4, &iface->addr4_list)
+		goto unlhsh_condremove_failure;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	netlbl_af6list_foreach_rcu(iter6, &iface->addr6_list)
+		goto unlhsh_condremove_failure;
+#endif /* IPv6 */
 	iface->valid = 0;
 	if (iface->ifindex > 0)
 		list_del_rcu(&iface->list);
@@ -1349,7 +1190,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
 	if (addr4) {
 		struct in_addr addr_struct;
 
-		addr_struct.s_addr = addr4->addr;
+		addr_struct.s_addr = addr4->list.addr;
 		ret_val = nla_put(cb_arg->skb,
 				  NLBL_UNLABEL_A_IPV4ADDR,
 				  sizeof(struct in_addr),
@@ -1357,7 +1198,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
 		if (ret_val != 0)
 			goto list_cb_failure;
 
-		addr_struct.s_addr = addr4->mask;
+		addr_struct.s_addr = addr4->list.mask;
 		ret_val = nla_put(cb_arg->skb,
 				  NLBL_UNLABEL_A_IPV4MASK,
 				  sizeof(struct in_addr),
@@ -1370,14 +1211,14 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
 		ret_val = nla_put(cb_arg->skb,
 				  NLBL_UNLABEL_A_IPV6ADDR,
 				  sizeof(struct in6_addr),
-				  &addr6->addr);
+				  &addr6->list.addr);
 		if (ret_val != 0)
 			goto list_cb_failure;
 
 		ret_val = nla_put(cb_arg->skb,
 				  NLBL_UNLABEL_A_IPV6MASK,
 				  sizeof(struct in6_addr),
-				  &addr6->mask);
+				  &addr6->list.mask);
 		if (ret_val != 0)
 			goto list_cb_failure;
 
@@ -1425,8 +1266,11 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 	u32 iter_bkt;
 	u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0;
 	struct netlbl_unlhsh_iface *iface;
-	struct netlbl_unlhsh_addr4 *addr4;
-	struct netlbl_unlhsh_addr6 *addr6;
+	struct list_head *iter_list;
+	struct netlbl_af4list *addr4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct netlbl_af6list *addr6;
+#endif
 
 	cb_arg.nl_cb = cb;
 	cb_arg.skb = skb;
@@ -1436,44 +1280,43 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 	for (iter_bkt = skip_bkt;
 	     iter_bkt < rcu_dereference(netlbl_unlhsh)->size;
 	     iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) {
-		list_for_each_entry_rcu(iface,
-			        &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt],
-				list) {
+		iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt];
+		list_for_each_entry_rcu(iface, iter_list, list) {
 			if (!iface->valid ||
 			    iter_chain++ < skip_chain)
 				continue;
-			list_for_each_entry_rcu(addr4,
-						&iface->addr4_list,
-						list) {
-				if (!addr4->valid || iter_addr4++ < skip_addr4)
+			netlbl_af4list_foreach_rcu(addr4,
+						   &iface->addr4_list) {
+				if (iter_addr4++ < skip_addr4)
 					continue;
 				if (netlbl_unlabel_staticlist_gen(
-					             NLBL_UNLABEL_C_STATICLIST,
-						     iface,
-						     addr4,
-						     NULL,
-						     &cb_arg) < 0) {
+					      NLBL_UNLABEL_C_STATICLIST,
+					      iface,
+					      netlbl_unlhsh_addr4_entry(addr4),
+					      NULL,
+					      &cb_arg) < 0) {
 					iter_addr4--;
 					iter_chain--;
 					goto unlabel_staticlist_return;
 				}
 			}
-			list_for_each_entry_rcu(addr6,
-						&iface->addr6_list,
-						list) {
-				if (!addr6->valid || iter_addr6++ < skip_addr6)
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+			netlbl_af6list_foreach_rcu(addr6,
+						   &iface->addr6_list) {
+				if (iter_addr6++ < skip_addr6)
 					continue;
 				if (netlbl_unlabel_staticlist_gen(
-						     NLBL_UNLABEL_C_STATICLIST,
-						     iface,
-						     NULL,
-						     addr6,
-						     &cb_arg) < 0) {
+					      NLBL_UNLABEL_C_STATICLIST,
+					      iface,
+					      NULL,
+					      netlbl_unlhsh_addr6_entry(addr6),
+					      &cb_arg) < 0) {
 					iter_addr6--;
 					iter_chain--;
 					goto unlabel_staticlist_return;
 				}
 			}
+#endif /* IPv6 */
 		}
 	}
 
@@ -1504,9 +1347,12 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
 	struct netlbl_unlhsh_iface *iface;
 	u32 skip_addr4 = cb->args[0];
 	u32 skip_addr6 = cb->args[1];
-	u32 iter_addr4 = 0, iter_addr6 = 0;
-	struct netlbl_unlhsh_addr4 *addr4;
-	struct netlbl_unlhsh_addr6 *addr6;
+	u32 iter_addr4 = 0;
+	struct netlbl_af4list *addr4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	u32 iter_addr6 = 0;
+	struct netlbl_af6list *addr6;
+#endif
 
 	cb_arg.nl_cb = cb;
 	cb_arg.skb = skb;
@@ -1517,30 +1363,32 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
 	if (iface == NULL || !iface->valid)
 		goto unlabel_staticlistdef_return;
 
-	list_for_each_entry_rcu(addr4, &iface->addr4_list, list) {
-		if (!addr4->valid || iter_addr4++ < skip_addr4)
+	netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) {
+		if (iter_addr4++ < skip_addr4)
 			continue;
 		if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
-					   iface,
-					   addr4,
-					   NULL,
-					   &cb_arg) < 0) {
+					      iface,
+					      netlbl_unlhsh_addr4_entry(addr4),
+					      NULL,
+					      &cb_arg) < 0) {
 			iter_addr4--;
 			goto unlabel_staticlistdef_return;
 		}
 	}
-	list_for_each_entry_rcu(addr6, &iface->addr6_list, list) {
-		if (!addr6->valid || iter_addr6++ < skip_addr6)
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) {
+		if (iter_addr6++ < skip_addr6)
 			continue;
 		if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
-					   iface,
-					   NULL,
-					   addr6,
-					   &cb_arg) < 0) {
+					      iface,
+					      NULL,
+					      netlbl_unlhsh_addr6_entry(addr6),
+					      &cb_arg) < 0) {
 			iter_addr6--;
 			goto unlabel_staticlistdef_return;
 		}
 	}
+#endif /* IPv6 */
 
 unlabel_staticlistdef_return:
 	rcu_read_unlock();
@@ -1718,25 +1566,27 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb,
 	switch (family) {
 	case PF_INET: {
 		struct iphdr *hdr4;
-		struct netlbl_unlhsh_addr4 *addr4;
+		struct netlbl_af4list *addr4;
 
 		hdr4 = ip_hdr(skb);
-		addr4 = netlbl_unlhsh_search_addr4(hdr4->saddr, iface);
+		addr4 = netlbl_af4list_search(hdr4->saddr,
+					      &iface->addr4_list);
 		if (addr4 == NULL)
 			goto unlabel_getattr_nolabel;
-		secattr->attr.secid = addr4->secid;
+		secattr->attr.secid = netlbl_unlhsh_addr4_entry(addr4)->secid;
 		break;
 	}
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case PF_INET6: {
 		struct ipv6hdr *hdr6;
-		struct netlbl_unlhsh_addr6 *addr6;
+		struct netlbl_af6list *addr6;
 
 		hdr6 = ipv6_hdr(skb);
-		addr6 = netlbl_unlhsh_search_addr6(&hdr6->saddr, iface);
+		addr6 = netlbl_af6list_search(&hdr6->saddr,
+					      &iface->addr6_list);
 		if (addr6 == NULL)
 			goto unlabel_getattr_nolabel;
-		secattr->attr.secid = addr6->secid;
+		secattr->attr.secid = netlbl_unlhsh_addr6_entry(addr6)->secid;
 		break;
 	}
 #endif /* IPv6 */
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b0eacc0007cc..480184a857d2 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1,7 +1,7 @@
 /*
  * NETLINK      Kernel-user communication protocol.
  *
- * 		Authors:	Alan Cox <alan@redhat.com>
+ * 		Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
  * 				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  *
  *		This program is free software; you can redistribute it and/or
@@ -435,7 +435,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol)
 		return -EPROTONOSUPPORT;
 
 	netlink_lock_table();
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 	if (!nl_table[protocol].registered) {
 		netlink_unlock_table();
 		request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 9e9c6fce11aa..b9d97effebe3 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -67,11 +67,10 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol)
 	}
 
 	pnp = phonet_proto_get(protocol);
-#ifdef CONFIG_KMOD
 	if (pnp == NULL &&
 	    request_module("net-pf-%d-proto-%d", PF_PHONET, protocol) == 0)
 		pnp = phonet_proto_get(protocol);
-#endif
+
 	if (pnp == NULL)
 		return -EPROTONOSUPPORT;
 	if (sock->type != pnp->sock_type) {
diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index e5b69556bb5b..21124ec0a73d 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -16,6 +16,7 @@
 #include <linux/workqueue.h>
 #include <linux/init.h>
 #include <linux/rfkill.h>
+#include <linux/sched.h>
 
 #include "rfkill-input.h"
 
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9974b3f04f05..8f457f1e0acf 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -494,7 +494,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
 
 	a_o = tc_lookup_action_n(act_name);
 	if (a_o == NULL) {
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 		rtnl_unlock();
 		request_module("act_%s", act_name);
 		rtnl_lock();
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8eb79e92e94c..16e7ac9774e5 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -227,7 +227,7 @@ replay:
 		err = -ENOENT;
 		tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
 		if (tp_ops == NULL) {
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 			struct nlattr *kind = tca[TCA_KIND];
 			char name[IFNAMSIZ];
 
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 5e6f82e0e6f3..e82519e548d7 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -224,7 +224,7 @@ static int tcf_em_validate(struct tcf_proto *tp,
 
 		if (em->ops == NULL) {
 			err = -ENOENT;
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 			__rtnl_unlock();
 			request_module("ematch-kind-%u", em_hdr->kind);
 			rtnl_lock();
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1122c952aa99..b16ad2972c6b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -764,7 +764,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 	struct qdisc_size_table *stab;
 
 	ops = qdisc_lookup_ops(kind);
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 	if (ops == NULL && kind != NULL) {
 		char name[IFNAMSIZ];
 		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7b5572d6beb5..93cd30ce6501 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -326,6 +326,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
 
 static struct netdev_queue noop_netdev_queue = {
 	.qdisc		=	&noop_qdisc,
+	.qdisc_sleeping	=	&noop_qdisc,
 };
 
 struct Qdisc noop_qdisc = {
@@ -352,6 +353,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
 static struct Qdisc noqueue_qdisc;
 static struct netdev_queue noqueue_netdev_queue = {
 	.qdisc		=	&noqueue_qdisc,
+	.qdisc_sleeping	=	&noqueue_qdisc,
 };
 
 static struct Qdisc noqueue_qdisc = {
diff --git a/net/socket.c b/net/socket.c
index 3e8d4e35c08f..2b7a4b5c9b72 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1142,7 +1142,7 @@ static int __sock_create(struct net *net, int family, int type, int protocol,
 
 	sock->type = type;
 
-#if defined(CONFIG_KMOD)
+#ifdef CONFIG_MODULES
 	/* Attempt to load a protocol module if the find failed.
 	 *
 	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 6bfea9ed6869..436bf1b4b76c 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -83,10 +83,8 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
 	if (flavor >= RPC_AUTH_MAXFLAVOR)
 		goto out;
 
-#ifdef CONFIG_KMOD
 	if ((ops = auth_flavors[flavor]) == NULL)
 		request_module("rpc-auth-%u", flavor);
-#endif
 	spin_lock(&rpc_authflavor_lock);
 	ops = auth_flavors[flavor];
 	if (ops == NULL || !try_module_get(ops->owner)) {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 76739e928d0d..4895c341e46d 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -174,7 +174,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
 	clnt->cl_procinfo = version->procs;
 	clnt->cl_maxproc  = version->nrprocs;
 	clnt->cl_protname = program->name;
-	clnt->cl_prog     = program->number;
+	clnt->cl_prog     = args->prognumber ? : program->number;
 	clnt->cl_vers     = version->number;
 	clnt->cl_stats    = program->stats;
 	clnt->cl_metrics  = rpc_alloc_iostats(clnt);
@@ -213,10 +213,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
 	}
 
 	/* save the nodename */
-	clnt->cl_nodelen = strlen(utsname()->nodename);
+	clnt->cl_nodelen = strlen(init_utsname()->nodename);
 	if (clnt->cl_nodelen > UNX_MAXNODENAME)
 		clnt->cl_nodelen = UNX_MAXNODENAME;
-	memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen);
+	memcpy(clnt->cl_nodename, init_utsname()->nodename, clnt->cl_nodelen);
 	rpc_register_client(clnt);
 	return clnt;
 
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 24db2b4d12d3..41013dd66ac3 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -20,6 +20,7 @@
 #include <linux/in6.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
+#include <net/ipv6.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/sched.h>
@@ -176,13 +177,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
 }
 
 static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
-			      u32 version, struct rpc_message *msg,
-			      int *result)
+			      u32 version, struct rpc_message *msg)
 {
 	struct rpc_clnt *rpcb_clnt;
-	int error = 0;
+	int result, error = 0;
 
-	*result = 0;
+	msg->rpc_resp = &result;
 
 	rpcb_clnt = rpcb_create_local(addr, addrlen, version);
 	if (!IS_ERR(rpcb_clnt)) {
@@ -191,12 +191,15 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
 	} else
 		error = PTR_ERR(rpcb_clnt);
 
-	if (error < 0)
+	if (error < 0) {
 		printk(KERN_WARNING "RPC: failed to contact local rpcbind "
 				"server (errno %d).\n", -error);
-	dprintk("RPC:       registration status %d/%d\n", error, *result);
+		return error;
+	}
 
-	return error;
+	if (!result)
+		return -EACCES;
+	return 0;
 }
 
 /**
@@ -205,7 +208,11 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
  * @vers: RPC version number to bind
  * @prot: transport protocol to register
  * @port: port value to register
- * @okay: OUT: result code
+ *
+ * Returns zero if the registration request was dispatched successfully
+ * and the rpcbind daemon returned success.  Otherwise, returns an errno
+ * value that reflects the nature of the error (request could not be
+ * dispatched, timed out, or rpcbind returned an error).
  *
  * RPC services invoke this function to advertise their contact
  * information via the system's rpcbind daemon.  RPC services
@@ -217,15 +224,6 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
  * all registered transports for [program, version] from the local
  * rpcbind database.
  *
- * Returns zero if the registration request was dispatched
- * successfully and a reply was received.  The rpcbind daemon's
- * boolean result code is stored in *okay.
- *
- * Returns an errno value and sets *result to zero if there was
- * some problem that prevented the rpcbind request from being
- * dispatched, or if the rpcbind daemon did not respond within
- * the timeout.
- *
  * This function uses rpcbind protocol version 2 to contact the
  * local rpcbind daemon.
  *
@@ -236,7 +234,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
  * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6
  * addresses).
  */
-int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
+int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
 {
 	struct rpcbind_args map = {
 		.r_prog		= prog,
@@ -246,7 +244,6 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 	};
 	struct rpc_message msg = {
 		.rpc_argp	= &map,
-		.rpc_resp	= okay,
 	};
 
 	dprintk("RPC:       %sregistering (%u, %u, %d, %u) with local "
@@ -259,7 +256,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 
 	return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
 					sizeof(rpcb_inaddr_loopback),
-					RPCBVERS_2, &msg, okay);
+					RPCBVERS_2, &msg);
 }
 
 /*
@@ -290,7 +287,7 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
 
 	return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
 					sizeof(rpcb_inaddr_loopback),
-					RPCBVERS_4, msg, msg->rpc_resp);
+					RPCBVERS_4, msg);
 }
 
 /*
@@ -304,10 +301,13 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
 	char buf[64];
 
 	/* Construct AF_INET6 universal address */
-	snprintf(buf, sizeof(buf),
-			NIP6_FMT".%u.%u",
-			NIP6(address_to_register->sin6_addr),
-			port >> 8, port & 0xff);
+	if (ipv6_addr_any(&address_to_register->sin6_addr))
+		snprintf(buf, sizeof(buf), "::.%u.%u",
+				port >> 8, port & 0xff);
+	else
+		snprintf(buf, sizeof(buf), NIP6_FMT".%u.%u",
+				NIP6(address_to_register->sin6_addr),
+				port >> 8, port & 0xff);
 	map->r_addr = buf;
 
 	dprintk("RPC:       %sregistering [%u, %u, %s, '%s'] with "
@@ -321,7 +321,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
 
 	return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback,
 					sizeof(rpcb_in6addr_loopback),
-					RPCBVERS_4, msg, msg->rpc_resp);
+					RPCBVERS_4, msg);
 }
 
 /**
@@ -330,7 +330,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
  * @version: RPC version number of service to (un)register
  * @address: address family, IP address, and port to (un)register
  * @netid: netid of transport protocol to (un)register
- * @result: result code from rpcbind RPC call
+ *
+ * Returns zero if the registration request was dispatched successfully
+ * and the rpcbind daemon returned success.  Otherwise, returns an errno
+ * value that reflects the nature of the error (request could not be
+ * dispatched, timed out, or rpcbind returned an error).
  *
  * RPC services invoke this function to advertise their contact
  * information via the system's rpcbind daemon.  RPC services
@@ -342,15 +346,6 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
  * to zero.  Callers pass a netid of "" to unregister all
  * transport netids associated with [program, version, address].
  *
- * Returns zero if the registration request was dispatched
- * successfully and a reply was received.  The rpcbind daemon's
- * result code is stored in *result.
- *
- * Returns an errno value and sets *result to zero if there was
- * some problem that prevented the rpcbind request from being
- * dispatched, or if the rpcbind daemon did not respond within
- * the timeout.
- *
  * This function uses rpcbind protocol version 4 to contact the
  * local rpcbind daemon.  The local rpcbind daemon must support
  * version 4 of the rpcbind protocol in order for these functions
@@ -372,8 +367,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
  * advertises the service on all IPv4 and IPv6 addresses.
  */
 int rpcb_v4_register(const u32 program, const u32 version,
-		     const struct sockaddr *address, const char *netid,
-		     int *result)
+		     const struct sockaddr *address, const char *netid)
 {
 	struct rpcbind_args map = {
 		.r_prog		= program,
@@ -383,11 +377,8 @@ int rpcb_v4_register(const u32 program, const u32 version,
 	};
 	struct rpc_message msg = {
 		.rpc_argp	= &map,
-		.rpc_resp	= result,
 	};
 
-	*result = 0;
-
 	switch (address->sa_family) {
 	case AF_INET:
 		return rpcb_register_netid4((struct sockaddr_in *)address,
@@ -469,6 +460,28 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
 	return rpc_run_task(&task_setup_data);
 }
 
+/*
+ * In the case where rpc clients have been cloned, we want to make
+ * sure that we use the program number/version etc of the actual
+ * owner of the xprt. To do so, we walk back up the tree of parents
+ * to find whoever created the transport and/or whoever has the
+ * autobind flag set.
+ */
+static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
+{
+	struct rpc_clnt *parent = clnt->cl_parent;
+
+	while (parent != clnt) {
+		if (parent->cl_xprt != clnt->cl_xprt)
+			break;
+		if (clnt->cl_autobind)
+			break;
+		clnt = parent;
+		parent = parent->cl_parent;
+	}
+	return clnt;
+}
+
 /**
  * rpcb_getport_async - obtain the port for a given RPC service on a given host
  * @task: task that is waiting for portmapper request
@@ -478,10 +491,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
  */
 void rpcb_getport_async(struct rpc_task *task)
 {
-	struct rpc_clnt *clnt = task->tk_client;
+	struct rpc_clnt *clnt;
 	struct rpc_procinfo *proc;
 	u32 bind_version;
-	struct rpc_xprt *xprt = task->tk_xprt;
+	struct rpc_xprt *xprt;
 	struct rpc_clnt	*rpcb_clnt;
 	static struct rpcbind_args *map;
 	struct rpc_task	*child;
@@ -490,13 +503,13 @@ void rpcb_getport_async(struct rpc_task *task)
 	size_t salen;
 	int status;
 
+	clnt = rpcb_find_transport_owner(task->tk_client);
+	xprt = clnt->cl_xprt;
+
 	dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
 		task->tk_pid, __func__,
 		clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot);
 
-	/* Autobind on cloned rpc clients is discouraged */
-	BUG_ON(clnt->cl_parent != clnt);
-
 	/* Put self on the wait queue to ensure we get notified if
 	 * some other task is already attempting to bind the port */
 	rpc_sleep_on(&xprt->binding, task, NULL);
@@ -558,7 +571,7 @@ void rpcb_getport_async(struct rpc_task *task)
 		status = -ENOMEM;
 		dprintk("RPC: %5u %s: no memory available\n",
 			task->tk_pid, __func__);
-		goto bailout_nofree;
+		goto bailout_release_client;
 	}
 	map->r_prog = clnt->cl_prog;
 	map->r_vers = clnt->cl_vers;
@@ -578,11 +591,13 @@ void rpcb_getport_async(struct rpc_task *task)
 			task->tk_pid, __func__);
 		return;
 	}
-	rpc_put_task(child);
 
-	task->tk_xprt->stat.bind_count++;
+	xprt->stat.bind_count++;
+	rpc_put_task(child);
 	return;
 
+bailout_release_client:
+	rpc_release_client(rpcb_clnt);
 bailout_nofree:
 	rpcb_wake_rpcbind_waiters(xprt, status);
 	task->tk_status = status;
@@ -633,7 +648,7 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
 static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p,
 			       struct rpcbind_args *rpcb)
 {
-	dprintk("RPC:       rpcb_encode_mapping(%u, %u, %d, %u)\n",
+	dprintk("RPC:       encoding rpcb request (%u, %u, %d, %u)\n",
 			rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
 	*p++ = htonl(rpcb->r_prog);
 	*p++ = htonl(rpcb->r_vers);
@@ -648,7 +663,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p,
 			       unsigned short *portp)
 {
 	*portp = (unsigned short) ntohl(*p++);
-	dprintk("RPC:       rpcb_decode_getport result %u\n",
+	dprintk("RPC:       rpcb getport result: %u\n",
 			*portp);
 	return 0;
 }
@@ -657,7 +672,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
 			   unsigned int *boolp)
 {
 	*boolp = (unsigned int) ntohl(*p++);
-	dprintk("RPC:       rpcb_decode_set: call %s\n",
+	dprintk("RPC:       rpcb set/unset call %s\n",
 			(*boolp ? "succeeded" : "failed"));
 	return 0;
 }
@@ -665,7 +680,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
 static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p,
 			       struct rpcbind_args *rpcb)
 {
-	dprintk("RPC:       rpcb_encode_getaddr(%u, %u, %s)\n",
+	dprintk("RPC:       encoding rpcb request (%u, %u, %s)\n",
 			rpcb->r_prog, rpcb->r_vers, rpcb->r_addr);
 	*p++ = htonl(rpcb->r_prog);
 	*p++ = htonl(rpcb->r_vers);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 5a32cb7c4bb4..54c98d876847 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -28,6 +28,8 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCDSP
 
+static void svc_unregister(const struct svc_serv *serv);
+
 #define svc_serv_is_pooled(serv)    ((serv)->sv_function)
 
 /*
@@ -357,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
  */
 static struct svc_serv *
 __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
-	   void (*shutdown)(struct svc_serv *serv))
+	   sa_family_t family, void (*shutdown)(struct svc_serv *serv))
 {
 	struct svc_serv	*serv;
 	unsigned int vers;
@@ -366,6 +368,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 
 	if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
 		return NULL;
+	serv->sv_family    = family;
 	serv->sv_name      = prog->pg_name;
 	serv->sv_program   = prog;
 	serv->sv_nrthreads = 1;
@@ -416,30 +419,29 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 		spin_lock_init(&pool->sp_lock);
 	}
 
-
 	/* Remove any stale portmap registrations */
-	svc_register(serv, 0, 0);
+	svc_unregister(serv);
 
 	return serv;
 }
 
 struct svc_serv *
 svc_create(struct svc_program *prog, unsigned int bufsize,
-		void (*shutdown)(struct svc_serv *serv))
+		sa_family_t family, void (*shutdown)(struct svc_serv *serv))
 {
-	return __svc_create(prog, bufsize, /*npools*/1, shutdown);
+	return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
 }
 EXPORT_SYMBOL(svc_create);
 
 struct svc_serv *
 svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
-		void (*shutdown)(struct svc_serv *serv),
+		  sa_family_t family, void (*shutdown)(struct svc_serv *serv),
 		  svc_thread_fn func, struct module *mod)
 {
 	struct svc_serv *serv;
 	unsigned int npools = svc_pool_map_get();
 
-	serv = __svc_create(prog, bufsize, npools, shutdown);
+	serv = __svc_create(prog, bufsize, npools, family, shutdown);
 
 	if (serv != NULL) {
 		serv->sv_function = func;
@@ -486,8 +488,7 @@ svc_destroy(struct svc_serv *serv)
 	if (svc_serv_is_pooled(serv))
 		svc_pool_map_put();
 
-	/* Unregister service with the portmapper */
-	svc_register(serv, 0, 0);
+	svc_unregister(serv);
 	kfree(serv->sv_pools);
 	kfree(serv);
 }
@@ -718,55 +719,245 @@ svc_exit_thread(struct svc_rqst *rqstp)
 }
 EXPORT_SYMBOL(svc_exit_thread);
 
+#ifdef CONFIG_SUNRPC_REGISTER_V4
+
 /*
- * Register an RPC service with the local portmapper.
- * To unregister a service, call this routine with
- * proto and port == 0.
+ * Register an "inet" protocol family netid with the local
+ * rpcbind daemon via an rpcbind v4 SET request.
+ *
+ * No netconfig infrastructure is available in the kernel, so
+ * we map IP_ protocol numbers to netids by hand.
+ *
+ * Returns zero on success; a negative errno value is returned
+ * if any error occurs.
  */
-int
-svc_register(struct svc_serv *serv, int proto, unsigned short port)
+static int __svc_rpcb_register4(const u32 program, const u32 version,
+				const unsigned short protocol,
+				const unsigned short port)
+{
+	struct sockaddr_in sin = {
+		.sin_family		= AF_INET,
+		.sin_addr.s_addr	= htonl(INADDR_ANY),
+		.sin_port		= htons(port),
+	};
+	char *netid;
+
+	switch (protocol) {
+	case IPPROTO_UDP:
+		netid = RPCBIND_NETID_UDP;
+		break;
+	case IPPROTO_TCP:
+		netid = RPCBIND_NETID_TCP;
+		break;
+	default:
+		return -EPROTONOSUPPORT;
+	}
+
+	return rpcb_v4_register(program, version,
+				(struct sockaddr *)&sin, netid);
+}
+
+/*
+ * Register an "inet6" protocol family netid with the local
+ * rpcbind daemon via an rpcbind v4 SET request.
+ *
+ * No netconfig infrastructure is available in the kernel, so
+ * we map IP_ protocol numbers to netids by hand.
+ *
+ * Returns zero on success; a negative errno value is returned
+ * if any error occurs.
+ */
+static int __svc_rpcb_register6(const u32 program, const u32 version,
+				const unsigned short protocol,
+				const unsigned short port)
+{
+	struct sockaddr_in6 sin6 = {
+		.sin6_family		= AF_INET6,
+		.sin6_addr		= IN6ADDR_ANY_INIT,
+		.sin6_port		= htons(port),
+	};
+	char *netid;
+
+	switch (protocol) {
+	case IPPROTO_UDP:
+		netid = RPCBIND_NETID_UDP6;
+		break;
+	case IPPROTO_TCP:
+		netid = RPCBIND_NETID_TCP6;
+		break;
+	default:
+		return -EPROTONOSUPPORT;
+	}
+
+	return rpcb_v4_register(program, version,
+				(struct sockaddr *)&sin6, netid);
+}
+
+/*
+ * Register a kernel RPC service via rpcbind version 4.
+ *
+ * Returns zero on success; a negative errno value is returned
+ * if any error occurs.
+ */
+static int __svc_register(const u32 program, const u32 version,
+			  const sa_family_t family,
+			  const unsigned short protocol,
+			  const unsigned short port)
+{
+	int error;
+
+	switch (family) {
+	case AF_INET:
+		return __svc_rpcb_register4(program, version,
+						protocol, port);
+	case AF_INET6:
+		error = __svc_rpcb_register6(program, version,
+						protocol, port);
+		if (error < 0)
+			return error;
+
+		/*
+		 * Work around bug in some versions of Linux rpcbind
+		 * which don't allow registration of both inet and
+		 * inet6 netids.
+		 *
+		 * Error return ignored for now.
+		 */
+		__svc_rpcb_register4(program, version,
+						protocol, port);
+		return 0;
+	}
+
+	return -EAFNOSUPPORT;
+}
+
+#else	/* CONFIG_SUNRPC_REGISTER_V4 */
+
+/*
+ * Register a kernel RPC service via rpcbind version 2.
+ *
+ * Returns zero on success; a negative errno value is returned
+ * if any error occurs.
+ */
+static int __svc_register(const u32 program, const u32 version,
+			  sa_family_t family,
+			  const unsigned short protocol,
+			  const unsigned short port)
+{
+	if (family != AF_INET)
+		return -EAFNOSUPPORT;
+
+	return rpcb_register(program, version, protocol, port);
+}
+
+#endif /* CONFIG_SUNRPC_REGISTER_V4 */
+
+/**
+ * svc_register - register an RPC service with the local portmapper
+ * @serv: svc_serv struct for the service to register
+ * @proto: transport protocol number to advertise
+ * @port: port to advertise
+ *
+ * Service is registered for any address in serv's address family
+ */
+int svc_register(const struct svc_serv *serv, const unsigned short proto,
+		 const unsigned short port)
 {
 	struct svc_program	*progp;
-	unsigned long		flags;
 	unsigned int		i;
-	int			error = 0, dummy;
+	int			error = 0;
 
-	if (!port)
-		clear_thread_flag(TIF_SIGPENDING);
+	BUG_ON(proto == 0 && port == 0);
 
 	for (progp = serv->sv_program; progp; progp = progp->pg_next) {
 		for (i = 0; i < progp->pg_nvers; i++) {
 			if (progp->pg_vers[i] == NULL)
 				continue;
 
-			dprintk("svc: svc_register(%s, %s, %d, %d)%s\n",
+			dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n",
 					progp->pg_name,
+					i,
 					proto == IPPROTO_UDP?  "udp" : "tcp",
 					port,
-					i,
+					serv->sv_family,
 					progp->pg_vers[i]->vs_hidden?
 						" (but not telling portmap)" : "");
 
 			if (progp->pg_vers[i]->vs_hidden)
 				continue;
 
-			error = rpcb_register(progp->pg_prog, i, proto, port, &dummy);
+			error = __svc_register(progp->pg_prog, i,
+						serv->sv_family, proto, port);
 			if (error < 0)
 				break;
-			if (port && !dummy) {
-				error = -EACCES;
-				break;
-			}
 		}
 	}
 
-	if (!port) {
-		spin_lock_irqsave(&current->sighand->siglock, flags);
-		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	return error;
+}
+
+#ifdef CONFIG_SUNRPC_REGISTER_V4
+
+static void __svc_unregister(const u32 program, const u32 version,
+			     const char *progname)
+{
+	struct sockaddr_in6 sin6 = {
+		.sin6_family		= AF_INET6,
+		.sin6_addr		= IN6ADDR_ANY_INIT,
+		.sin6_port		= 0,
+	};
+	int error;
+
+	error = rpcb_v4_register(program, version,
+				(struct sockaddr *)&sin6, "");
+	dprintk("svc: %s(%sv%u), error %d\n",
+			__func__, progname, version, error);
+}
+
+#else	/* CONFIG_SUNRPC_REGISTER_V4 */
+
+static void __svc_unregister(const u32 program, const u32 version,
+			     const char *progname)
+{
+	int error;
+
+	error = rpcb_register(program, version, 0, 0);
+	dprintk("svc: %s(%sv%u), error %d\n",
+			__func__, progname, version, error);
+}
+
+#endif	/* CONFIG_SUNRPC_REGISTER_V4 */
+
+/*
+ * All netids, bind addresses and ports registered for [program, version]
+ * are removed from the local rpcbind database (if the service is not
+ * hidden) to make way for a new instance of the service.
+ *
+ * The result of unregistration is reported via dprintk for those who want
+ * verification of the result, but is otherwise not important.
+ */
+static void svc_unregister(const struct svc_serv *serv)
+{
+	struct svc_program *progp;
+	unsigned long flags;
+	unsigned int i;
+
+	clear_thread_flag(TIF_SIGPENDING);
+
+	for (progp = serv->sv_program; progp; progp = progp->pg_next) {
+		for (i = 0; i < progp->pg_nvers; i++) {
+			if (progp->pg_vers[i] == NULL)
+				continue;
+			if (progp->pg_vers[i]->vs_hidden)
+				continue;
+
+			__svc_unregister(progp->pg_prog, i, progp->pg_name);
+		}
 	}
 
-	return error;
+	spin_lock_irqsave(&current->sighand->siglock, flags);
+	recalc_sigpending();
+	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 }
 
 /*
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e46c825f4954..bf5b5cdafebf 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -159,15 +159,44 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
-int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
-		    int flags)
+static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
+					 struct svc_serv *serv,
+					 unsigned short port, int flags)
 {
-	struct svc_xprt_class *xcl;
 	struct sockaddr_in sin = {
 		.sin_family		= AF_INET,
 		.sin_addr.s_addr	= htonl(INADDR_ANY),
 		.sin_port		= htons(port),
 	};
+	struct sockaddr_in6 sin6 = {
+		.sin6_family		= AF_INET6,
+		.sin6_addr		= IN6ADDR_ANY_INIT,
+		.sin6_port		= htons(port),
+	};
+	struct sockaddr *sap;
+	size_t len;
+
+	switch (serv->sv_family) {
+	case AF_INET:
+		sap = (struct sockaddr *)&sin;
+		len = sizeof(sin);
+		break;
+	case AF_INET6:
+		sap = (struct sockaddr *)&sin6;
+		len = sizeof(sin6);
+		break;
+	default:
+		return ERR_PTR(-EAFNOSUPPORT);
+	}
+
+	return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
+}
+
+int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
+		    int flags)
+{
+	struct svc_xprt_class *xcl;
+
 	dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
 	spin_lock(&svc_xprt_class_lock);
 	list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
@@ -180,9 +209,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
 			goto err;
 
 		spin_unlock(&svc_xprt_class_lock);
-		newxprt = xcl->xcl_ops->
-			xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin),
-				   flags);
+		newxprt = __svc_xpo_create(xcl, serv, port, flags);
 		if (IS_ERR(newxprt)) {
 			module_put(xcl->xcl_owner);
 			return PTR_ERR(newxprt);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 3e65719f1ef6..95293f549e9c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1114,6 +1114,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	struct svc_sock	*svsk;
 	struct sock	*inet;
 	int		pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
+	int		val;
 
 	dprintk("svc: svc_setup_socket %p\n", sock);
 	if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1146,6 +1147,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	else
 		svc_tcp_init(svsk, serv);
 
+	/*
+	 * We start one listener per sv_serv.  We want AF_INET
+	 * requests to be automatically shunted to our AF_INET6
+	 * listener using a mapped IPv4 address.  Make sure
+	 * no-one starts an equivalent IPv4 listener, which
+	 * would steal our incoming connections.
+	 */
+	val = 0;
+	if (serv->sv_family == AF_INET6)
+		kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
+					(char *)&val, sizeof(val));
+
 	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
 				svsk, svsk->sk_sk);
 
@@ -1154,8 +1167,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 
 int svc_addsock(struct svc_serv *serv,
 		int fd,
-		char *name_return,
-		int *proto)
+		char *name_return)
 {
 	int err = 0;
 	struct socket *so = sockfd_lookup(fd, &err);
@@ -1190,7 +1202,6 @@ int svc_addsock(struct svc_serv *serv,
 		sockfd_put(so);
 		return err;
 	}
-	if (proto) *proto = so->sk->sk_protocol;
 	return one_sock_name(name_return, svsk);
 }
 EXPORT_SYMBOL_GPL(svc_addsock);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 99a52aabe332..29e401bb612e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -108,13 +108,10 @@ int xprt_register_transport(struct xprt_class *transport)
 			goto out;
 	}
 
-	result = -EINVAL;
-	if (try_module_get(THIS_MODULE)) {
-		list_add_tail(&transport->list, &xprt_list);
-		printk(KERN_INFO "RPC: Registered %s transport module.\n",
-			transport->name);
-		result = 0;
-	}
+	list_add_tail(&transport->list, &xprt_list);
+	printk(KERN_INFO "RPC: Registered %s transport module.\n",
+	       transport->name);
+	result = 0;
 
 out:
 	spin_unlock(&xprt_list_lock);
@@ -143,7 +140,6 @@ int xprt_unregister_transport(struct xprt_class *transport)
 				"RPC: Unregistered %s transport module.\n",
 				transport->name);
 			list_del_init(&transport->list);
-			module_put(THIS_MODULE);
 			goto out;
 		}
 	}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 5c1954d28d09..14106d26bb95 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
 	}
 
 	if (xdrbuf->tail[0].iov_len) {
+		/* the rpcrdma protocol allows us to omit any trailing
+		 * xdr pad bytes, saving the server an RDMA operation. */
+		if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
+			return n;
 		if (n == nsegs)
 			return 0;
 		seg[n].mr_page = NULL;
@@ -508,8 +512,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	if (hdrlen == 0)
 		return -1;
 
-	dprintk("RPC:       %s: %s: hdrlen %zd rpclen %zd padlen %zd\n"
-		"                   headerp 0x%p base 0x%p lkey 0x%x\n",
+	dprintk("RPC:       %s: %s: hdrlen %zd rpclen %zd padlen %zd"
+		" headerp 0x%p base 0x%p lkey 0x%x\n",
 		__func__, transfertypes[wtype], hdrlen, rpclen, padlen,
 		headerp, base, req->rl_iov.lkey);
 
@@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
  * Scatter inline received data back into provided iov's.
  */
 static void
-rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
+rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
 {
 	int i, npages, curlen, olen;
 	char *destp;
@@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
 	} else
 		rqst->rq_rcv_buf.tail[0].iov_len = 0;
 
+	if (pad) {
+		/* implicit padding on terminal chunk */
+		unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base;
+		while (pad--)
+			p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0;
+	}
+
 	if (copy_len)
 		dprintk("RPC:       %s: %d bytes in"
 			" %d extra segments (%d lost)\n",
@@ -681,12 +692,14 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
 	struct rpc_xprt *xprt = ep->rep_xprt;
 
 	spin_lock_bh(&xprt->transport_lock);
+	if (++xprt->connect_cookie == 0)	/* maintain a reserved value */
+		++xprt->connect_cookie;
 	if (ep->rep_connected > 0) {
 		if (!xprt_test_and_set_connected(xprt))
 			xprt_wake_pending_tasks(xprt, 0);
 	} else {
 		if (xprt_test_and_clear_connected(xprt))
-			xprt_wake_pending_tasks(xprt, ep->rep_connected);
+			xprt_wake_pending_tasks(xprt, -ENOTCONN);
 	}
 	spin_unlock_bh(&xprt->transport_lock);
 }
@@ -792,14 +805,20 @@ repost:
 			    ((unsigned char *)iptr - (unsigned char *)headerp);
 			status = rep->rr_len + rdmalen;
 			r_xprt->rx_stats.total_rdma_reply += rdmalen;
+			/* special case - last chunk may omit padding */
+			if (rdmalen &= 3) {
+				rdmalen = 4 - rdmalen;
+				status += rdmalen;
+			}
 		} else {
 			/* else ordinary inline */
+			rdmalen = 0;
 			iptr = (__be32 *)((unsigned char *)headerp + 28);
 			rep->rr_len -= 28; /*sizeof *headerp;*/
 			status = rep->rr_len;
 		}
 		/* Fix up the rpc results for upper layer */
-		rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len);
+		rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen);
 		break;
 
 	case htonl(RDMA_NOMSG):
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 74de31a06616..a4756576d687 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -116,7 +116,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
  *
  * Assumptions:
  * - chunk[0]->position points to pages[0] at an offset of 0
- * - pages[] is not physically or virtually contigous and consists of
+ * - pages[] is not physically or virtually contiguous and consists of
  *   PAGE_SIZE elements.
  *
  * Output:
@@ -125,7 +125,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
  *   chunk in the read list
  *
  */
-static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
+static int map_read_chunks(struct svcxprt_rdma *xprt,
 			   struct svc_rqst *rqstp,
 			   struct svc_rdma_op_ctxt *head,
 			   struct rpcrdma_msg *rmsgp,
@@ -211,26 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
 	return sge_no;
 }
 
-static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
-			      struct svc_rdma_op_ctxt *ctxt,
-			      struct kvec *vec,
-			      u64 *sgl_offset,
-			      int count)
+/* Map a read-chunk-list to an XDR and fast register the page-list.
+ *
+ * Assumptions:
+ * - chunk[0]	position points to pages[0] at an offset of 0
+ * - pages[]	will be made physically contiguous by creating a one-off memory
+ *		region using the fastreg verb.
+ * - byte_count is # of bytes in read-chunk-list
+ * - ch_count	is # of chunks in read-chunk-list
+ *
+ * Output:
+ * - sge array pointing into pages[] array.
+ * - chunk_sge array specifying sge index and count for each
+ *   chunk in the read list
+ */
+static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
+				struct svc_rqst *rqstp,
+				struct svc_rdma_op_ctxt *head,
+				struct rpcrdma_msg *rmsgp,
+				struct svc_rdma_req_map *rpl_map,
+				struct svc_rdma_req_map *chl_map,
+				int ch_count,
+				int byte_count)
+{
+	int page_no;
+	int ch_no;
+	u32 offset;
+	struct rpcrdma_read_chunk *ch;
+	struct svc_rdma_fastreg_mr *frmr;
+	int ret = 0;
+
+	frmr = svc_rdma_get_frmr(xprt);
+	if (IS_ERR(frmr))
+		return -ENOMEM;
+
+	head->frmr = frmr;
+	head->arg.head[0] = rqstp->rq_arg.head[0];
+	head->arg.tail[0] = rqstp->rq_arg.tail[0];
+	head->arg.pages = &head->pages[head->count];
+	head->hdr_count = head->count; /* save count of hdr pages */
+	head->arg.page_base = 0;
+	head->arg.page_len = byte_count;
+	head->arg.len = rqstp->rq_arg.len + byte_count;
+	head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
+
+	/* Fast register the page list */
+	frmr->kva = page_address(rqstp->rq_arg.pages[0]);
+	frmr->direction = DMA_FROM_DEVICE;
+	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
+	frmr->map_len = byte_count;
+	frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
+	for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
+		frmr->page_list->page_list[page_no] =
+			ib_dma_map_single(xprt->sc_cm_id->device,
+					  page_address(rqstp->rq_arg.pages[page_no]),
+					  PAGE_SIZE, DMA_TO_DEVICE);
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+					 frmr->page_list->page_list[page_no]))
+			goto fatal_err;
+		atomic_inc(&xprt->sc_dma_used);
+		head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
+	}
+	head->count += page_no;
+
+	/* rq_respages points one past arg pages */
+	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
+
+	/* Create the reply and chunk maps */
+	offset = 0;
+	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+	for (ch_no = 0; ch_no < ch_count; ch_no++) {
+		rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
+		rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length;
+		chl_map->ch[ch_no].count = 1;
+		chl_map->ch[ch_no].start = ch_no;
+		offset += ch->rc_target.rs_length;
+		ch++;
+	}
+
+	ret = svc_rdma_fastreg(xprt, frmr);
+	if (ret)
+		goto fatal_err;
+
+	return ch_no;
+
+ fatal_err:
+	printk("svcrdma: error fast registering xdr for xprt %p", xprt);
+	svc_rdma_put_frmr(xprt, frmr);
+	return -EIO;
+}
+
+static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
+			     struct svc_rdma_op_ctxt *ctxt,
+			     struct svc_rdma_fastreg_mr *frmr,
+			     struct kvec *vec,
+			     u64 *sgl_offset,
+			     int count)
 {
 	int i;
 
 	ctxt->count = count;
 	ctxt->direction = DMA_FROM_DEVICE;
 	for (i = 0; i < count; i++) {
-		atomic_inc(&xprt->sc_dma_used);
-		ctxt->sge[i].addr =
-			ib_dma_map_single(xprt->sc_cm_id->device,
-					  vec[i].iov_base, vec[i].iov_len,
-					  DMA_FROM_DEVICE);
+		ctxt->sge[i].length = 0; /* in case map fails */
+		if (!frmr) {
+			ctxt->sge[i].addr =
+				ib_dma_map_single(xprt->sc_cm_id->device,
+						  vec[i].iov_base,
+						  vec[i].iov_len,
+						  DMA_FROM_DEVICE);
+			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+						 ctxt->sge[i].addr))
+				return -EINVAL;
+			ctxt->sge[i].lkey = xprt->sc_dma_lkey;
+			atomic_inc(&xprt->sc_dma_used);
+		} else {
+			ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
+			ctxt->sge[i].lkey = frmr->mr->lkey;
+		}
 		ctxt->sge[i].length = vec[i].iov_len;
-		ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
 		*sgl_offset = *sgl_offset + vec[i].iov_len;
 	}
+	return 0;
 }
 
 static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
@@ -278,6 +380,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 			 struct svc_rdma_op_ctxt *hdr_ctxt)
 {
 	struct ib_send_wr read_wr;
+	struct ib_send_wr inv_wr;
 	int err = 0;
 	int ch_no;
 	int ch_count;
@@ -301,9 +404,20 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
 	if (ch_count > RPCSVC_MAXPAGES)
 		return -EINVAL;
-	sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
-				    rpl_map, chl_map,
-				    ch_count, byte_count);
+
+	if (!xprt->sc_frmr_pg_list_len)
+		sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
+					    rpl_map, chl_map, ch_count,
+					    byte_count);
+	else
+		sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
+						 rpl_map, chl_map, ch_count,
+						 byte_count);
+	if (sge_count < 0) {
+		err = -EIO;
+		goto out;
+	}
+
 	sgl_offset = 0;
 	ch_no = 0;
 
@@ -312,13 +426,16 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 next_sge:
 		ctxt = svc_rdma_get_context(xprt);
 		ctxt->direction = DMA_FROM_DEVICE;
+		ctxt->frmr = hdr_ctxt->frmr;
+		ctxt->read_hdr = NULL;
 		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
 
 		/* Prepare READ WR */
 		memset(&read_wr, 0, sizeof read_wr);
-		ctxt->wr_op = IB_WR_RDMA_READ;
 		read_wr.wr_id = (unsigned long)ctxt;
 		read_wr.opcode = IB_WR_RDMA_READ;
+		ctxt->wr_op = read_wr.opcode;
 		read_wr.send_flags = IB_SEND_SIGNALED;
 		read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
 		read_wr.wr.rdma.remote_addr =
@@ -327,10 +444,15 @@ next_sge:
 		read_wr.sg_list = ctxt->sge;
 		read_wr.num_sge =
 			rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
-		rdma_set_ctxt_sge(xprt, ctxt,
-				  &rpl_map->sge[chl_map->ch[ch_no].start],
-				  &sgl_offset,
-				  read_wr.num_sge);
+		err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
+					&rpl_map->sge[chl_map->ch[ch_no].start],
+					&sgl_offset,
+					read_wr.num_sge);
+		if (err) {
+			svc_rdma_unmap_dma(ctxt);
+			svc_rdma_put_context(ctxt, 0);
+			goto out;
+		}
 		if (((ch+1)->rc_discrim == 0) &&
 		    (read_wr.num_sge == chl_map->ch[ch_no].count)) {
 			/*
@@ -339,6 +461,29 @@ next_sge:
 			 * the client and the RPC needs to be enqueued.
 			 */
 			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+			if (hdr_ctxt->frmr) {
+				set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+				/*
+				 * Invalidate the local MR used to map the data
+				 * sink.
+				 */
+				if (xprt->sc_dev_caps &
+				    SVCRDMA_DEVCAP_READ_W_INV) {
+					read_wr.opcode =
+						IB_WR_RDMA_READ_WITH_INV;
+					ctxt->wr_op = read_wr.opcode;
+					read_wr.ex.invalidate_rkey =
+						ctxt->frmr->mr->lkey;
+				} else {
+					/* Prepare INVALIDATE WR */
+					memset(&inv_wr, 0, sizeof inv_wr);
+					inv_wr.opcode = IB_WR_LOCAL_INV;
+					inv_wr.send_flags = IB_SEND_SIGNALED;
+					inv_wr.ex.invalidate_rkey =
+						hdr_ctxt->frmr->mr->lkey;
+					read_wr.next = &inv_wr;
+				}
+			}
 			ctxt->read_hdr = hdr_ctxt;
 		}
 		/* Post the read */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 84d328329d98..9a7a8e7ae038 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -69,9 +69,127 @@
  * array is only concerned with the reply we are assured that we have
  * on extra page for the RPCRMDA header.
  */
-static void xdr_to_sge(struct svcxprt_rdma *xprt,
-		       struct xdr_buf *xdr,
-		       struct svc_rdma_req_map *vec)
+int fast_reg_xdr(struct svcxprt_rdma *xprt,
+		 struct xdr_buf *xdr,
+		 struct svc_rdma_req_map *vec)
+{
+	int sge_no;
+	u32 sge_bytes;
+	u32 page_bytes;
+	u32 page_off;
+	int page_no = 0;
+	u8 *frva;
+	struct svc_rdma_fastreg_mr *frmr;
+
+	frmr = svc_rdma_get_frmr(xprt);
+	if (IS_ERR(frmr))
+		return -ENOMEM;
+	vec->frmr = frmr;
+
+	/* Skip the RPCRDMA header */
+	sge_no = 1;
+
+	/* Map the head. */
+	frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
+	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
+	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
+	vec->count = 2;
+	sge_no++;
+
+	/* Build the FRMR */
+	frmr->kva = frva;
+	frmr->direction = DMA_TO_DEVICE;
+	frmr->access_flags = 0;
+	frmr->map_len = PAGE_SIZE;
+	frmr->page_list_len = 1;
+	frmr->page_list->page_list[page_no] =
+		ib_dma_map_single(xprt->sc_cm_id->device,
+				  (void *)xdr->head[0].iov_base,
+				  PAGE_SIZE, DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+				 frmr->page_list->page_list[page_no]))
+		goto fatal_err;
+	atomic_inc(&xprt->sc_dma_used);
+
+	page_off = xdr->page_base;
+	page_bytes = xdr->page_len + page_off;
+	if (!page_bytes)
+		goto encode_tail;
+
+	/* Map the pages */
+	vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
+	vec->sge[sge_no].iov_len = page_bytes;
+	sge_no++;
+	while (page_bytes) {
+		struct page *page;
+
+		page = xdr->pages[page_no++];
+		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
+		page_bytes -= sge_bytes;
+
+		frmr->page_list->page_list[page_no] =
+			ib_dma_map_page(xprt->sc_cm_id->device, page, 0,
+					  PAGE_SIZE, DMA_TO_DEVICE);
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+					 frmr->page_list->page_list[page_no]))
+			goto fatal_err;
+
+		atomic_inc(&xprt->sc_dma_used);
+		page_off = 0; /* reset for next time through loop */
+		frmr->map_len += PAGE_SIZE;
+		frmr->page_list_len++;
+	}
+	vec->count++;
+
+ encode_tail:
+	/* Map tail */
+	if (0 == xdr->tail[0].iov_len)
+		goto done;
+
+	vec->count++;
+	vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
+
+	if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
+	    ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
+		/*
+		 * If head and tail use the same page, we don't need
+		 * to map it again.
+		 */
+		vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
+	} else {
+		void *va;
+
+		/* Map another page for the tail */
+		page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+		va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
+		vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
+
+		frmr->page_list->page_list[page_no] =
+			ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE,
+					  DMA_TO_DEVICE);
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+					 frmr->page_list->page_list[page_no]))
+			goto fatal_err;
+		atomic_inc(&xprt->sc_dma_used);
+		frmr->map_len += PAGE_SIZE;
+		frmr->page_list_len++;
+	}
+
+ done:
+	if (svc_rdma_fastreg(xprt, frmr))
+		goto fatal_err;
+
+	return 0;
+
+ fatal_err:
+	printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
+	svc_rdma_put_frmr(xprt, frmr);
+	return -EIO;
+}
+
+static int map_xdr(struct svcxprt_rdma *xprt,
+		   struct xdr_buf *xdr,
+		   struct svc_rdma_req_map *vec)
 {
 	int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
 	int sge_no;
@@ -83,6 +201,9 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
 	BUG_ON(xdr->len !=
 	       (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
 
+	if (xprt->sc_frmr_pg_list_len)
+		return fast_reg_xdr(xprt, xdr, vec);
+
 	/* Skip the first sge, this is for the RPCRDMA header */
 	sge_no = 1;
 
@@ -116,9 +237,12 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
 
 	BUG_ON(sge_no > sge_max);
 	vec->count = sge_no;
+	return 0;
 }
 
 /* Assumptions:
+ * - We are using FRMR
+ *     - or -
  * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
  */
 static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
@@ -158,30 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 	sge_no = 0;
 
 	/* Copy the remaining SGE */
-	while (bc != 0 && xdr_sge_no < vec->count) {
-		sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
-		sge_bytes = min((size_t)bc,
-				(size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
+	while (bc != 0) {
+		sge_bytes = min_t(size_t,
+			  bc, vec->sge[xdr_sge_no].iov_len-sge_off);
 		sge[sge_no].length = sge_bytes;
-		atomic_inc(&xprt->sc_dma_used);
-		sge[sge_no].addr =
-			ib_dma_map_single(xprt->sc_cm_id->device,
-					  (void *)
-					  vec->sge[xdr_sge_no].iov_base + sge_off,
-					  sge_bytes, DMA_TO_DEVICE);
-		if (dma_mapping_error(xprt->sc_cm_id->device->dma_device,
-					sge[sge_no].addr))
-			goto err;
+		if (!vec->frmr) {
+			sge[sge_no].addr =
+				ib_dma_map_single(xprt->sc_cm_id->device,
+						  (void *)
+						  vec->sge[xdr_sge_no].iov_base + sge_off,
+						  sge_bytes, DMA_TO_DEVICE);
+			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+						 sge[sge_no].addr))
+				goto err;
+			atomic_inc(&xprt->sc_dma_used);
+			sge[sge_no].lkey = xprt->sc_dma_lkey;
+		} else {
+			sge[sge_no].addr = (unsigned long)
+				vec->sge[xdr_sge_no].iov_base + sge_off;
+			sge[sge_no].lkey = vec->frmr->mr->lkey;
+		}
+		ctxt->count++;
+		ctxt->frmr = vec->frmr;
 		sge_off = 0;
 		sge_no++;
-		ctxt->count++;
 		xdr_sge_no++;
+		BUG_ON(xdr_sge_no > vec->count);
 		bc -= sge_bytes;
 	}
 
-	BUG_ON(bc != 0);
-	BUG_ON(xdr_sge_no > vec->count);
-
 	/* Prepare WRITE WR */
 	memset(&write_wr, 0, sizeof write_wr);
 	ctxt->wr_op = IB_WR_RDMA_WRITE;
@@ -226,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
 	res_ary = (struct rpcrdma_write_array *)
 		&rdma_resp->rm_body.rm_chunks[1];
 
-	max_write = xprt->sc_max_sge * PAGE_SIZE;
+	if (vec->frmr)
+		max_write = vec->frmr->map_len;
+	else
+		max_write = xprt->sc_max_sge * PAGE_SIZE;
 
 	/* Write chunks start at the pagelist */
 	for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
@@ -297,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
 	res_ary = (struct rpcrdma_write_array *)
 		&rdma_resp->rm_body.rm_chunks[2];
 
-	max_write = xprt->sc_max_sge * PAGE_SIZE;
+	if (vec->frmr)
+		max_write = vec->frmr->map_len;
+	else
+		max_write = xprt->sc_max_sge * PAGE_SIZE;
 
 	/* xdr offset starts at RPC message */
 	for (xdr_off = 0, chunk_no = 0;
@@ -307,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
 		ch = &arg_ary->wc_array[chunk_no].wc_target;
 		write_len = min(xfer_len, ch->rs_length);
 
-
 		/* Prepare the reply chunk given the length actually
 		 * written */
 		rs_offset = get_unaligned(&(ch->rs_offset));
@@ -366,6 +500,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		      int byte_count)
 {
 	struct ib_send_wr send_wr;
+	struct ib_send_wr inv_wr;
 	int sge_no;
 	int sge_bytes;
 	int page_no;
@@ -385,27 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	/* Prepare the context */
 	ctxt->pages[0] = page;
 	ctxt->count = 1;
+	ctxt->frmr = vec->frmr;
+	if (vec->frmr)
+		set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+	else
+		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
 
 	/* Prepare the SGE for the RPCRDMA Header */
-	atomic_inc(&rdma->sc_dma_used);
 	ctxt->sge[0].addr =
 		ib_dma_map_page(rdma->sc_cm_id->device,
 				page, 0, PAGE_SIZE, DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
+		goto err;
+	atomic_inc(&rdma->sc_dma_used);
+
 	ctxt->direction = DMA_TO_DEVICE;
+
 	ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
-	ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
+	ctxt->sge[0].lkey = rdma->sc_dma_lkey;
 
 	/* Determine how many of our SGE are to be transmitted */
 	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
 		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
 		byte_count -= sge_bytes;
-		atomic_inc(&rdma->sc_dma_used);
-		ctxt->sge[sge_no].addr =
-			ib_dma_map_single(rdma->sc_cm_id->device,
-					  vec->sge[sge_no].iov_base,
-					  sge_bytes, DMA_TO_DEVICE);
+		if (!vec->frmr) {
+			ctxt->sge[sge_no].addr =
+				ib_dma_map_single(rdma->sc_cm_id->device,
+						  vec->sge[sge_no].iov_base,
+						  sge_bytes, DMA_TO_DEVICE);
+			if (ib_dma_mapping_error(rdma->sc_cm_id->device,
+						 ctxt->sge[sge_no].addr))
+				goto err;
+			atomic_inc(&rdma->sc_dma_used);
+			ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
+		} else {
+			ctxt->sge[sge_no].addr = (unsigned long)
+				vec->sge[sge_no].iov_base;
+			ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
+		}
 		ctxt->sge[sge_no].length = sge_bytes;
-		ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
 	}
 	BUG_ON(byte_count != 0);
 
@@ -417,11 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
 		ctxt->count++;
 		rqstp->rq_respages[page_no] = NULL;
-		/* If there are more pages than SGE, terminate SGE list */
+		/*
+		 * If there are more pages than SGE, terminate SGE
+		 * list so that svc_rdma_unmap_dma doesn't attempt to
+		 * unmap garbage.
+		 */
 		if (page_no+1 >= sge_no)
 			ctxt->sge[page_no+1].length = 0;
 	}
 	BUG_ON(sge_no > rdma->sc_max_sge);
+	BUG_ON(sge_no > ctxt->count);
 	memset(&send_wr, 0, sizeof send_wr);
 	ctxt->wr_op = IB_WR_SEND;
 	send_wr.wr_id = (unsigned long)ctxt;
@@ -429,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	send_wr.num_sge = sge_no;
 	send_wr.opcode = IB_WR_SEND;
 	send_wr.send_flags =  IB_SEND_SIGNALED;
+	if (vec->frmr) {
+		/* Prepare INVALIDATE WR */
+		memset(&inv_wr, 0, sizeof inv_wr);
+		inv_wr.opcode = IB_WR_LOCAL_INV;
+		inv_wr.send_flags = IB_SEND_SIGNALED;
+		inv_wr.ex.invalidate_rkey =
+			vec->frmr->mr->lkey;
+		send_wr.next = &inv_wr;
+	}
 
 	ret = svc_rdma_send(rdma, &send_wr);
 	if (ret)
-		svc_rdma_put_context(ctxt, 1);
+		goto err;
 
-	return ret;
+	return 0;
+
+ err:
+	svc_rdma_put_frmr(rdma, vec->frmr);
+	svc_rdma_put_context(ctxt, 1);
+	return -EIO;
 }
 
 void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
@@ -477,8 +649,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	ctxt = svc_rdma_get_context(rdma);
 	ctxt->direction = DMA_TO_DEVICE;
 	vec = svc_rdma_get_req_map();
-	xdr_to_sge(rdma, &rqstp->rq_res, vec);
-
+	ret = map_xdr(rdma, &rqstp->rq_res, vec);
+	if (ret)
+		goto err0;
 	inline_bytes = rqstp->rq_res.len;
 
 	/* Create the RDMA response header */
@@ -498,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	if (ret < 0) {
 		printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
 		       ret);
-		goto error;
+		goto err1;
 	}
 	inline_bytes -= ret;
 
@@ -508,7 +681,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	if (ret < 0) {
 		printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
 		       ret);
-		goto error;
+		goto err1;
 	}
 	inline_bytes -= ret;
 
@@ -517,9 +690,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	svc_rdma_put_req_map(vec);
 	dprintk("svcrdma: send_reply returns %d\n", ret);
 	return ret;
- error:
+
+ err1:
+	put_page(res_page);
+ err0:
 	svc_rdma_put_req_map(vec);
 	svc_rdma_put_context(ctxt, 0);
-	put_page(res_page);
 	return ret;
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 900cb69728c6..6fb493cbd29f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -100,20 +100,29 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 	ctxt->xprt = xprt;
 	INIT_LIST_HEAD(&ctxt->dto_q);
 	ctxt->count = 0;
+	ctxt->frmr = NULL;
 	atomic_inc(&xprt->sc_ctxt_used);
 	return ctxt;
 }
 
-static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
+void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 {
 	struct svcxprt_rdma *xprt = ctxt->xprt;
 	int i;
 	for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
-		atomic_dec(&xprt->sc_dma_used);
-		ib_dma_unmap_single(xprt->sc_cm_id->device,
-				    ctxt->sge[i].addr,
-				    ctxt->sge[i].length,
-				    ctxt->direction);
+		/*
+		 * Unmap the DMA addr in the SGE if the lkey matches
+		 * the sc_dma_lkey, otherwise, ignore it since it is
+		 * an FRMR lkey and will be unmapped later when the
+		 * last WR that uses it completes.
+		 */
+		if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
+			atomic_dec(&xprt->sc_dma_used);
+			ib_dma_unmap_single(xprt->sc_cm_id->device,
+					    ctxt->sge[i].addr,
+					    ctxt->sge[i].length,
+					    ctxt->direction);
+		}
 	}
 }
 
@@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
 		schedule_timeout_uninterruptible(msecs_to_jiffies(500));
 	}
 	map->count = 0;
+	map->frmr = NULL;
 	return map;
 }
 
@@ -316,6 +326,50 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
 }
 
 /*
+ * Processs a completion context
+ */
+static void process_context(struct svcxprt_rdma *xprt,
+			    struct svc_rdma_op_ctxt *ctxt)
+{
+	svc_rdma_unmap_dma(ctxt);
+
+	switch (ctxt->wr_op) {
+	case IB_WR_SEND:
+		if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
+			svc_rdma_put_frmr(xprt, ctxt->frmr);
+		svc_rdma_put_context(ctxt, 1);
+		break;
+
+	case IB_WR_RDMA_WRITE:
+		svc_rdma_put_context(ctxt, 0);
+		break;
+
+	case IB_WR_RDMA_READ:
+	case IB_WR_RDMA_READ_WITH_INV:
+		if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
+			struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
+			BUG_ON(!read_hdr);
+			if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
+				svc_rdma_put_frmr(xprt, ctxt->frmr);
+			spin_lock_bh(&xprt->sc_rq_dto_lock);
+			set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+			list_add_tail(&read_hdr->dto_q,
+				      &xprt->sc_read_complete_q);
+			spin_unlock_bh(&xprt->sc_rq_dto_lock);
+			svc_xprt_enqueue(&xprt->sc_xprt);
+		}
+		svc_rdma_put_context(ctxt, 0);
+		break;
+
+	default:
+		printk(KERN_ERR "svcrdma: unexpected completion type, "
+		       "opcode=%d\n",
+		       ctxt->wr_op);
+		break;
+	}
+}
+
+/*
  * Send Queue Completion Handler - potentially called on interrupt context.
  *
  * Note that caller must hold a transport reference.
@@ -327,17 +381,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 	struct ib_cq *cq = xprt->sc_sq_cq;
 	int ret;
 
-
 	if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
 		return;
 
 	ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
 	atomic_inc(&rdma_stat_sq_poll);
 	while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
-		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
-		xprt = ctxt->xprt;
-
-		svc_rdma_unmap_dma(ctxt);
 		if (wc.status != IB_WC_SUCCESS)
 			/* Close the transport */
 			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -346,35 +395,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 		atomic_dec(&xprt->sc_sq_count);
 		wake_up(&xprt->sc_send_wait);
 
-		switch (ctxt->wr_op) {
-		case IB_WR_SEND:
-			svc_rdma_put_context(ctxt, 1);
-			break;
-
-		case IB_WR_RDMA_WRITE:
-			svc_rdma_put_context(ctxt, 0);
-			break;
-
-		case IB_WR_RDMA_READ:
-			if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
-				struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
-				BUG_ON(!read_hdr);
-				spin_lock_bh(&xprt->sc_rq_dto_lock);
-				set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-				list_add_tail(&read_hdr->dto_q,
-					      &xprt->sc_read_complete_q);
-				spin_unlock_bh(&xprt->sc_rq_dto_lock);
-				svc_xprt_enqueue(&xprt->sc_xprt);
-			}
-			svc_rdma_put_context(ctxt, 0);
-			break;
+		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
+		if (ctxt)
+			process_context(xprt, ctxt);
 
-		default:
-			printk(KERN_ERR "svcrdma: unexpected completion type, "
-			       "opcode=%d, status=%d\n",
-			       wc.opcode, wc.status);
-			break;
-		}
 		svc_xprt_put(&xprt->sc_xprt);
 	}
 
@@ -425,10 +449,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
+	INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
 	init_waitqueue_head(&cma_xprt->sc_send_wait);
 
 	spin_lock_init(&cma_xprt->sc_lock);
 	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
+	spin_lock_init(&cma_xprt->sc_frmr_q_lock);
 
 	cma_xprt->sc_ord = svcrdma_ord;
 
@@ -462,7 +488,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 	struct ib_recv_wr recv_wr, *bad_recv_wr;
 	struct svc_rdma_op_ctxt *ctxt;
 	struct page *page;
-	unsigned long pa;
+	dma_addr_t pa;
 	int sge_no;
 	int buflen;
 	int ret;
@@ -474,13 +500,15 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 		BUG_ON(sge_no >= xprt->sc_max_sge);
 		page = svc_rdma_get_page();
 		ctxt->pages[sge_no] = page;
-		atomic_inc(&xprt->sc_dma_used);
 		pa = ib_dma_map_page(xprt->sc_cm_id->device,
 				     page, 0, PAGE_SIZE,
 				     DMA_FROM_DEVICE);
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
+			goto err_put_ctxt;
+		atomic_inc(&xprt->sc_dma_used);
 		ctxt->sge[sge_no].addr = pa;
 		ctxt->sge[sge_no].length = PAGE_SIZE;
-		ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+		ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
 		buflen += PAGE_SIZE;
 	}
 	ctxt->count = sge_no;
@@ -496,6 +524,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 		svc_rdma_put_context(ctxt, 1);
 	}
 	return ret;
+
+ err_put_ctxt:
+	svc_rdma_put_context(ctxt, 1);
+	return -ENOMEM;
 }
 
 /*
@@ -566,7 +598,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
 		dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
 			"event=%d\n", cma_id, cma_id->context, event->event);
 		handle_connect_req(cma_id,
-				   event->param.conn.responder_resources);
+				   event->param.conn.initiator_depth);
 		break;
 
 	case RDMA_CM_EVENT_ESTABLISHED:
@@ -686,6 +718,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 	return ERR_PTR(ret);
 }
 
+static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
+{
+	struct ib_mr *mr;
+	struct ib_fast_reg_page_list *pl;
+	struct svc_rdma_fastreg_mr *frmr;
+
+	frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
+	if (!frmr)
+		goto err;
+
+	mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
+	if (!mr)
+		goto err_free_frmr;
+
+	pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
+					 RPCSVC_MAXPAGES);
+	if (!pl)
+		goto err_free_mr;
+
+	frmr->mr = mr;
+	frmr->page_list = pl;
+	INIT_LIST_HEAD(&frmr->frmr_list);
+	return frmr;
+
+ err_free_mr:
+	ib_dereg_mr(mr);
+ err_free_frmr:
+	kfree(frmr);
+ err:
+	return ERR_PTR(-ENOMEM);
+}
+
+static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
+{
+	struct svc_rdma_fastreg_mr *frmr;
+
+	while (!list_empty(&xprt->sc_frmr_q)) {
+		frmr = list_entry(xprt->sc_frmr_q.next,
+				  struct svc_rdma_fastreg_mr, frmr_list);
+		list_del_init(&frmr->frmr_list);
+		ib_dereg_mr(frmr->mr);
+		ib_free_fast_reg_page_list(frmr->page_list);
+		kfree(frmr);
+	}
+}
+
+struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
+{
+	struct svc_rdma_fastreg_mr *frmr = NULL;
+
+	spin_lock_bh(&rdma->sc_frmr_q_lock);
+	if (!list_empty(&rdma->sc_frmr_q)) {
+		frmr = list_entry(rdma->sc_frmr_q.next,
+				  struct svc_rdma_fastreg_mr, frmr_list);
+		list_del_init(&frmr->frmr_list);
+		frmr->map_len = 0;
+		frmr->page_list_len = 0;
+	}
+	spin_unlock_bh(&rdma->sc_frmr_q_lock);
+	if (frmr)
+		return frmr;
+
+	return rdma_alloc_frmr(rdma);
+}
+
+static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
+			   struct svc_rdma_fastreg_mr *frmr)
+{
+	int page_no;
+	for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
+		dma_addr_t addr = frmr->page_list->page_list[page_no];
+		if (ib_dma_mapping_error(frmr->mr->device, addr))
+			continue;
+		atomic_dec(&xprt->sc_dma_used);
+		ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE,
+				    frmr->direction);
+	}
+}
+
+void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
+		       struct svc_rdma_fastreg_mr *frmr)
+{
+	if (frmr) {
+		frmr_unmap_dma(rdma, frmr);
+		spin_lock_bh(&rdma->sc_frmr_q_lock);
+		BUG_ON(!list_empty(&frmr->frmr_list));
+		list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
+		spin_unlock_bh(&rdma->sc_frmr_q_lock);
+	}
+}
+
 /*
  * This is the xpo_recvfrom function for listening endpoints. Its
  * purpose is to accept incoming connections. The CMA callback handler
@@ -704,6 +827,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	struct rdma_conn_param conn_param;
 	struct ib_qp_init_attr qp_attr;
 	struct ib_device_attr devattr;
+	int dma_mr_acc;
+	int need_dma_mr;
 	int ret;
 	int i;
 
@@ -819,15 +944,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	}
 	newxprt->sc_qp = newxprt->sc_cm_id->qp;
 
-	/* Register all of physical memory */
-	newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd,
-					    IB_ACCESS_LOCAL_WRITE |
-					    IB_ACCESS_REMOTE_WRITE);
-	if (IS_ERR(newxprt->sc_phys_mr)) {
-		dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret);
+	/*
+	 * Use the most secure set of MR resources based on the
+	 * transport type and available memory management features in
+	 * the device. Here's the table implemented below:
+	 *
+	 *		Fast	Global	DMA	Remote WR
+	 *		Reg	LKEY	MR	Access
+	 *		Sup'd	Sup'd	Needed	Needed
+	 *
+	 * IWARP	N	N	Y	Y
+	 *		N	Y	Y	Y
+	 *		Y	N	Y	N
+	 *		Y	Y	N	-
+	 *
+	 * IB		N	N	Y	N
+	 *		N	Y	N	-
+	 *		Y	N	Y	N
+	 *		Y	Y	N	-
+	 *
+	 * NB:	iWARP requires remote write access for the data sink
+	 *	of an RDMA_READ. IB does not.
+	 */
+	if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+		newxprt->sc_frmr_pg_list_len =
+			devattr.max_fast_reg_page_list_len;
+		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
+	}
+
+	/*
+	 * Determine if a DMA MR is required and if so, what privs are required
+	 */
+	switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) {
+	case RDMA_TRANSPORT_IWARP:
+		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
+		if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
+			need_dma_mr = 1;
+			dma_mr_acc =
+				(IB_ACCESS_LOCAL_WRITE |
+				 IB_ACCESS_REMOTE_WRITE);
+		} else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+			need_dma_mr = 1;
+			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+		} else
+			need_dma_mr = 0;
+		break;
+	case RDMA_TRANSPORT_IB:
+		if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+			need_dma_mr = 1;
+			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+		} else
+			need_dma_mr = 0;
+		break;
+	default:
 		goto errout;
 	}
 
+	/* Create the DMA MR if needed, otherwise, use the DMA LKEY */
+	if (need_dma_mr) {
+		/* Register all of physical memory */
+		newxprt->sc_phys_mr =
+			ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
+		if (IS_ERR(newxprt->sc_phys_mr)) {
+			dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
+				ret);
+			goto errout;
+		}
+		newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
+	} else
+		newxprt->sc_dma_lkey =
+			newxprt->sc_cm_id->device->local_dma_lkey;
+
 	/* Post receive buffers */
 	for (i = 0; i < newxprt->sc_max_requests; i++) {
 		ret = svc_rdma_post_recv(newxprt);
@@ -961,6 +1148,9 @@ static void __svc_rdma_free(struct work_struct *work)
 	WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
 	WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
 
+	/* De-allocate fastreg mr */
+	rdma_dealloc_frmr_q(rdma);
+
 	/* Destroy the QP if present (not a listener) */
 	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
 		ib_destroy_qp(rdma->sc_qp);
@@ -1014,21 +1204,59 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
 	return 1;
 }
 
+/*
+ * Attempt to register the kvec representing the RPC memory with the
+ * device.
+ *
+ * Returns:
+ *  NULL : The device does not support fastreg or there were no more
+ *         fastreg mr.
+ *  frmr : The kvec register request was successfully posted.
+ *    <0 : An error was encountered attempting to register the kvec.
+ */
+int svc_rdma_fastreg(struct svcxprt_rdma *xprt,
+		     struct svc_rdma_fastreg_mr *frmr)
+{
+	struct ib_send_wr fastreg_wr;
+	u8 key;
+
+	/* Bump the key */
+	key = (u8)(frmr->mr->lkey & 0x000000FF);
+	ib_update_fast_reg_key(frmr->mr, ++key);
+
+	/* Prepare FASTREG WR */
+	memset(&fastreg_wr, 0, sizeof fastreg_wr);
+	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+	fastreg_wr.send_flags = IB_SEND_SIGNALED;
+	fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
+	fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
+	fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
+	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fastreg_wr.wr.fast_reg.length = frmr->map_len;
+	fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
+	fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
+	return svc_rdma_send(xprt, &fastreg_wr);
+}
+
 int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 {
-	struct ib_send_wr *bad_wr;
+	struct ib_send_wr *bad_wr, *n_wr;
+	int wr_count;
+	int i;
 	int ret;
 
 	if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
 		return -ENOTCONN;
 
 	BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
-	BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
-		wr->opcode);
+	wr_count = 1;
+	for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
+		wr_count++;
+
 	/* If the SQ is full, wait until an SQ entry is available */
 	while (1) {
 		spin_lock_bh(&xprt->sc_lock);
-		if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
+		if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
 			spin_unlock_bh(&xprt->sc_lock);
 			atomic_inc(&rdma_stat_sq_starve);
 
@@ -1043,19 +1271,26 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 				return 0;
 			continue;
 		}
-		/* Bumped used SQ WR count and post */
-		svc_xprt_get(&xprt->sc_xprt);
+		/* Take a transport ref for each WR posted */
+		for (i = 0; i < wr_count; i++)
+			svc_xprt_get(&xprt->sc_xprt);
+
+		/* Bump used SQ WR count and post */
+		atomic_add(wr_count, &xprt->sc_sq_count);
 		ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
-		if (!ret)
-			atomic_inc(&xprt->sc_sq_count);
-		else {
-			svc_xprt_put(&xprt->sc_xprt);
+		if (ret) {
+			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+			atomic_sub(wr_count, &xprt->sc_sq_count);
+			for (i = 0; i < wr_count; i ++)
+				svc_xprt_put(&xprt->sc_xprt);
 			dprintk("svcrdma: failed to post SQ WR rc=%d, "
 			       "sc_sq_count=%d, sc_sq_depth=%d\n",
 			       ret, atomic_read(&xprt->sc_sq_count),
 			       xprt->sc_sq_depth);
 		}
 		spin_unlock_bh(&xprt->sc_lock);
+		if (ret)
+			wake_up(&xprt->sc_send_wait);
 		break;
 	}
 	return ret;
@@ -1079,10 +1314,14 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 	length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
 
 	/* Prepare SGE for local address */
-	atomic_inc(&xprt->sc_dma_used);
 	sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
 				   p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
-	sge.lkey = xprt->sc_phys_mr->lkey;
+	if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
+		put_page(p);
+		return;
+	}
+	atomic_inc(&xprt->sc_dma_used);
+	sge.lkey = xprt->sc_dma_lkey;
 	sge.length = length;
 
 	ctxt = svc_rdma_get_context(xprt);
@@ -1103,6 +1342,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 	if (ret) {
 		dprintk("svcrdma: Error %d posting send for protocol error\n",
 			ret);
+		ib_dma_unmap_page(xprt->sc_cm_id->device,
+				  sge.addr, PAGE_SIZE,
+				  DMA_FROM_DEVICE);
 		svc_rdma_put_context(ctxt, 1);
 	}
 }
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a564c1a39ec5..9839c3d94145 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -70,11 +70,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
 static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_inline_write_padding;
-#if !RPCRDMA_PERSISTENT_REGISTRATION
-static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */
-#else
-static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL;
-#endif
+static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
+                int xprt_rdma_pad_optimize = 0;
 
 #ifdef RPC_DEBUG
 
@@ -140,6 +137,14 @@ static ctl_table xr_tunables_table[] = {
 		.extra2		= &max_memreg,
 	},
 	{
+		.ctl_name       = CTL_UNNUMBERED,
+		.procname	= "rdma_pad_optimize",
+		.data		= &xprt_rdma_pad_optimize,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
 		.ctl_name = 0,
 	},
 };
@@ -458,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt)
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 
 	dprintk("RPC:       %s: closing\n", __func__);
+	if (r_xprt->rx_ep.rep_connected > 0)
+		xprt->reestablish_timeout = 0;
 	xprt_disconnect_done(xprt);
 	(void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
 }
@@ -485,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task)
 			/* Reconnect */
 			schedule_delayed_work(&r_xprt->rdma_connect,
 				xprt->reestablish_timeout);
+			xprt->reestablish_timeout <<= 1;
+			if (xprt->reestablish_timeout > (30 * HZ))
+				xprt->reestablish_timeout = (30 * HZ);
+			else if (xprt->reestablish_timeout < (5 * HZ))
+				xprt->reestablish_timeout = (5 * HZ);
 		} else {
 			schedule_delayed_work(&r_xprt->rdma_connect, 0);
 			if (!RPC_IS_ASYNC(task))
@@ -591,6 +603,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
 	}
 	dprintk("RPC:       %s: size %zd, request 0x%p\n", __func__, size, req);
 out:
+	req->rl_connect_cookie = 0;	/* our reserved value */
 	return req->rl_xdr_buf;
 
 outfail:
@@ -694,13 +707,21 @@ xprt_rdma_send_request(struct rpc_task *task)
 		req->rl_reply->rr_xprt = xprt;
 	}
 
-	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) {
-		xprt_disconnect_done(xprt);
-		return -ENOTCONN;	/* implies disconnect */
-	}
+	/* Must suppress retransmit to maintain credits */
+	if (req->rl_connect_cookie == xprt->connect_cookie)
+		goto drop_connection;
+	req->rl_connect_cookie = xprt->connect_cookie;
+
+	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+		goto drop_connection;
 
+	task->tk_bytes_sent += rqst->rq_snd_buf.len;
 	rqst->rq_bytes_sent = 0;
 	return 0;
+
+drop_connection:
+	xprt_disconnect_done(xprt);
+	return -ENOTCONN;	/* implies disconnect */
 }
 
 static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
@@ -770,7 +791,7 @@ static void __exit xprt_rdma_cleanup(void)
 {
 	int rc;
 
-	dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
+	dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n");
 #ifdef RPC_DEBUG
 	if (sunrpc_table_header) {
 		unregister_sysctl_table(sunrpc_table_header);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 8ea283ecc522..a5fef5e6c323 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 	switch (event->event) {
 	case RDMA_CM_EVENT_ADDR_RESOLVED:
 	case RDMA_CM_EVENT_ROUTE_RESOLVED:
+		ia->ri_async_rc = 0;
 		complete(&ia->ri_done);
 		break;
 	case RDMA_CM_EVENT_ADDR_ERROR:
@@ -338,13 +339,32 @@ connected:
 		wake_up_all(&ep->rep_connect_wait);
 		break;
 	default:
-		ia->ri_async_rc = -EINVAL;
-		dprintk("RPC:       %s: unexpected CM event %X\n",
+		dprintk("RPC:       %s: unexpected CM event %d\n",
 			__func__, event->event);
-		complete(&ia->ri_done);
 		break;
 	}
 
+#ifdef RPC_DEBUG
+	if (connstate == 1) {
+		int ird = attr.max_dest_rd_atomic;
+		int tird = ep->rep_remote_cma.responder_resources;
+		printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u "
+			"on %s, memreg %d slots %d ird %d%s\n",
+			NIPQUAD(addr->sin_addr.s_addr),
+			ntohs(addr->sin_port),
+			ia->ri_id->device->name,
+			ia->ri_memreg_strategy,
+			xprt->rx_buf.rb_max_requests,
+			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
+	} else if (connstate < 0) {
+		printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u "
+			"closed (%d)\n",
+			NIPQUAD(addr->sin_addr.s_addr),
+			ntohs(addr->sin_port),
+			connstate);
+	}
+#endif
+
 	return 0;
 }
 
@@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
 	struct rdma_cm_id *id;
 	int rc;
 
+	init_completion(&ia->ri_done);
+
 	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP);
 	if (IS_ERR(id)) {
 		rc = PTR_ERR(id);
@@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
 		return id;
 	}
 
-	ia->ri_async_rc = 0;
+	ia->ri_async_rc = -ETIMEDOUT;
 	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
 	if (rc) {
 		dprintk("RPC:       %s: rdma_resolve_addr() failed %i\n",
 			__func__, rc);
 		goto out;
 	}
-	wait_for_completion(&ia->ri_done);
+	wait_for_completion_interruptible_timeout(&ia->ri_done,
+				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
 	rc = ia->ri_async_rc;
 	if (rc)
 		goto out;
 
-	ia->ri_async_rc = 0;
+	ia->ri_async_rc = -ETIMEDOUT;
 	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
 	if (rc) {
 		dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
 			__func__, rc);
 		goto out;
 	}
-	wait_for_completion(&ia->ri_done);
+	wait_for_completion_interruptible_timeout(&ia->ri_done,
+				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
 	rc = ia->ri_async_rc;
 	if (rc)
 		goto out;
@@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq)
 int
 rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 {
-	int rc;
+	int rc, mem_priv;
+	struct ib_device_attr devattr;
 	struct rpcrdma_ia *ia = &xprt->rx_ia;
 
-	init_completion(&ia->ri_done);
-
 	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
 	if (IS_ERR(ia->ri_id)) {
 		rc = PTR_ERR(ia->ri_id);
@@ -443,6 +466,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 	}
 
 	/*
+	 * Query the device to determine if the requested memory
+	 * registration strategy is supported. If it isn't, set the
+	 * strategy to a globally supported model.
+	 */
+	rc = ib_query_device(ia->ri_id->device, &devattr);
+	if (rc) {
+		dprintk("RPC:       %s: ib_query_device failed %d\n",
+			__func__, rc);
+		goto out2;
+	}
+
+	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
+		ia->ri_have_dma_lkey = 1;
+		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
+	}
+
+	switch (memreg) {
+	case RPCRDMA_MEMWINDOWS:
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+		if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) {
+			dprintk("RPC:       %s: MEMWINDOWS registration "
+				"specified but not supported by adapter, "
+				"using slower RPCRDMA_REGISTER\n",
+				__func__);
+			memreg = RPCRDMA_REGISTER;
+		}
+		break;
+	case RPCRDMA_MTHCAFMR:
+		if (!ia->ri_id->device->alloc_fmr) {
+#if RPCRDMA_PERSISTENT_REGISTRATION
+			dprintk("RPC:       %s: MTHCAFMR registration "
+				"specified but not supported by adapter, "
+				"using riskier RPCRDMA_ALLPHYSICAL\n",
+				__func__);
+			memreg = RPCRDMA_ALLPHYSICAL;
+#else
+			dprintk("RPC:       %s: MTHCAFMR registration "
+				"specified but not supported by adapter, "
+				"using slower RPCRDMA_REGISTER\n",
+				__func__);
+			memreg = RPCRDMA_REGISTER;
+#endif
+		}
+		break;
+	case RPCRDMA_FRMR:
+		/* Requires both frmr reg and local dma lkey */
+		if ((devattr.device_cap_flags &
+		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
+		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
+#if RPCRDMA_PERSISTENT_REGISTRATION
+			dprintk("RPC:       %s: FRMR registration "
+				"specified but not supported by adapter, "
+				"using riskier RPCRDMA_ALLPHYSICAL\n",
+				__func__);
+			memreg = RPCRDMA_ALLPHYSICAL;
+#else
+			dprintk("RPC:       %s: FRMR registration "
+				"specified but not supported by adapter, "
+				"using slower RPCRDMA_REGISTER\n",
+				__func__);
+			memreg = RPCRDMA_REGISTER;
+#endif
+		}
+		break;
+	}
+
+	/*
 	 * Optionally obtain an underlying physical identity mapping in
 	 * order to do a memory window-based bind. This base registration
 	 * is protected from remote access - that is enabled only by binding
@@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 	 * revoked after the corresponding completion similar to a storage
 	 * adapter.
 	 */
-	if (memreg > RPCRDMA_REGISTER) {
-		int mem_priv = IB_ACCESS_LOCAL_WRITE;
-		switch (memreg) {
+	switch (memreg) {
+	case RPCRDMA_BOUNCEBUFFERS:
+	case RPCRDMA_REGISTER:
+	case RPCRDMA_FRMR:
+		break;
 #if RPCRDMA_PERSISTENT_REGISTRATION
-		case RPCRDMA_ALLPHYSICAL:
-			mem_priv |= IB_ACCESS_REMOTE_WRITE;
-			mem_priv |= IB_ACCESS_REMOTE_READ;
-			break;
+	case RPCRDMA_ALLPHYSICAL:
+		mem_priv = IB_ACCESS_LOCAL_WRITE |
+				IB_ACCESS_REMOTE_WRITE |
+				IB_ACCESS_REMOTE_READ;
+		goto register_setup;
 #endif
-		case RPCRDMA_MEMWINDOWS_ASYNC:
-		case RPCRDMA_MEMWINDOWS:
-			mem_priv |= IB_ACCESS_MW_BIND;
-			break;
-		default:
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		mem_priv = IB_ACCESS_LOCAL_WRITE |
+				IB_ACCESS_MW_BIND;
+		goto register_setup;
+	case RPCRDMA_MTHCAFMR:
+		if (ia->ri_have_dma_lkey)
 			break;
-		}
+		mem_priv = IB_ACCESS_LOCAL_WRITE;
+	register_setup:
 		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
 		if (IS_ERR(ia->ri_bind_mem)) {
 			printk(KERN_ALERT "%s: ib_get_dma_mr for "
@@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 			memreg = RPCRDMA_REGISTER;
 			ia->ri_bind_mem = NULL;
 		}
+		break;
+	default:
+		printk(KERN_ERR "%s: invalid memory registration mode %d\n",
+				__func__, memreg);
+		rc = -EINVAL;
+		goto out2;
 	}
+	dprintk("RPC:       %s: memory registration strategy is %d\n",
+		__func__, memreg);
 
 	/* Else will do memory reg/dereg for each chunk */
 	ia->ri_memreg_strategy = memreg;
@@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 	return 0;
 out2:
 	rdma_destroy_id(ia->ri_id);
+	ia->ri_id = NULL;
 out1:
 	return rc;
 }
@@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
 		dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
 			__func__, rc);
 	}
-	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp)
-		rdma_destroy_qp(ia->ri_id);
+	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
+		if (ia->ri_id->qp)
+			rdma_destroy_qp(ia->ri_id);
+		rdma_destroy_id(ia->ri_id);
+		ia->ri_id = NULL;
+	}
 	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
 		rc = ib_dealloc_pd(ia->ri_pd);
 		dprintk("RPC:       %s: ib_dealloc_pd returned %i\n",
 			__func__, rc);
 	}
-	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id))
-		rdma_destroy_id(ia->ri_id);
 }
 
 /*
@@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 	ep->rep_attr.srq = NULL;
 	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
 	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_FRMR:
+		/* Add room for frmr register and invalidate WRs */
+		ep->rep_attr.cap.max_send_wr *= 3;
+		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
+			return -EINVAL;
+		break;
 	case RPCRDMA_MEMWINDOWS_ASYNC:
 	case RPCRDMA_MEMWINDOWS:
 		/* Add room for mw_binds+unbinds - overkill! */
@@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 	ep->rep_remote_cma.private_data_len = 0;
 
 	/* Client offers RDMA Read but does not initiate */
-	switch (ia->ri_memreg_strategy) {
-	case RPCRDMA_BOUNCEBUFFERS:
+	ep->rep_remote_cma.initiator_depth = 0;
+	if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS)
 		ep->rep_remote_cma.responder_resources = 0;
-		break;
-	case RPCRDMA_MTHCAFMR:
-	case RPCRDMA_REGISTER:
-		ep->rep_remote_cma.responder_resources = cdata->max_requests *
-				(RPCRDMA_MAX_DATA_SEGS / 8);
-		break;
-	case RPCRDMA_MEMWINDOWS:
-	case RPCRDMA_MEMWINDOWS_ASYNC:
-#if RPCRDMA_PERSISTENT_REGISTRATION
-	case RPCRDMA_ALLPHYSICAL:
-#endif
-		ep->rep_remote_cma.responder_resources = cdata->max_requests *
-				(RPCRDMA_MAX_DATA_SEGS / 2);
-		break;
-	default:
-		break;
-	}
-	if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom)
+	else if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
+		ep->rep_remote_cma.responder_resources = 32;
+	else
 		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
-	ep->rep_remote_cma.initiator_depth = 0;
 
 	ep->rep_remote_cma.retry_count = 7;
 	ep->rep_remote_cma.flow_control = 0;
@@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 		if (rc)
 			dprintk("RPC:       %s: rpcrdma_ep_disconnect"
 				" returned %i\n", __func__, rc);
+		rdma_destroy_qp(ia->ri_id);
+		ia->ri_id->qp = NULL;
 	}
 
-	ep->rep_func = NULL;
-
 	/* padding - could be done in rpcrdma_buffer_destroy... */
 	if (ep->rep_pad_mr) {
 		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
 		ep->rep_pad_mr = NULL;
 	}
 
-	if (ia->ri_id->qp) {
-		rdma_destroy_qp(ia->ri_id);
-		ia->ri_id->qp = NULL;
-	}
-
 	rpcrdma_clean_cq(ep->rep_cq);
 	rc = ib_destroy_cq(ep->rep_cq);
 	if (rc)
@@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 	struct rdma_cm_id *id;
 	int rc = 0;
 	int retry_count = 0;
-	int reconnect = (ep->rep_connected != 0);
 
-	if (reconnect) {
+	if (ep->rep_connected != 0) {
 		struct rpcrdma_xprt *xprt;
 retry:
 		rc = rpcrdma_ep_disconnect(ep, ia);
@@ -745,6 +836,7 @@ retry:
 			goto out;
 		}
 		/* END TEMP */
+		rdma_destroy_qp(ia->ri_id);
 		rdma_destroy_id(ia->ri_id);
 		ia->ri_id = id;
 	}
@@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
 	}
 }
 
-	/* Theoretically a client initiator_depth > 0 is not needed,
-	 * but many peers fail to complete the connection unless they
-	 * == responder_resources! */
-	if (ep->rep_remote_cma.initiator_depth !=
-				ep->rep_remote_cma.responder_resources)
-		ep->rep_remote_cma.initiator_depth =
-			ep->rep_remote_cma.responder_resources;
-
 	ep->rep_connected = 0;
 
 	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
@@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
 		goto out;
 	}
 
-	if (reconnect)
-		return 0;
-
 	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
 
 	/*
@@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
 	if (ep->rep_connected <= 0) {
 		/* Sometimes, the only way to reliably connect to remote
 		 * CMs is to use same nonzero values for ORD and IRD. */
-		ep->rep_remote_cma.initiator_depth =
-					ep->rep_remote_cma.responder_resources;
-		if (ep->rep_remote_cma.initiator_depth == 0)
-			++ep->rep_remote_cma.initiator_depth;
-		if (ep->rep_remote_cma.responder_resources == 0)
-			++ep->rep_remote_cma.responder_resources;
-		if (retry_count++ == 0)
+		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
+		    (ep->rep_remote_cma.responder_resources == 0 ||
+		     ep->rep_remote_cma.initiator_depth !=
+				ep->rep_remote_cma.responder_resources)) {
+			if (ep->rep_remote_cma.responder_resources == 0)
+				ep->rep_remote_cma.responder_resources = 1;
+			ep->rep_remote_cma.initiator_depth =
+				ep->rep_remote_cma.responder_resources;
 			goto retry;
+		}
 		rc = ep->rep_connected;
 	} else {
 		dprintk("RPC:       %s: connected\n", __func__);
@@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
 	char *p;
 	size_t len;
 	int i, rc;
+	struct rpcrdma_mw *r;
 
 	buf->rb_max_requests = cdata->max_requests;
 	spin_lock_init(&buf->rb_lock);
@@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
 	 *   2.  arrays of struct rpcrdma_req to fill in pointers
 	 *   3.  array of struct rpcrdma_rep for replies
 	 *   4.  padding, if any
-	 *   5.  mw's, if any
+	 *   5.  mw's, fmr's or frmr's, if any
 	 * Send/recv buffers in req/rep need to be registered
 	 */
 
@@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
 		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
 	len += cdata->padding;
 	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_FRMR:
+		len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
+				sizeof(struct rpcrdma_mw);
+		break;
 	case RPCRDMA_MTHCAFMR:
 		/* TBD we are perhaps overallocating here */
 		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
@@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
 	 * and also reduce unbind-to-bind collision.
 	 */
 	INIT_LIST_HEAD(&buf->rb_mws);
+	r = (struct rpcrdma_mw *)p;
 	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_FRMR:
+		for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
+			r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
+							 RPCRDMA_MAX_SEGS);
+			if (IS_ERR(r->r.frmr.fr_mr)) {
+				rc = PTR_ERR(r->r.frmr.fr_mr);
+				dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
+					" failed %i\n", __func__, rc);
+				goto out;
+			}
+			r->r.frmr.fr_pgl =
+				ib_alloc_fast_reg_page_list(ia->ri_id->device,
+							    RPCRDMA_MAX_SEGS);
+			if (IS_ERR(r->r.frmr.fr_pgl)) {
+				rc = PTR_ERR(r->r.frmr.fr_pgl);
+				dprintk("RPC:       %s: "
+					"ib_alloc_fast_reg_page_list "
+					"failed %i\n", __func__, rc);
+				goto out;
+			}
+			list_add(&r->mw_list, &buf->rb_mws);
+			++r;
+		}
+		break;
 	case RPCRDMA_MTHCAFMR:
-		{
-		struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
-		struct ib_fmr_attr fa = {
-			RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT
-		};
 		/* TBD we are perhaps overallocating here */
 		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
+			static struct ib_fmr_attr fa =
+				{ RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
 			r->r.fmr = ib_alloc_fmr(ia->ri_pd,
 				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
 				&fa);
@@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
 			list_add(&r->mw_list, &buf->rb_mws);
 			++r;
 		}
-		}
 		break;
 	case RPCRDMA_MEMWINDOWS_ASYNC:
 	case RPCRDMA_MEMWINDOWS:
-		{
-		struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
 		/* Allocate one extra request's worth, for full cycling */
 		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
 			r->r.mw = ib_alloc_mw(ia->ri_pd);
@@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
 			list_add(&r->mw_list, &buf->rb_mws);
 			++r;
 		}
-		}
 		break;
 	default:
 		break;
@@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 {
 	int rc, i;
 	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
+	struct rpcrdma_mw *r;
 
 	/* clean up in reverse order from create
 	 *   1.  recv mr memory (mr free, then kfree)
@@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 		}
 		if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
 			while (!list_empty(&buf->rb_mws)) {
-				struct rpcrdma_mw *r;
 				r = list_entry(buf->rb_mws.next,
 					struct rpcrdma_mw, mw_list);
 				list_del(&r->mw_list);
 				switch (ia->ri_memreg_strategy) {
+				case RPCRDMA_FRMR:
+					rc = ib_dereg_mr(r->r.frmr.fr_mr);
+					if (rc)
+						dprintk("RPC:       %s:"
+							" ib_dereg_mr"
+							" failed %i\n",
+							__func__, rc);
+					ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+					break;
 				case RPCRDMA_MTHCAFMR:
 					rc = ib_dealloc_fmr(r->r.fmr);
 					if (rc)
@@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
 {
 	struct rpcrdma_req *req;
 	unsigned long flags;
+	int i;
+	struct rpcrdma_mw *r;
 
 	spin_lock_irqsave(&buffers->rb_lock, flags);
 	if (buffers->rb_send_index == buffers->rb_max_requests) {
@@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
 	}
 	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
 	if (!list_empty(&buffers->rb_mws)) {
-		int i = RPCRDMA_MAX_SEGS - 1;
+		i = RPCRDMA_MAX_SEGS - 1;
 		do {
-			struct rpcrdma_mw *r;
 			r = list_entry(buffers->rb_mws.next,
 					struct rpcrdma_mw, mw_list);
 			list_del(&r->mw_list);
@@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
 		req->rl_reply = NULL;
 	}
 	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_FRMR:
 	case RPCRDMA_MTHCAFMR:
 	case RPCRDMA_MEMWINDOWS_ASYNC:
 	case RPCRDMA_MEMWINDOWS:
@@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
 			va, len, DMA_BIDIRECTIONAL);
 	iov->length = len;
 
-	if (ia->ri_bind_mem != NULL) {
+	if (ia->ri_have_dma_lkey) {
+		*mrp = NULL;
+		iov->lkey = ia->ri_dma_lkey;
+		return 0;
+	} else if (ia->ri_bind_mem != NULL) {
 		*mrp = NULL;
 		iov->lkey = ia->ri_bind_mem->lkey;
 		return 0;
@@ -1329,15 +1450,292 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
 				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
 }
 
+static int
+rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
+			int *nsegs, int writing, struct rpcrdma_ia *ia,
+			struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_mr_seg *seg1 = seg;
+	struct ib_send_wr frmr_wr, *bad_wr;
+	u8 key;
+	int len, pageoff;
+	int i, rc;
+
+	pageoff = offset_in_page(seg1->mr_offset);
+	seg1->mr_offset -= pageoff;	/* start of page */
+	seg1->mr_len += pageoff;
+	len = -pageoff;
+	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
+		*nsegs = RPCRDMA_MAX_DATA_SEGS;
+	for (i = 0; i < *nsegs;) {
+		rpcrdma_map_one(ia, seg, writing);
+		seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
+		len += seg->mr_len;
+		++seg;
+		++i;
+		/* Check for holes */
+		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
+		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+			break;
+	}
+	dprintk("RPC:       %s: Using frmr %p to map %d segments\n",
+		__func__, seg1->mr_chunk.rl_mw, i);
+
+	/* Bump the key */
+	key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
+	ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
+
+	/* Prepare FRMR WR */
+	memset(&frmr_wr, 0, sizeof frmr_wr);
+	frmr_wr.opcode = IB_WR_FAST_REG_MR;
+	frmr_wr.send_flags = 0;			/* unsignaled */
+	frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma;
+	frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
+	frmr_wr.wr.fast_reg.page_list_len = i;
+	frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
+	frmr_wr.wr.fast_reg.access_flags = (writing ?
+				IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ);
+	frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
+	DECR_CQCOUNT(&r_xprt->rx_ep);
+
+	rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr);
+
+	if (rc) {
+		dprintk("RPC:       %s: failed ib_post_send for register,"
+			" status %i\n", __func__, rc);
+		while (i--)
+			rpcrdma_unmap_one(ia, --seg);
+	} else {
+		seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
+		seg1->mr_base = seg1->mr_dma + pageoff;
+		seg1->mr_nsegs = i;
+		seg1->mr_len = len;
+	}
+	*nsegs = i;
+	return rc;
+}
+
+static int
+rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
+			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_mr_seg *seg1 = seg;
+	struct ib_send_wr invalidate_wr, *bad_wr;
+	int rc;
+
+	while (seg1->mr_nsegs--)
+		rpcrdma_unmap_one(ia, seg++);
+
+	memset(&invalidate_wr, 0, sizeof invalidate_wr);
+	invalidate_wr.opcode = IB_WR_LOCAL_INV;
+	invalidate_wr.send_flags = 0;			/* unsignaled */
+	invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
+	DECR_CQCOUNT(&r_xprt->rx_ep);
+
+	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+	if (rc)
+		dprintk("RPC:       %s: failed ib_post_send for invalidate,"
+			" status %i\n", __func__, rc);
+	return rc;
+}
+
+static int
+rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
+			int *nsegs, int writing, struct rpcrdma_ia *ia)
+{
+	struct rpcrdma_mr_seg *seg1 = seg;
+	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
+	int len, pageoff, i, rc;
+
+	pageoff = offset_in_page(seg1->mr_offset);
+	seg1->mr_offset -= pageoff;	/* start of page */
+	seg1->mr_len += pageoff;
+	len = -pageoff;
+	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
+		*nsegs = RPCRDMA_MAX_DATA_SEGS;
+	for (i = 0; i < *nsegs;) {
+		rpcrdma_map_one(ia, seg, writing);
+		physaddrs[i] = seg->mr_dma;
+		len += seg->mr_len;
+		++seg;
+		++i;
+		/* Check for holes */
+		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
+		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+			break;
+	}
+	rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
+				physaddrs, i, seg1->mr_dma);
+	if (rc) {
+		dprintk("RPC:       %s: failed ib_map_phys_fmr "
+			"%u@0x%llx+%i (%d)... status %i\n", __func__,
+			len, (unsigned long long)seg1->mr_dma,
+			pageoff, i, rc);
+		while (i--)
+			rpcrdma_unmap_one(ia, --seg);
+	} else {
+		seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
+		seg1->mr_base = seg1->mr_dma + pageoff;
+		seg1->mr_nsegs = i;
+		seg1->mr_len = len;
+	}
+	*nsegs = i;
+	return rc;
+}
+
+static int
+rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
+			struct rpcrdma_ia *ia)
+{
+	struct rpcrdma_mr_seg *seg1 = seg;
+	LIST_HEAD(l);
+	int rc;
+
+	list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
+	rc = ib_unmap_fmr(&l);
+	while (seg1->mr_nsegs--)
+		rpcrdma_unmap_one(ia, seg++);
+	if (rc)
+		dprintk("RPC:       %s: failed ib_unmap_fmr,"
+			" status %i\n", __func__, rc);
+	return rc;
+}
+
+static int
+rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,
+			int *nsegs, int writing, struct rpcrdma_ia *ia,
+			struct rpcrdma_xprt *r_xprt)
+{
+	int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
+				  IB_ACCESS_REMOTE_READ);
+	struct ib_mw_bind param;
+	int rc;
+
+	*nsegs = 1;
+	rpcrdma_map_one(ia, seg, writing);
+	param.mr = ia->ri_bind_mem;
+	param.wr_id = 0ULL;	/* no send cookie */
+	param.addr = seg->mr_dma;
+	param.length = seg->mr_len;
+	param.send_flags = 0;
+	param.mw_access_flags = mem_priv;
+
+	DECR_CQCOUNT(&r_xprt->rx_ep);
+	rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
+	if (rc) {
+		dprintk("RPC:       %s: failed ib_bind_mw "
+			"%u@0x%llx status %i\n",
+			__func__, seg->mr_len,
+			(unsigned long long)seg->mr_dma, rc);
+		rpcrdma_unmap_one(ia, seg);
+	} else {
+		seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
+		seg->mr_base = param.addr;
+		seg->mr_nsegs = 1;
+	}
+	return rc;
+}
+
+static int
+rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
+			struct rpcrdma_ia *ia,
+			struct rpcrdma_xprt *r_xprt, void **r)
+{
+	struct ib_mw_bind param;
+	LIST_HEAD(l);
+	int rc;
+
+	BUG_ON(seg->mr_nsegs != 1);
+	param.mr = ia->ri_bind_mem;
+	param.addr = 0ULL;	/* unbind */
+	param.length = 0;
+	param.mw_access_flags = 0;
+	if (*r) {
+		param.wr_id = (u64) (unsigned long) *r;
+		param.send_flags = IB_SEND_SIGNALED;
+		INIT_CQCOUNT(&r_xprt->rx_ep);
+	} else {
+		param.wr_id = 0ULL;
+		param.send_flags = 0;
+		DECR_CQCOUNT(&r_xprt->rx_ep);
+	}
+	rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
+	rpcrdma_unmap_one(ia, seg);
+	if (rc)
+		dprintk("RPC:       %s: failed ib_(un)bind_mw,"
+			" status %i\n", __func__, rc);
+	else
+		*r = NULL;	/* will upcall on completion */
+	return rc;
+}
+
+static int
+rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
+			int *nsegs, int writing, struct rpcrdma_ia *ia)
+{
+	int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
+				  IB_ACCESS_REMOTE_READ);
+	struct rpcrdma_mr_seg *seg1 = seg;
+	struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
+	int len, i, rc = 0;
+
+	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
+		*nsegs = RPCRDMA_MAX_DATA_SEGS;
+	for (len = 0, i = 0; i < *nsegs;) {
+		rpcrdma_map_one(ia, seg, writing);
+		ipb[i].addr = seg->mr_dma;
+		ipb[i].size = seg->mr_len;
+		len += seg->mr_len;
+		++seg;
+		++i;
+		/* Check for holes */
+		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
+		    offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
+			break;
+	}
+	seg1->mr_base = seg1->mr_dma;
+	seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
+				ipb, i, mem_priv, &seg1->mr_base);
+	if (IS_ERR(seg1->mr_chunk.rl_mr)) {
+		rc = PTR_ERR(seg1->mr_chunk.rl_mr);
+		dprintk("RPC:       %s: failed ib_reg_phys_mr "
+			"%u@0x%llx (%d)... status %i\n",
+			__func__, len,
+			(unsigned long long)seg1->mr_dma, i, rc);
+		while (i--)
+			rpcrdma_unmap_one(ia, --seg);
+	} else {
+		seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
+		seg1->mr_nsegs = i;
+		seg1->mr_len = len;
+	}
+	*nsegs = i;
+	return rc;
+}
+
+static int
+rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg,
+			struct rpcrdma_ia *ia)
+{
+	struct rpcrdma_mr_seg *seg1 = seg;
+	int rc;
+
+	rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
+	seg1->mr_chunk.rl_mr = NULL;
+	while (seg1->mr_nsegs--)
+		rpcrdma_unmap_one(ia, seg++);
+	if (rc)
+		dprintk("RPC:       %s: failed ib_dereg_mr,"
+			" status %i\n", __func__, rc);
+	return rc;
+}
+
 int
 rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
 			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
-				  IB_ACCESS_REMOTE_READ);
-	struct rpcrdma_mr_seg *seg1 = seg;
-	int i;
 	int rc = 0;
 
 	switch (ia->ri_memreg_strategy) {
@@ -1352,114 +1750,25 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
 		break;
 #endif
 
-	/* Registration using fast memory registration */
+	/* Registration using frmr registration */
+	case RPCRDMA_FRMR:
+		rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
+		break;
+
+	/* Registration using fmr memory registration */
 	case RPCRDMA_MTHCAFMR:
-		{
-		u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
-		int len, pageoff = offset_in_page(seg->mr_offset);
-		seg1->mr_offset -= pageoff;	/* start of page */
-		seg1->mr_len += pageoff;
-		len = -pageoff;
-		if (nsegs > RPCRDMA_MAX_DATA_SEGS)
-			nsegs = RPCRDMA_MAX_DATA_SEGS;
-		for (i = 0; i < nsegs;) {
-			rpcrdma_map_one(ia, seg, writing);
-			physaddrs[i] = seg->mr_dma;
-			len += seg->mr_len;
-			++seg;
-			++i;
-			/* Check for holes */
-			if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
-			    offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
-				break;
-		}
-		nsegs = i;
-		rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
-					physaddrs, nsegs, seg1->mr_dma);
-		if (rc) {
-			dprintk("RPC:       %s: failed ib_map_phys_fmr "
-				"%u@0x%llx+%i (%d)... status %i\n", __func__,
-				len, (unsigned long long)seg1->mr_dma,
-				pageoff, nsegs, rc);
-			while (nsegs--)
-				rpcrdma_unmap_one(ia, --seg);
-		} else {
-			seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
-			seg1->mr_base = seg1->mr_dma + pageoff;
-			seg1->mr_nsegs = nsegs;
-			seg1->mr_len = len;
-		}
-		}
+		rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
 		break;
 
 	/* Registration using memory windows */
 	case RPCRDMA_MEMWINDOWS_ASYNC:
 	case RPCRDMA_MEMWINDOWS:
-		{
-		struct ib_mw_bind param;
-		rpcrdma_map_one(ia, seg, writing);
-		param.mr = ia->ri_bind_mem;
-		param.wr_id = 0ULL;	/* no send cookie */
-		param.addr = seg->mr_dma;
-		param.length = seg->mr_len;
-		param.send_flags = 0;
-		param.mw_access_flags = mem_priv;
-
-		DECR_CQCOUNT(&r_xprt->rx_ep);
-		rc = ib_bind_mw(ia->ri_id->qp,
-					seg->mr_chunk.rl_mw->r.mw, &param);
-		if (rc) {
-			dprintk("RPC:       %s: failed ib_bind_mw "
-				"%u@0x%llx status %i\n",
-				__func__, seg->mr_len,
-				(unsigned long long)seg->mr_dma, rc);
-			rpcrdma_unmap_one(ia, seg);
-		} else {
-			seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
-			seg->mr_base = param.addr;
-			seg->mr_nsegs = 1;
-			nsegs = 1;
-		}
-		}
+		rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt);
 		break;
 
 	/* Default registration each time */
 	default:
-		{
-		struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
-		int len = 0;
-		if (nsegs > RPCRDMA_MAX_DATA_SEGS)
-			nsegs = RPCRDMA_MAX_DATA_SEGS;
-		for (i = 0; i < nsegs;) {
-			rpcrdma_map_one(ia, seg, writing);
-			ipb[i].addr = seg->mr_dma;
-			ipb[i].size = seg->mr_len;
-			len += seg->mr_len;
-			++seg;
-			++i;
-			/* Check for holes */
-			if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
-			    offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
-				break;
-		}
-		nsegs = i;
-		seg1->mr_base = seg1->mr_dma;
-		seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
-					ipb, nsegs, mem_priv, &seg1->mr_base);
-		if (IS_ERR(seg1->mr_chunk.rl_mr)) {
-			rc = PTR_ERR(seg1->mr_chunk.rl_mr);
-			dprintk("RPC:       %s: failed ib_reg_phys_mr "
-				"%u@0x%llx (%d)... status %i\n",
-				__func__, len,
-				(unsigned long long)seg1->mr_dma, nsegs, rc);
-			while (nsegs--)
-				rpcrdma_unmap_one(ia, --seg);
-		} else {
-			seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
-			seg1->mr_nsegs = nsegs;
-			seg1->mr_len = len;
-		}
-		}
+		rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
 		break;
 	}
 	if (rc)
@@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
 		struct rpcrdma_xprt *r_xprt, void *r)
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	struct rpcrdma_mr_seg *seg1 = seg;
 	int nsegs = seg->mr_nsegs, rc;
 
 	switch (ia->ri_memreg_strategy) {
@@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
 		break;
 #endif
 
+	case RPCRDMA_FRMR:
+		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
+		break;
+
 	case RPCRDMA_MTHCAFMR:
-		{
-		LIST_HEAD(l);
-		list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l);
-		rc = ib_unmap_fmr(&l);
-		while (seg1->mr_nsegs--)
-			rpcrdma_unmap_one(ia, seg++);
-		}
-		if (rc)
-			dprintk("RPC:       %s: failed ib_unmap_fmr,"
-				" status %i\n", __func__, rc);
+		rc = rpcrdma_deregister_fmr_external(seg, ia);
 		break;
 
 	case RPCRDMA_MEMWINDOWS_ASYNC:
 	case RPCRDMA_MEMWINDOWS:
-		{
-		struct ib_mw_bind param;
-		BUG_ON(nsegs != 1);
-		param.mr = ia->ri_bind_mem;
-		param.addr = 0ULL;	/* unbind */
-		param.length = 0;
-		param.mw_access_flags = 0;
-		if (r) {
-			param.wr_id = (u64) (unsigned long) r;
-			param.send_flags = IB_SEND_SIGNALED;
-			INIT_CQCOUNT(&r_xprt->rx_ep);
-		} else {
-			param.wr_id = 0ULL;
-			param.send_flags = 0;
-			DECR_CQCOUNT(&r_xprt->rx_ep);
-		}
-		rc = ib_bind_mw(ia->ri_id->qp,
-				seg->mr_chunk.rl_mw->r.mw, &param);
-		rpcrdma_unmap_one(ia, seg);
-		}
-		if (rc)
-			dprintk("RPC:       %s: failed ib_(un)bind_mw,"
-				" status %i\n", __func__, rc);
-		else
-			r = NULL;	/* will upcall on completion */
+		rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r);
 		break;
 
 	default:
-		rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
-		seg1->mr_chunk.rl_mr = NULL;
-		while (seg1->mr_nsegs--)
-			rpcrdma_unmap_one(ia, seg++);
-		if (rc)
-			dprintk("RPC:       %s: failed ib_dereg_mr,"
-				" status %i\n", __func__, rc);
+		rc = rpcrdma_deregister_default_external(seg, ia);
 		break;
 	}
 	if (r) {
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 2427822f8bd4..c7a7eba991bc 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -51,6 +51,9 @@
 #include <linux/sunrpc/rpc_rdma.h> 	/* RPC/RDMA protocol */
 #include <linux/sunrpc/xprtrdma.h> 	/* xprt parameters */
 
+#define RDMA_RESOLVE_TIMEOUT	(5000)	/* 5 seconds */
+#define RDMA_CONNECT_RETRY_MAX	(2)	/* retries if no listener backlog */
+
 /*
  * Interface Adapter -- one per transport instance
  */
@@ -58,6 +61,8 @@ struct rpcrdma_ia {
 	struct rdma_cm_id 	*ri_id;
 	struct ib_pd		*ri_pd;
 	struct ib_mr		*ri_bind_mem;
+	u32			ri_dma_lkey;
+	int			ri_have_dma_lkey;
 	struct completion	ri_done;
 	int			ri_async_rc;
 	enum rpcrdma_memreg	ri_memreg_strategy;
@@ -156,6 +161,10 @@ struct rpcrdma_mr_seg {		/* chunk descriptors */
 			union {
 				struct ib_mw	*mw;
 				struct ib_fmr	*fmr;
+				struct {
+					struct ib_fast_reg_page_list *fr_pgl;
+					struct ib_mr *fr_mr;
+				} frmr;
 			} r;
 			struct list_head mw_list;
 		} *rl_mw;
@@ -175,6 +184,7 @@ struct rpcrdma_req {
 	size_t 		rl_size;	/* actual length of buffer */
 	unsigned int	rl_niovs;	/* 0, 2 or 4 */
 	unsigned int	rl_nchunks;	/* non-zero if chunks */
+	unsigned int	rl_connect_cookie;	/* retry detection */
 	struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
 	struct rpcrdma_rep	*rl_reply;/* holder for reply buffer */
 	struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
@@ -198,7 +208,7 @@ struct rpcrdma_buffer {
 	atomic_t	rb_credits;	/* most recent server credits */
 	unsigned long	rb_cwndscale;	/* cached framework rpc_cwndscale */
 	int		rb_max_requests;/* client max requests */
-	struct list_head rb_mws;	/* optional memory windows/fmrs */
+	struct list_head rb_mws;	/* optional memory windows/fmrs/frmrs */
 	int		rb_send_index;
 	struct rpcrdma_req	**rb_send_bufs;
 	int		rb_recv_index;
@@ -273,6 +283,11 @@ struct rpcrdma_xprt {
 #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
 #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
 
+/* Setting this to 0 ensures interoperability with early servers.
+ * Setting this to 1 enhances certain unaligned read/write performance.
+ * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
+extern int xprt_rdma_pad_optimize;
+
 /*
  * Interface Adapter calls - xprtrdma/verbs.c
  */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 4486c59c3aca..9a288d5eea64 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -3,8 +3,8 @@
  *
  * Client-side transport implementation for sockets.
  *
- * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com>
- * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com>
+ * TCP callback races fixes (C) 1998 Red Hat
+ * TCP send fixes (C) 1998 Red Hat
  * TCP NFS related read + write fixes
  *  (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
  *
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 015606b54d9b..c647aab8d418 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1,7 +1,7 @@
 /*
  * NET4:	Implementation of BSD Unix domain sockets.
  *
- * Authors:	Alan Cox, <alan.cox@linux.org>
+ * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 5cadbeb76a14..5031db7b275b 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -13,7 +13,6 @@
 #include <linux/debugfs.h>
 #include <linux/notifier.h>
 #include <linux/device.h>
-#include <linux/list.h>
 #include <net/genetlink.h>
 #include <net/cfg80211.h>
 #include <net/wireless.h>
@@ -185,7 +184,8 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
 	if (result)
 		goto out_unlock;
 
-	if (!debugfs_rename(rdev->wiphy.debugfsdir->d_parent,
+	if (rdev->wiphy.debugfsdir &&
+	    !debugfs_rename(rdev->wiphy.debugfsdir->d_parent,
 			    rdev->wiphy.debugfsdir,
 			    rdev->wiphy.debugfsdir->d_parent,
 			    newname))
@@ -318,6 +318,8 @@ int wiphy_register(struct wiphy *wiphy)
 	drv->wiphy.debugfsdir =
 		debugfs_create_dir(wiphy_name(&drv->wiphy),
 				   ieee80211_debugfs_dir);
+	if (IS_ERR(drv->wiphy.debugfsdir))
+		drv->wiphy.debugfsdir = NULL;
 
 	res = 0;
 out_unlock: