53 files changed, 774 insertions, 252 deletions
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 198a640d53a..a0874cc1f71 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
 	util.o \
 	protocol.o \
 	trans_fd.o \
+	trans_common.o \
 
 9pnet_virtio-objs := \
 	trans_virtio.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index a848bca9fbf..347ec0cd271 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -229,10 +229,23 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 			return ERR_PTR(-ENOMEM);
 		}
 		init_waitqueue_head(req->wq);
-		req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
-								GFP_KERNEL);
-		req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
-								GFP_KERNEL);
+		if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+				P9_TRANS_PREF_PAYLOAD_SEP) {
+			int alloc_msize = min(c->msize, 4096);
+			req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
+					GFP_KERNEL);
+			req->tc->capacity = alloc_msize;
+			req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
+					GFP_KERNEL);
+			req->rc->capacity = alloc_msize;
+		} else {
+			req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+					GFP_KERNEL);
+			req->tc->capacity = c->msize;
+			req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+					GFP_KERNEL);
+			req->rc->capacity = c->msize;
+		}
 		if ((!req->tc) || (!req->rc)) {
 			printk(KERN_ERR "Couldn't grow tag array\n");
 			kfree(req->tc);
@@ -243,9 +256,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 			return ERR_PTR(-ENOMEM);
 		}
 		req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
-		req->tc->capacity = c->msize;
 		req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
-		req->rc->capacity = c->msize;
 	}
 
 	p9pdu_reset(req->tc);
@@ -443,6 +454,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
 {
 	int8_t type;
 	int err;
+	int ecode;
 
 	err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
 	if (err) {
@@ -450,36 +462,53 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
 		return err;
 	}
 
-	if (type == P9_RERROR || type == P9_RLERROR) {
-		int ecode;
-
-		if (!p9_is_proto_dotl(c)) {
-			char *ename;
+	if (type != P9_RERROR && type != P9_RLERROR)
+		return 0;
 
-			err = p9pdu_readf(req->rc, c->proto_version, "s?d",
-								&ename, &ecode);
-			if (err)
-				goto out_err;
+	if (!p9_is_proto_dotl(c)) {
+		char *ename;
+
+		if (req->tc->pbuf_size) {
+			/* Handle user buffers */
+			size_t len = req->rc->size - req->rc->offset;
+			if (req->tc->pubuf) {
+				/* User Buffer */
+				err = copy_from_user(
+					&req->rc->sdata[req->rc->offset],
+					req->tc->pubuf, len);
+				if (err) {
+					err = -EFAULT;
+					goto out_err;
+				}
+			} else {
+				/* Kernel Buffer */
+				memmove(&req->rc->sdata[req->rc->offset],
+						req->tc->pkbuf, len);
+			}
+		}
+		err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+				&ename, &ecode);
+		if (err)
+			goto out_err;
 
-			if (p9_is_proto_dotu(c))
-				err = -ecode;
+		if (p9_is_proto_dotu(c))
+			err = -ecode;
 
-			if (!err || !IS_ERR_VALUE(err)) {
-				err = p9_errstr2errno(ename, strlen(ename));
+		if (!err || !IS_ERR_VALUE(err)) {
+			err = p9_errstr2errno(ename, strlen(ename));
 
-				P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
+			P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode,
+					ename);
 
-				kfree(ename);
-			}
-		} else {
-			err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
-			err = -ecode;
-
-			P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+			kfree(ename);
 		}
+	} else {
+		err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+		err = -ecode;
+
+		P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+	}
 
-	} else
-		err = 0;
 
 	return err;
 
@@ -1191,6 +1220,27 @@ error:
 }
 EXPORT_SYMBOL(p9_client_fsync);
 
+int p9_client_sync_fs(struct p9_fid *fid)
+{
+	int err = 0;
+	struct p9_req_t *req;
+	struct p9_client *clnt;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid);
+
+	clnt = fid->clnt;
+	req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid);
+	p9_free_req(clnt, req);
+error:
+	return err;
+}
+EXPORT_SYMBOL(p9_client_sync_fs);
+
 int p9_client_clunk(struct p9_fid *fid)
 {
 	int err;
@@ -1270,7 +1320,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
 	if (count < rsize)
 		rsize = count;
 
-	req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize);
+	/* Don't bother zerocopy form small IO (< 1024) */
+	if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+			P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
+		req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset,
+				rsize, data, udata);
+	} else {
+		req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset,
+				rsize);
+	}
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;
@@ -1284,13 +1342,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
 
 	P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
 
-	if (data) {
-		memmove(data, dataptr, count);
-	} else {
-		err = copy_to_user(udata, dataptr, count);
-		if (err) {
-			err = -EFAULT;
-			goto free_and_error;
+	if (!req->tc->pbuf_size) {
+		if (data) {
+			memmove(data, dataptr, count);
+		} else {
+			err = copy_to_user(udata, dataptr, count);
+			if (err) {
+				err = -EFAULT;
+				goto free_and_error;
+			}
 		}
 	}
 	p9_free_req(clnt, req);
@@ -1323,12 +1383,21 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
 
 	if (count < rsize)
 		rsize = count;
-	if (data)
-		req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset,
-								rsize, data);
-	else
-		req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset,
-								rsize, udata);
+
+	/* Don't bother zerocopy form small IO (< 1024) */
+	if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+				P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
+		req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset,
+				rsize, data, udata);
+	} else {
+
+		if (data)
+			req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid,
+					offset, rsize, data);
+		else
+			req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid,
+					offset, rsize, udata);
+	}
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;
@@ -1716,7 +1785,14 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
 	if (count < rsize)
 		rsize = count;
 
-	req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize);
+	if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+			P9_TRANS_PREF_PAYLOAD_SEP) {
+		req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid,
+				offset, rsize, data);
+	} else {
+		req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid,
+				offset, rsize);
+	}
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;
@@ -1730,7 +1806,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
 
 	P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
 
-	if (data)
+	if (!req->tc->pbuf_size && data)
 		memmove(data, dataptr, count);
 
 	p9_free_req(clnt, req);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 1e308f21092..2ce515b859b 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -114,6 +114,26 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
 	return size - len;
 }
 
+static size_t
+pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata,
+		size_t size)
+{
+	BUG_ON(pdu->size > P9_IOHDRSZ);
+	pdu->pubuf = (char __user *)udata;
+	pdu->pkbuf = (char *)kdata;
+	pdu->pbuf_size = size;
+	return 0;
+}
+
+static size_t
+pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size)
+{
+	BUG_ON(pdu->size > P9_READDIRHDRSZ);
+	pdu->pkbuf = (char *)kdata;
+	pdu->pbuf_size = size;
+	return 0;
+}
+
 /*
 	b - int8_t
 	w - int16_t
@@ -445,6 +465,25 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 					errcode = -EFAULT;
 			}
 			break;
+		case 'E':{
+				 int32_t cnt = va_arg(ap, int32_t);
+				 const char *k = va_arg(ap, const void *);
+				 const char *u = va_arg(ap, const void *);
+				 errcode = p9pdu_writef(pdu, proto_version, "d",
+						 cnt);
+				 if (!errcode && pdu_write_urw(pdu, k, u, cnt))
+					errcode = -EFAULT;
+			 }
+			 break;
+		case 'F':{
+				 int32_t cnt = va_arg(ap, int32_t);
+				 const char *k = va_arg(ap, const void *);
+				 errcode = p9pdu_writef(pdu, proto_version, "d",
+						 cnt);
+				 if (!errcode && pdu_write_readdir(pdu, k, cnt))
+					errcode = -EFAULT;
+			 }
+			 break;
 		case 'U':{
 				int32_t count = va_arg(ap, int32_t);
 				const char __user *udata =
@@ -579,6 +618,7 @@ EXPORT_SYMBOL(p9stat_read);
 
 int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
 {
+	pdu->id = type;
 	return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
 }
 
@@ -606,6 +646,10 @@ void p9pdu_reset(struct p9_fcall *pdu)
 {
 	pdu->offset = 0;
 	pdu->size = 0;
+	pdu->private = NULL;
+	pdu->pubuf = NULL;
+	pdu->pkbuf = NULL;
+	pdu->pbuf_size = 0;
 }
 
 int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
new file mode 100644
index 00000000000..d62b9aa58df
--- /dev/null
+++ b/net/9p/trans_common.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include <linux/scatterlist.h>
+#include "trans_common.h"
+
+/**
+ *  p9_release_req_pages - Release pages after the transaction.
+ *  @*private: PDU's private page of struct trans_rpage_info
+ */
+void
+p9_release_req_pages(struct trans_rpage_info *rpinfo)
+{
+	int i = 0;
+
+	while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) {
+		put_page(rpinfo->rp_data[i]);
+		i++;
+	}
+}
+EXPORT_SYMBOL(p9_release_req_pages);
+
+/**
+ * p9_nr_pages - Return number of pages needed to accomodate the payload.
+ */
+int
+p9_nr_pages(struct p9_req_t *req)
+{
+	int start_page, end_page;
+	start_page =  (unsigned long long)req->tc->pubuf >> PAGE_SHIFT;
+	end_page = ((unsigned long long)req->tc->pubuf + req->tc->pbuf_size +
+			PAGE_SIZE - 1) >> PAGE_SHIFT;
+	return end_page - start_page;
+}
+EXPORT_SYMBOL(p9_nr_pages);
+
+/**
+ * payload_gup - Translates user buffer into kernel pages and
+ * pins them either for read/write through get_user_pages_fast().
+ * @req: Request to be sent to server.
+ * @pdata_off: data offset into the first page after translation (gup).
+ * @pdata_len: Total length of the IO. gup may not return requested # of pages.
+ * @nr_pages: number of pages to accomodate the payload
+ * @rw: Indicates if the pages are for read or write.
+ */
+int
+p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
+		int nr_pages, u8 rw)
+{
+	uint32_t first_page_bytes = 0;
+	uint32_t pdata_mapped_pages;
+	struct trans_rpage_info  *rpinfo;
+
+	*pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1);
+
+	if (*pdata_off)
+		first_page_bytes = min((PAGE_SIZE - *pdata_off),
+				req->tc->pbuf_size);
+
+	rpinfo = req->tc->private;
+	pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf,
+			nr_pages, rw, &rpinfo->rp_data[0]);
+
+	if (pdata_mapped_pages < 0) {
+		printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p"
+				"nr_pages:%d\n", pdata_mapped_pages,
+				req->tc->pubuf, nr_pages);
+		pdata_mapped_pages = 0;
+		return -EIO;
+	}
+	rpinfo->rp_nr_pages = pdata_mapped_pages;
+	if (*pdata_off) {
+		*pdata_len = first_page_bytes;
+		*pdata_len += min((req->tc->pbuf_size - *pdata_len),
+				((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT);
+	} else {
+		*pdata_len = min(req->tc->pbuf_size,
+				(size_t)pdata_mapped_pages << PAGE_SHIFT);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(p9_payload_gup);
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
new file mode 100644
index 00000000000..76309223bb0
--- /dev/null
+++ b/net/9p/trans_common.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/* TRUE if it is user context */
+#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS))
+
+/**
+ * struct trans_rpage_info - To store mapped page information in PDU.
+ * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu.
+ * @rp_nr_pages: Number of mapped pages
+ * @rp_data: Array of page pointers
+ */
+struct trans_rpage_info {
+	u8 rp_alloc;
+	int rp_nr_pages;
+	struct page *rp_data[0];
+};
+
+void p9_release_req_pages(struct trans_rpage_info *);
+int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8);
+int p9_nr_pages(struct p9_req_t *);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 078eb162d9b..a30471e5174 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -153,10 +153,11 @@ struct p9_conn {
 	unsigned long wsched;
 };
 
+static void p9_poll_workfn(struct work_struct *work);
+
 static DEFINE_SPINLOCK(p9_poll_lock);
 static LIST_HEAD(p9_poll_pending_list);
-static struct workqueue_struct *p9_mux_wq;
-static struct task_struct *p9_poll_task;
+static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
 
 static void p9_mux_poll_stop(struct p9_conn *m)
 {
@@ -384,7 +385,7 @@ static void p9_read_work(struct work_struct *work)
 
 		if (n & POLLIN) {
 			P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
-			queue_work(p9_mux_wq, &m->rq);
+			schedule_work(&m->rq);
 		} else
 			clear_bit(Rworksched, &m->wsched);
 	} else
@@ -497,7 +498,7 @@ static void p9_write_work(struct work_struct *work)
 
 		if (n & POLLOUT) {
 			P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
-			queue_work(p9_mux_wq, &m->wq);
+			schedule_work(&m->wq);
 		} else
 			clear_bit(Wworksched, &m->wsched);
 	} else
@@ -516,15 +517,14 @@ static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
 		container_of(wait, struct p9_poll_wait, wait);
 	struct p9_conn *m = pwait->conn;
 	unsigned long flags;
-	DECLARE_WAITQUEUE(dummy_wait, p9_poll_task);
 
 	spin_lock_irqsave(&p9_poll_lock, flags);
 	if (list_empty(&m->poll_pending_link))
 		list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
 	spin_unlock_irqrestore(&p9_poll_lock, flags);
 
-	/* perform the default wake up operation */
-	return default_wake_function(&dummy_wait, mode, sync, key);
+	schedule_work(&p9_poll_work);
+	return 1;
 }
 
 /**
@@ -629,7 +629,7 @@ static void p9_poll_mux(struct p9_conn *m)
 		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m);
 		if (!test_and_set_bit(Rworksched, &m->wsched)) {
 			P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
-			queue_work(p9_mux_wq, &m->rq);
+			schedule_work(&m->rq);
 		}
 	}
 
@@ -639,7 +639,7 @@ static void p9_poll_mux(struct p9_conn *m)
 		if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
 		    !test_and_set_bit(Wworksched, &m->wsched)) {
 			P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
-			queue_work(p9_mux_wq, &m->wq);
+			schedule_work(&m->wq);
 		}
 	}
 }
@@ -677,7 +677,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
 		n = p9_fd_poll(m->client, NULL);
 
 	if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
-		queue_work(p9_mux_wq, &m->wq);
+		schedule_work(&m->wq);
 
 	return 0;
 }
@@ -1047,12 +1047,12 @@ static struct p9_trans_module p9_fd_trans = {
  *
  */
 
-static int p9_poll_proc(void *a)
+static void p9_poll_workfn(struct work_struct *work)
 {
 	unsigned long flags;
 
 	P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current);
- repeat:
+
 	spin_lock_irqsave(&p9_poll_lock, flags);
 	while (!list_empty(&p9_poll_pending_list)) {
 		struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
@@ -1067,35 +1067,11 @@ static int p9_poll_proc(void *a)
 	}
 	spin_unlock_irqrestore(&p9_poll_lock, flags);
 
-	set_current_state(TASK_INTERRUPTIBLE);
-	if (list_empty(&p9_poll_pending_list)) {
-		P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n");
-		schedule();
-	}
-	__set_current_state(TASK_RUNNING);
-
-	if (!kthread_should_stop())
-		goto repeat;
-
 	P9_DPRINTK(P9_DEBUG_TRANS, "finish\n");
-	return 0;
 }
 
 int p9_trans_fd_init(void)
 {
-	p9_mux_wq = create_workqueue("v9fs");
-	if (!p9_mux_wq) {
-		printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
-		return -ENOMEM;
-	}
-
-	p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll");
-	if (IS_ERR(p9_poll_task)) {
-		destroy_workqueue(p9_mux_wq);
-		printk(KERN_WARNING "v9fs: mux: creating poll task failed\n");
-		return PTR_ERR(p9_poll_task);
-	}
-
 	v9fs_register_trans(&p9_tcp_trans);
 	v9fs_register_trans(&p9_unix_trans);
 	v9fs_register_trans(&p9_fd_trans);
@@ -1105,10 +1081,8 @@ int p9_trans_fd_init(void)
 
 void p9_trans_fd_exit(void)
 {
-	kthread_stop(p9_poll_task);
+	flush_work_sync(&p9_poll_work);
 	v9fs_unregister_trans(&p9_tcp_trans);
 	v9fs_unregister_trans(&p9_unix_trans);
 	v9fs_unregister_trans(&p9_fd_trans);
-
-	destroy_workqueue(p9_mux_wq);
 }
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index c8f3f72ab20..9b550ed9c71 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -45,6 +45,7 @@
 #include <linux/scatterlist.h>
 #include <linux/virtio.h>
 #include <linux/virtio_9p.h>
+#include "trans_common.h"
 
 #define VIRTQUEUE_NUM	128
 
@@ -155,6 +156,14 @@ static void req_done(struct virtqueue *vq)
 					rc->tag);
 			req = p9_tag_lookup(chan->client, rc->tag);
 			req->status = REQ_STATUS_RCVD;
+			if (req->tc->private) {
+				struct trans_rpage_info *rp = req->tc->private;
+				/*Release pages */
+				p9_release_req_pages(rp);
+				if (rp->rp_alloc)
+					kfree(rp);
+				req->tc->private = NULL;
+			}
 			p9_client_cb(chan->client, req);
 		} else {
 			spin_unlock_irqrestore(&chan->lock, flags);
@@ -203,6 +212,38 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
 }
 
 /**
+ * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer,
+ * this takes a list of pages.
+ * @sg: scatter/gather list to pack into
+ * @start: which segment of the sg_list to start at
+ * @pdata_off: Offset into the first page
+ * @**pdata: a list of pages to add into sg.
+ * @count: amount of data to pack into the scatter/gather list
+ */
+static int
+pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
+		struct page **pdata, int count)
+{
+	int s;
+	int i = 0;
+	int index = start;
+
+	if (pdata_off) {
+		s = min((int)(PAGE_SIZE - pdata_off), count);
+		sg_set_page(&sg[index++], pdata[i++], s, pdata_off);
+		count -= s;
+	}
+
+	while (count) {
+		BUG_ON(index > limit);
+		s = min((int)PAGE_SIZE, count);
+		sg_set_page(&sg[index++], pdata[i++], s, 0);
+		count -= s;
+	}
+	return index-start;
+}
+
+/**
  * p9_virtio_request - issue a request
  * @client: client instance issuing the request
  * @req: request to be issued
@@ -212,22 +253,97 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
 static int
 p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 {
-	int in, out;
+	int in, out, inp, outp;
 	struct virtio_chan *chan = client->trans;
 	char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
 	unsigned long flags;
-	int err;
+	size_t pdata_off = 0;
+	struct trans_rpage_info *rpinfo = NULL;
+	int err, pdata_len = 0;
 
 	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
 
 req_retry:
 	req->status = REQ_STATUS_SENT;
 
+	if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
+		int nr_pages = p9_nr_pages(req);
+		int rpinfo_size = sizeof(struct trans_rpage_info) +
+			sizeof(struct page *) * nr_pages;
+
+		if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
+			/* We can use sdata */
+			req->tc->private = req->tc->sdata + req->tc->size;
+			rpinfo = (struct trans_rpage_info *)req->tc->private;
+			rpinfo->rp_alloc = 0;
+		} else {
+			req->tc->private = kmalloc(rpinfo_size, GFP_NOFS);
+			if (!req->tc->private) {
+				P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: "
+					"private kmalloc returned NULL");
+				return -ENOMEM;
+			}
+			rpinfo = (struct trans_rpage_info *)req->tc->private;
+			rpinfo->rp_alloc = 1;
+		}
+
+		err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages,
+				req->tc->id == P9_TREAD ? 1 : 0);
+		if (err < 0) {
+			if (rpinfo->rp_alloc)
+				kfree(rpinfo);
+			return err;
+		}
+	}
+
 	spin_lock_irqsave(&chan->lock, flags);
+
+	/* Handle out VirtIO ring buffers */
 	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
-								req->tc->size);
-	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata,
-								client->msize);
+			req->tc->size);
+
+	if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) {
+		/* We have additional write payload buffer to take care */
+		if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
+			outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
+					pdata_off, rpinfo->rp_data, pdata_len);
+		} else {
+			char *pbuf = req->tc->pubuf ? req->tc->pubuf :
+								req->tc->pkbuf;
+			outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
+					req->tc->pbuf_size);
+		}
+		out += outp;
+	}
+
+	/* Handle in VirtIO ring buffers */
+	if (req->tc->pbuf_size &&
+		((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) {
+		/*
+		 * Take care of additional Read payload.
+		 * 11 is the read/write header = PDU Header(7) + IO Size (4).
+		 * Arrange in such a way that server places header in the
+		 * alloced memory and payload onto the user buffer.
+		 */
+		inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11);
+		/*
+		 * Running executables in the filesystem may result in
+		 * a read request with kernel buffer as opposed to user buffer.
+		 */
+		if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
+			in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
+					pdata_off, rpinfo->rp_data, pdata_len);
+		} else {
+			char *pbuf = req->tc->pubuf ? req->tc->pubuf :
+								req->tc->pkbuf;
+			in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
+					pbuf, req->tc->pbuf_size);
+		}
+		in += inp;
+	} else {
+		in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata,
+				client->msize);
+	}
 
 	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
 	if (err < 0) {
@@ -246,6 +362,8 @@ req_retry:
 			P9_DPRINTK(P9_DEBUG_TRANS,
 					"9p debug: "
 					"virtio rpc add_buf returned failure");
+			if (rpinfo && rpinfo->rp_alloc)
+				kfree(rpinfo);
 			return -EIO;
 		}
 	}
@@ -448,6 +566,7 @@ static struct p9_trans_module p9_virtio_trans = {
 	.request = p9_virtio_request,
 	.cancel = p9_virtio_cancel,
 	.maxsize = PAGE_SIZE*16,
+	.pref = P9_TRANS_PREF_PAYLOAD_SEP,
 	.def = 0,
 	.owner = THIS_MODULE,
 };
diff --git a/net/Makefile b/net/Makefile
index a3330ebe2c5..a51d9465e62 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -19,9 +19,7 @@ obj-$(CONFIG_NETFILTER)		+= netfilter/
 obj-$(CONFIG_INET)		+= ipv4/
 obj-$(CONFIG_XFRM)		+= xfrm/
 obj-$(CONFIG_UNIX)		+= unix/
-ifneq ($(CONFIG_IPV6),)
-obj-y				+= ipv6/
-endif
+obj-$(CONFIG_NET)		+= ipv6/
 obj-$(CONFIG_PACKET)		+= packet/
 obj-$(CONFIG_NET_KEY)		+= key/
 obj-$(CONFIG_BRIDGE)		+= bridge/
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index b1805ff9541..c258796313e 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -727,7 +727,9 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
 			break;
 		}
 
+		tty_unlock();
 		schedule();
+		tty_lock();
 	}
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&dev->wait, &wait);
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 9190ae462cb..6dee7bf648a 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -6,6 +6,7 @@ config BRIDGE
 	tristate "802.1d Ethernet Bridging"
 	select LLC
 	select STP
+	depends on IPV6 || IPV6=n
 	---help---
 	  If you say Y here, then your Linux box will be able to act as an
 	  Ethernet bridge, which means that the different Ethernet segments it
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 09d5c098792..030a002ff8e 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -37,10 +37,9 @@
 	rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static inline int ipv6_is_local_multicast(const struct in6_addr *addr)
+static inline int ipv6_is_transient_multicast(const struct in6_addr *addr)
 {
-	if (ipv6_addr_is_multicast(addr) &&
-	    IPV6_ADDR_MC_SCOPE(addr) <= IPV6_ADDR_SCOPE_LINKLOCAL)
+	if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr))
 		return 1;
 	return 0;
 }
@@ -435,7 +434,6 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	eth = eth_hdr(skb);
 
 	memcpy(eth->h_source, br->dev->dev_addr, 6);
-	ipv6_eth_mc_map(group, eth->h_dest);
 	eth->h_proto = htons(ETH_P_IPV6);
 	skb_put(skb, sizeof(*eth));
 
@@ -447,8 +445,10 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	ip6h->payload_len = htons(8 + sizeof(*mldq));
 	ip6h->nexthdr = IPPROTO_HOPOPTS;
 	ip6h->hop_limit = 1;
-	ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0);
+	ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
+			   &ip6h->saddr);
 	ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
+	ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
 
 	hopopt = (u8 *)(ip6h + 1);
 	hopopt[0] = IPPROTO_ICMPV6;		/* next hdr */
@@ -780,11 +780,11 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
 {
 	struct br_ip br_group;
 
-	if (ipv6_is_local_multicast(group))
+	if (!ipv6_is_transient_multicast(group))
 		return 0;
 
 	ipv6_addr_copy(&br_group.u.ip6, group);
-	br_group.proto = htons(ETH_P_IP);
+	br_group.proto = htons(ETH_P_IPV6);
 
 	return br_multicast_add_group(br, port, &br_group);
 }
@@ -1013,18 +1013,19 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 
 		nsrcs = skb_header_pointer(skb,
 					   len + offsetof(struct mld2_grec,
-							  grec_mca),
+							  grec_nsrcs),
 					   sizeof(_nsrcs), &_nsrcs);
 		if (!nsrcs)
 			return -EINVAL;
 
 		if (!pskb_may_pull(skb,
 				   len + sizeof(*grec) +
-				   sizeof(struct in6_addr) * (*nsrcs)))
+				   sizeof(struct in6_addr) * ntohs(*nsrcs)))
 			return -EINVAL;
 
 		grec = (struct mld2_grec *)(skb->data + len);
-		len += sizeof(*grec) + sizeof(struct in6_addr) * (*nsrcs);
+		len += sizeof(*grec) +
+		       sizeof(struct in6_addr) * ntohs(*nsrcs);
 
 		/* We treat these as MLDv1 reports for now. */
 		switch (grec->grec_type) {
@@ -1340,7 +1341,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
 {
 	struct br_ip br_group;
 
-	if (ipv6_is_local_multicast(group))
+	if (!ipv6_is_transient_multicast(group))
 		return;
 
 	ipv6_addr_copy(&br_group.u.ip6, group);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 35b36b86d76..05f357828a2 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -336,7 +336,6 @@ static void reset_connection(struct ceph_connection *con)
 		ceph_msg_put(con->out_msg);
 		con->out_msg = NULL;
 	}
-	con->out_keepalive_pending = false;
 	con->in_seq = 0;
 	con->in_seq_acked = 0;
 }
@@ -1248,8 +1247,6 @@ static int process_connect(struct ceph_connection *con)
 		     con->auth_retry);
 		if (con->auth_retry == 2) {
 			con->error_msg = "connect authorization failure";
-			reset_connection(con);
-			set_bit(CLOSED, &con->state);
 			return -1;
 		}
 		con->auth_retry = 1;
@@ -1715,14 +1712,6 @@ more:
 
 	/* open the socket first? */
 	if (con->sock == NULL) {
-		/*
-		 * if we were STANDBY and are reconnecting _this_
-		 * connection, bump connect_seq now.  Always bump
-		 * global_seq.
-		 */
-		if (test_and_clear_bit(STANDBY, &con->state))
-			con->connect_seq++;
-
 		prepare_write_banner(msgr, con);
 		prepare_write_connect(msgr, con, 1);
 		prepare_read_banner(con);
@@ -1951,7 +1940,24 @@ static void con_work(struct work_struct *work)
 						   work.work);
 
 	mutex_lock(&con->mutex);
+	if (test_and_clear_bit(BACKOFF, &con->state)) {
+		dout("con_work %p backing off\n", con);
+		if (queue_delayed_work(ceph_msgr_wq, &con->work,
+				       round_jiffies_relative(con->delay))) {
+			dout("con_work %p backoff %lu\n", con, con->delay);
+			mutex_unlock(&con->mutex);
+			return;
+		} else {
+			con->ops->put(con);
+			dout("con_work %p FAILED to back off %lu\n", con,
+			     con->delay);
+		}
+	}
 
+	if (test_bit(STANDBY, &con->state)) {
+		dout("con_work %p STANDBY\n", con);
+		goto done;
+	}
 	if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
 		dout("con_work CLOSED\n");
 		con_close_socket(con);
@@ -2008,10 +2014,12 @@ static void ceph_fault(struct ceph_connection *con)
 	/* Requeue anything that hasn't been acked */
 	list_splice_init(&con->out_sent, &con->out_queue);
 
-	/* If there are no messages in the queue, place the connection
-	 * in a STANDBY state (i.e., don't try to reconnect just yet). */
-	if (list_empty(&con->out_queue) && !con->out_keepalive_pending) {
-		dout("fault setting STANDBY\n");
+	/* If there are no messages queued or keepalive pending, place
+	 * the connection in a STANDBY state */
+	if (list_empty(&con->out_queue) &&
+	    !test_bit(KEEPALIVE_PENDING, &con->state)) {
+		dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
+		clear_bit(WRITE_PENDING, &con->state);
 		set_bit(STANDBY, &con->state);
 	} else {
 		/* retry after a delay. */
@@ -2019,11 +2027,24 @@ static void ceph_fault(struct ceph_connection *con)
 			con->delay = BASE_DELAY_INTERVAL;
 		else if (con->delay < MAX_DELAY_INTERVAL)
 			con->delay *= 2;
-		dout("fault queueing %p delay %lu\n", con, con->delay);
 		con->ops->get(con);
 		if (queue_delayed_work(ceph_msgr_wq, &con->work,
-				       round_jiffies_relative(con->delay)) == 0)
+				       round_jiffies_relative(con->delay))) {
+			dout("fault queued %p delay %lu\n", con, con->delay);
+		} else {
 			con->ops->put(con);
+			dout("fault failed to queue %p delay %lu, backoff\n",
+			     con, con->delay);
+			/*
+			 * In many cases we see a socket state change
+			 * while con_work is running and end up
+			 * queuing (non-delayed) work, such that we
+			 * can't backoff with a delay.  Set a flag so
+			 * that when con_work restarts we schedule the
+			 * delay then.
+			 */
+			set_bit(BACKOFF, &con->state);
+		}
 	}
 
 out_unlock:
@@ -2094,6 +2115,19 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr)
 }
 EXPORT_SYMBOL(ceph_messenger_destroy);
 
+static void clear_standby(struct ceph_connection *con)
+{
+	/* come back from STANDBY? */
+	if (test_and_clear_bit(STANDBY, &con->state)) {
+		mutex_lock(&con->mutex);
+		dout("clear_standby %p and ++connect_seq\n", con);
+		con->connect_seq++;
+		WARN_ON(test_bit(WRITE_PENDING, &con->state));
+		WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
+		mutex_unlock(&con->mutex);
+	}
+}
+
 /*
  * Queue up an outgoing message on the given connection.
  */
@@ -2126,6 +2160,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
 
 	/* if there wasn't anything waiting to send before, queue
 	 * new work */
+	clear_standby(con);
 	if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
 		queue_con(con);
 }
@@ -2191,6 +2226,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
  */
 void ceph_con_keepalive(struct ceph_connection *con)
 {
+	dout("con_keepalive %p\n", con);
+	clear_standby(con);
 	if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
 	    test_and_set_bit(WRITE_PENDING, &con->state) == 0)
 		queue_con(con);
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 1a040e64c69..cd9c21df87d 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -16,22 +16,30 @@ struct page **ceph_get_direct_page_vector(const char __user *data,
 					  int num_pages, bool write_page)
 {
 	struct page **pages;
-	int rc;
+	int got = 0;
+	int rc = 0;
 
 	pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
 	if (!pages)
 		return ERR_PTR(-ENOMEM);
 
 	down_read(&current->mm->mmap_sem);
-	rc = get_user_pages(current, current->mm, (unsigned long)data,
-			    num_pages, write_page, 0, pages, NULL);
+	while (got < num_pages) {
+		rc = get_user_pages(current, current->mm,
+		    (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
+		    num_pages - got, write_page, 0, pages + got, NULL);
+		if (rc < 0)
+			break;
+		BUG_ON(rc == 0);
+		got += rc;
+	}
 	up_read(&current->mm->mmap_sem);
-	if (rc < num_pages)
+	if (rc < 0)
 		goto fail;
 	return pages;
 
 fail:
-	ceph_put_page_vector(pages, rc > 0 ? rc : 0, false);
+	ceph_put_page_vector(pages, got, false);
 	return ERR_PTR(rc);
 }
 EXPORT_SYMBOL(ceph_get_direct_page_vector);
diff --git a/net/core/dev.c b/net/core/dev.c
index 8ae6631abcc..6561021d22d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1114,13 +1114,21 @@ EXPORT_SYMBOL(netdev_bonding_change);
 void dev_load(struct net *net, const char *name)
 {
 	struct net_device *dev;
+	int no_module;
 
 	rcu_read_lock();
 	dev = dev_get_by_name_rcu(net, name);
 	rcu_read_unlock();
 
-	if (!dev && capable(CAP_NET_ADMIN))
-		request_module("%s", name);
+	no_module = !dev;
+	if (no_module && capable(CAP_NET_ADMIN))
+		no_module = request_module("netdev-%s", name);
+	if (no_module && capable(CAP_SYS_MODULE)) {
+		if (!request_module("%s", name))
+			pr_err("Loading kernel module for a network device "
+"with CAP_SYS_MODULE (deprecated).  Use CAP_NET_ADMIN and alias netdev-%s "
+"instead\n", name);
+	}
 }
 EXPORT_SYMBOL(dev_load);
 
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 508f9c18992..133fd22ea28 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -144,7 +144,7 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
 
 	list_for_each_entry(ha, &from_list->list, list) {
 		type = addr_type ? addr_type : ha->type;
-		__hw_addr_del(to_list, ha->addr, addr_len, addr_type);
+		__hw_addr_del(to_list, ha->addr, addr_len, type);
 	}
 }
 EXPORT_SYMBOL(__hw_addr_del_multiple);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a9e7fc4c461..b5bada92f63 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3321,7 +3321,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
 				    pkt_dev->started_at);
 	ktime_t idle = ns_to_ktime(pkt_dev->idle_acc);
 
-	p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n",
+	p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
 		     (unsigned long long)ktime_to_us(elapsed),
 		     (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)),
 		     (unsigned long long)ktime_to_us(idle),
diff --git a/net/core/scm.c b/net/core/scm.c
index bbe45445080..4c1ef026d69 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -95,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
 		int fd = fdp[i];
 		struct file *file;
 
-		if (fd < 0 || !(file = fget(fd)))
+		if (fd < 0 || !(file = fget_raw(fd)))
 			return -EBADF;
 		*fpp++ = file;
 		fpl->count++;
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index d5074a56728..c44348adba3 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1193,7 +1193,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
 			goto err;
 	}
 
-	if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setets) {
+	if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) {
 		struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]);
 		err = ops->ieee_setpfc(netdev, pfc);
 		if (err)
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 8cde009e8b8..4222e7a654b 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -614,6 +614,9 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		/* Caller (dccp_v4_do_rcv) will send Reset */
 		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 		return 1;
+	} else if (sk->sk_state == DCCP_CLOSED) {
+		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
+		return 1;
 	}
 
 	if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) {
@@ -668,10 +671,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	}
 
 	switch (sk->sk_state) {
-	case DCCP_CLOSED:
-		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
-		return 1;
-
 	case DCCP_REQUESTING:
 		queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
 		if (queued >= 0)
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 739435a6af3..cfa7a5e1c5c 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -67,8 +67,9 @@ dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen)
 	size_t result_len = 0;
 	const char *data = _data, *end, *opt;
 
-	kenter("%%%d,%s,'%s',%zu",
-	       key->serial, key->description, data, datalen);
+	kenter("%%%d,%s,'%*.*s',%zu",
+	       key->serial, key->description,
+	       (int)datalen, (int)datalen, data, datalen);
 
 	if (datalen <= 1 || !data || data[datalen - 1] != '\0')
 		return -EINVAL;
@@ -217,6 +218,19 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m)
 		seq_printf(m, ": %u", key->datalen);
 }
 
+/*
+ * read the DNS data
+ * - the key's semaphore is read-locked
+ */
+static long dns_resolver_read(const struct key *key,
+			      char __user *buffer, size_t buflen)
+{
+	if (key->type_data.x[0])
+		return key->type_data.x[0];
+
+	return user_read(key, buffer, buflen);
+}
+
 struct key_type key_type_dns_resolver = {
 	.name		= "dns_resolver",
 	.instantiate	= dns_resolver_instantiate,
@@ -224,7 +238,7 @@ struct key_type key_type_dns_resolver = {
 	.revoke		= user_revoke,
 	.destroy	= user_destroy,
 	.describe	= dns_resolver_describe,
-	.read		= user_read,
+	.read		= dns_resolver_read,
 };
 
 static int __init init_dns_resolver(void)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index df4616fce92..036652c8166 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -670,7 +670,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 			     ifap = &ifa->ifa_next) {
 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
 				    sin_orig.sin_addr.s_addr ==
-							ifa->ifa_address) {
+							ifa->ifa_local) {
 					break; /* found */
 				}
 			}
@@ -1040,8 +1040,8 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev,
 		return;
 
 	arp_send(ARPOP_REQUEST, ETH_P_ARP,
-		 ifa->ifa_address, dev,
-		 ifa->ifa_address, NULL,
+		 ifa->ifa_local, dev,
+		 ifa->ifa_local, NULL,
 		 dev->dev_addr, NULL);
 }
 
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c5af909cf70..3c8dfa16614 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -505,7 +505,9 @@ restart:
 			}
 
 			rcu_read_unlock();
+			local_bh_disable();
 			inet_twsk_deschedule(tw, twdr);
+			local_bh_enable();
 			inet_twsk_put(tw);
 			goto restart_rcu;
 		}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 6613edfac28..d1d0e2c256f 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1765,4 +1765,4 @@ module_exit(ipgre_fini);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_RTNL_LINK("gre");
 MODULE_ALIAS_RTNL_LINK("gretap");
-MODULE_ALIAS("gre0");
+MODULE_ALIAS_NETDEV("gre0");
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 988f52fba54..a5f58e7cbb2 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -913,4 +913,4 @@ static void __exit ipip_fini(void)
 module_init(ipip_init);
 module_exit(ipip_fini);
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("tunl0");
+MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eb7f82ebf4a..65f6c040624 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1222,7 +1222,7 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
 	}
 
 	/* D-SACK for already forgotten data... Do dumb counting. */
-	if (dup_sack &&
+	if (dup_sack && tp->undo_marker && tp->undo_retrans &&
 	    !after(end_seq_0, prior_snd_una) &&
 	    after(end_seq_0, tp->undo_marker))
 		tp->undo_retrans--;
@@ -1299,7 +1299,8 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 
 	/* Account D-SACK for retransmitted packet. */
 	if (dup_sack && (sacked & TCPCB_RETRANS)) {
-		if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+		if (tp->undo_marker && tp->undo_retrans &&
+		    after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
 			tp->undo_retrans--;
 		if (sacked & TCPCB_SACKED_ACKED)
 			state->reord = min(fack_count, state->reord);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 406f320336e..dfa5beb0c1c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2162,7 +2162,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		if (!tp->retrans_stamp)
 			tp->retrans_stamp = TCP_SKB_CB(skb)->when;
 
-		tp->undo_retrans++;
+		tp->undo_retrans += tcp_skb_pcount(skb);
 
 		/* snd_nxt is stored to detect loss of retransmitted segment,
 		 * see tcp_input.c tcp_sacktag_write_queue().
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4f4483e697b..e528a42a52b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -57,6 +57,7 @@
 MODULE_AUTHOR("Ville Nuorvala");
 MODULE_DESCRIPTION("IPv6 tunneling device");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETDEV("ip6tnl0");
 
 #ifdef IP6_TNL_DEBUG
 #define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 09c88891a75..de338037a73 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -410,7 +410,7 @@ fallback:
 		if (p != NULL) {
 			sb_add(m, "%02x", *p++);
 			for (i = 1; i < len; i++)
-				sb_add(m, ":%02x", p[i]);
+				sb_add(m, ":%02x", *p++);
 		}
 		sb_add(m, " ");
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a998db6e789..e7db7014e89 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -739,8 +739,10 @@ restart:
 
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
-	else
+	else if (!(rt->dst.flags & DST_HOST))
 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
+	else
+		goto out2;
 
 	dst_release(&rt->dst);
 	rt = nrt ? : net->ipv6.ip6_null_entry;
@@ -2557,14 +2559,16 @@ static
 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
 			      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	struct net *net = current->nsproxy->net_ns;
-	int delay = net->ipv6.sysctl.flush_delay;
-	if (write) {
-		proc_dointvec(ctl, write, buffer, lenp, ppos);
-		fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
-		return 0;
-	} else
+	struct net *net;
+	int delay;
+	if (!write)
 		return -EINVAL;
+
+	net = (struct net *)ctl->extra1;
+	delay = net->ipv6.sysctl.flush_delay;
+	proc_dointvec(ctl, write, buffer, lenp, ppos);
+	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
+	return 0;
 }
 
 ctl_table ipv6_route_table_template[] = {
@@ -2651,6 +2655,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
 
 	if (table) {
 		table[0].data = &net->ipv6.sysctl.flush_delay;
+		table[0].extra1 = net;
 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8ce38f10a54..d2c16e10f65 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1290,4 +1290,4 @@ static int __init sit_init(void)
 module_init(sit_init);
 module_exit(sit_cleanup);
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("sit0");
+MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 8acba456744..7a10a8d1b2d 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1229,6 +1229,7 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
 	}
 	mutex_unlock(&local->iflist_mtx);
 	unregister_netdevice_many(&unreg_list);
+	list_del(&unreg_list);
 }
 
 static u32 ieee80211_idle_off(struct ieee80211_local *local,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 45fbb9e3374..c9ceb4d57ab 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1033,6 +1033,12 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
 	if (is_multicast_ether_addr(hdr->addr1))
 		return;
 
+	/*
+	 * In case we receive frames after disassociation.
+	 */
+	if (!sdata->u.mgd.associated)
+		return;
+
 	ieee80211_sta_reset_conn_monitor(sdata);
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 22f7ad5101a..ba98e1308f3 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -808,9 +808,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 	dest->u_threshold = udest->u_threshold;
 	dest->l_threshold = udest->l_threshold;
 
-	spin_lock(&dest->dst_lock);
+	spin_lock_bh(&dest->dst_lock);
 	ip_vs_dst_reset(dest);
-	spin_unlock(&dest->dst_lock);
+	spin_unlock_bh(&dest->dst_lock);
 
 	if (add)
 		ip_vs_new_estimator(&dest->stats);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index b07393eab88..91816998ed8 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -85,6 +85,8 @@ EXPORT_SYMBOL(nf_log_unregister);
 
 int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)
 {
+	if (pf >= ARRAY_SIZE(nf_loggers))
+		return -EINVAL;
 	mutex_lock(&nf_log_mutex);
 	if (__find_logger(pf, logger->name) == NULL) {
 		mutex_unlock(&nf_log_mutex);
@@ -98,6 +100,8 @@ EXPORT_SYMBOL(nf_log_bind_pf);
 
 void nf_log_unbind_pf(u_int8_t pf)
 {
+	if (pf >= ARRAY_SIZE(nf_loggers))
+		return;
 	mutex_lock(&nf_log_mutex);
 	rcu_assign_pointer(nf_loggers[pf], NULL);
 	mutex_unlock(&nf_log_mutex);
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
index 4d87befb04c..474d621cbc2 100644
--- a/net/netfilter/nf_tproxy_core.c
+++ b/net/netfilter/nf_tproxy_core.c
@@ -28,26 +28,23 @@ nf_tproxy_destructor(struct sk_buff *skb)
 	skb->destructor = NULL;
 
 	if (sk)
-		nf_tproxy_put_sock(sk);
+		sock_put(sk);
 }
 
 /* consumes sk */
-int
+void
 nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
 {
-	bool transparent = (sk->sk_state == TCP_TIME_WAIT) ?
-				inet_twsk(sk)->tw_transparent :
-				inet_sk(sk)->transparent;
-
-	if (transparent) {
-		skb_orphan(skb);
-		skb->sk = sk;
-		skb->destructor = nf_tproxy_destructor;
-		return 1;
-	} else
-		nf_tproxy_put_sock(sk);
-
-	return 0;
+	/* assigning tw sockets complicates things; most
+	 * skb->sk->X checks would have to test sk->sk_state first */
+	if (sk->sk_state == TCP_TIME_WAIT) {
+		inet_twsk_put(inet_twsk(sk));
+		return;
+	}
+
+	skb_orphan(skb);
+	skb->sk = sk;
+	skb->destructor = nf_tproxy_destructor;
 }
 EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
 
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 640678f47a2..dcfd57eb9d0 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -33,6 +33,20 @@
 #include <net/netfilter/nf_tproxy_core.h>
 #include <linux/netfilter/xt_TPROXY.h>
 
+static bool tproxy_sk_is_transparent(struct sock *sk)
+{
+	if (sk->sk_state != TCP_TIME_WAIT) {
+		if (inet_sk(sk)->transparent)
+			return true;
+		sock_put(sk);
+	} else {
+		if (inet_twsk(sk)->tw_transparent)
+			return true;
+		inet_twsk_put(inet_twsk(sk));
+	}
+	return false;
+}
+
 static inline __be32
 tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
 {
@@ -141,7 +155,7 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
 					   skb->dev, NFT_LOOKUP_LISTENER);
 
 	/* NOTE: assign_sock consumes our sk reference */
-	if (sk && nf_tproxy_assign_sock(skb, sk)) {
+	if (sk && tproxy_sk_is_transparent(sk)) {
 		/* This should be in a separate target, but we don't do multiple
 		   targets on the same rule yet */
 		skb->mark = (skb->mark & ~mark_mask) ^ mark_value;
@@ -149,6 +163,8 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
 		pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n",
 			 iph->protocol, &iph->daddr, ntohs(hp->dest),
 			 &laddr, ntohs(lport), skb->mark);
+
+		nf_tproxy_assign_sock(skb, sk);
 		return NF_ACCEPT;
 	}
 
@@ -306,7 +322,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
 					   par->in, NFT_LOOKUP_LISTENER);
 
 	/* NOTE: assign_sock consumes our sk reference */
-	if (sk && nf_tproxy_assign_sock(skb, sk)) {
+	if (sk && tproxy_sk_is_transparent(sk)) {
 		/* This should be in a separate target, but we don't do multiple
 		   targets on the same rule yet */
 		skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
@@ -314,6 +330,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
 		pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n",
 			 tproto, &iph->saddr, ntohs(hp->source),
 			 laddr, ntohs(lport), skb->mark);
+
+		nf_tproxy_assign_sock(skb, sk);
 		return NF_ACCEPT;
 	}
 
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 00d6ae83830..9cc46356b57 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -35,6 +35,15 @@
 #include <net/netfilter/nf_conntrack.h>
 #endif
 
+static void
+xt_socket_put_sk(struct sock *sk)
+{
+	if (sk->sk_state == TCP_TIME_WAIT)
+		inet_twsk_put(inet_twsk(sk));
+	else
+		sock_put(sk);
+}
+
 static int
 extract_icmp4_fields(const struct sk_buff *skb,
 		    u8 *protocol,
@@ -164,7 +173,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 				       (sk->sk_state == TCP_TIME_WAIT &&
 					inet_twsk(sk)->tw_transparent));
 
-		nf_tproxy_put_sock(sk);
+		xt_socket_put_sk(sk);
 
 		if (wildcard || !transparent)
 			sk = NULL;
@@ -298,7 +307,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
 				       (sk->sk_state == TCP_TIME_WAIT &&
 					inet_twsk(sk)->tw_transparent));
 
-		nf_tproxy_put_sock(sk);
+		xt_socket_put_sk(sk);
 
 		if (wildcard || !transparent)
 			sk = NULL;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 478181d53c5..1f924595bde 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1407,7 +1407,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 	int noblock = flags&MSG_DONTWAIT;
 	size_t copied;
 	struct sk_buff *skb, *data_skb;
-	int err;
+	int err, ret;
 
 	if (flags&MSG_OOB)
 		return -EOPNOTSUPP;
@@ -1470,8 +1470,13 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 
 	skb_free_datagram(sk, skb);
 
-	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
-		netlink_dump(sk);
+	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
+		ret = netlink_dump(sk);
+		if (ret) {
+			sk->sk_err = ret;
+			sk->sk_error_report(sk);
+		}
+	}
 
 	scm_recv(sock, msg, siocb->scm, flags);
 out:
@@ -1736,6 +1741,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	struct netlink_callback *cb;
 	struct sock *sk;
 	struct netlink_sock *nlk;
+	int ret;
 
 	cb = kzalloc(sizeof(*cb), GFP_KERNEL);
 	if (cb == NULL)
@@ -1764,9 +1770,13 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	nlk->cb = cb;
 	mutex_unlock(nlk->cb_mutex);
 
-	netlink_dump(sk);
+	ret = netlink_dump(sk);
+
 	sock_put(sk);
 
+	if (ret)
+		return ret;
+
 	/* We successfully started a dump, by returning -EINTR we
 	 * signal not to send ACK even if it was requested.
 	 */
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 4123967d4d6..cce19f95c62 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -364,7 +364,6 @@ void rds_ib_exit(void)
 	rds_ib_sysctl_exit();
 	rds_ib_recv_exit();
 	rds_trans_unregister(&rds_ib_transport);
-	rds_ib_fmr_exit();
 }
 
 struct rds_transport rds_ib_transport = {
@@ -400,13 +399,9 @@ int rds_ib_init(void)
 
 	INIT_LIST_HEAD(&rds_ib_devices);
 
-	ret = rds_ib_fmr_init();
-	if (ret)
-		goto out;
-
 	ret = ib_register_client(&rds_ib_client);
 	if (ret)
-		goto out_fmr_exit;
+		goto out;
 
 	ret = rds_ib_sysctl_init();
 	if (ret)
@@ -430,8 +425,6 @@ out_sysctl:
 	rds_ib_sysctl_exit();
 out_ibreg:
 	rds_ib_unregister_client();
-out_fmr_exit:
-	rds_ib_fmr_exit();
 out:
 	return ret;
 }
diff --git a/net/rds/ib.h b/net/rds/ib.h
index e34ad032b66..4297d92788d 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -307,8 +307,6 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 void rds_ib_sync_mr(void *trans_private, int dir);
 void rds_ib_free_mr(void *trans_private, int invalidate);
 void rds_ib_flush_mrs(void);
-int rds_ib_fmr_init(void);
-void rds_ib_fmr_exit(void);
 
 /* ib_recv.c */
 int rds_ib_recv_init(void);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 18a833c450c..819c35a0d9c 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -38,8 +38,6 @@
 #include "ib.h"
 #include "xlist.h"
 
-static struct workqueue_struct *rds_ib_fmr_wq;
-
 static DEFINE_PER_CPU(unsigned long, clean_list_grace);
 #define CLEAN_LIST_BUSY_BIT 0
 
@@ -307,7 +305,7 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
 	int err = 0, iter = 0;
 
 	if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
-		queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
+		schedule_delayed_work(&pool->flush_worker, 10);
 
 	while (1) {
 		ibmr = rds_ib_reuse_fmr(pool);
@@ -696,24 +694,6 @@ out_nolock:
 	return ret;
 }
 
-int rds_ib_fmr_init(void)
-{
-	rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
-	if (!rds_ib_fmr_wq)
-		return -ENOMEM;
-	return 0;
-}
-
-/*
- * By the time this is called all the IB devices should have been torn down and
- * had their pools freed.  As each pool is freed its work struct is waited on,
- * so the pool flushing work queue should be idle by the time we get here.
- */
-void rds_ib_fmr_exit(void)
-{
-	destroy_workqueue(rds_ib_fmr_wq);
-}
-
 static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
 {
 	struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
@@ -741,7 +721,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
 	/* If we've pinned too many pages, request a flush */
 	if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
 	    atomic_read(&pool->dirty_count) >= pool->max_items / 10)
-		queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
+		schedule_delayed_work(&pool->flush_worker, 10);
 
 	if (invalidate) {
 		if (likely(!in_interrupt())) {
@@ -749,8 +729,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
 		} else {
 			/* We get here if the user created a MR marked
 			 * as use_once and invalidate at the same time. */
-			queue_delayed_work(rds_ib_fmr_wq,
-					   &pool->flush_worker, 10);
+			schedule_delayed_work(&pool->flush_worker, 10);
 		}
 	}
 
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 71f373c421b..c47a511f203 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -551,7 +551,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 	if (conn->c_loopback
 	    && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
 		rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
-		return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+		scat = &rm->data.op_sg[sg];
+		ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+		ret = min_t(int, ret, scat->length - conn->c_xmit_data_off);
+		return ret;
 	}
 
 	/* FIXME we may overallocate here */
diff --git a/net/rds/loop.c b/net/rds/loop.c
index aeec1d483b1..bca6761a3ca 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -61,10 +61,15 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
 			 unsigned int hdr_off, unsigned int sg,
 			 unsigned int off)
 {
+	struct scatterlist *sgp = &rm->data.op_sg[sg];
+	int ret = sizeof(struct rds_header) +
+			be32_to_cpu(rm->m_inc.i_hdr.h_len);
+
 	/* Do not send cong updates to loopback */
 	if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
 		rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
-		return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+		ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off);
+		goto out;
 	}
 
 	BUG_ON(hdr_off || sg || off);
@@ -80,8 +85,8 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
 			    NULL);
 
 	rds_inc_put(&rm->m_inc);
-
-	return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len);
+out:
+	return ret;
 }
 
 /*
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 89315009bab..1a2b0633fec 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -423,6 +423,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
 			goto protocol_error;
 		}
 
+	case RXRPC_PACKET_TYPE_ACKALL:
 	case RXRPC_PACKET_TYPE_ACK:
 		/* ACK processing is done in process context */
 		read_lock_bh(&call->state_lock);
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 5ee16f0353f..43ea7de2fc8 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -25,6 +25,7 @@
 #include <keys/user-type.h>
 #include "ar-internal.h"
 
+static int rxrpc_vet_description_s(const char *);
 static int rxrpc_instantiate(struct key *, const void *, size_t);
 static int rxrpc_instantiate_s(struct key *, const void *, size_t);
 static void rxrpc_destroy(struct key *);
@@ -52,6 +53,7 @@ EXPORT_SYMBOL(key_type_rxrpc);
  */
 struct key_type key_type_rxrpc_s = {
 	.name		= "rxrpc_s",
+	.vet_description = rxrpc_vet_description_s,
 	.instantiate	= rxrpc_instantiate_s,
 	.match		= user_match,
 	.destroy	= rxrpc_destroy_s,
@@ -59,6 +61,23 @@ struct key_type key_type_rxrpc_s = {
 };
 
 /*
+ * Vet the description for an RxRPC server key
+ */
+static int rxrpc_vet_description_s(const char *desc)
+{
+	unsigned long num;
+	char *p;
+
+	num = simple_strtoul(desc, &p, 10);
+	if (*p != ':' || num > 65535)
+		return -EINVAL;
+	num = simple_strtoul(p + 1, &p, 10);
+	if (*p || num < 1 || num > 255)
+		return -EINVAL;
+	return 0;
+}
+
+/*
  * parse an RxKAD type XDR format token
  * - the caller guarantees we have at least 4 words
  */
@@ -89,11 +108,11 @@ static int rxrpc_instantiate_xdr_rxkad(struct key *key, const __be32 *xdr,
 		return ret;
 
 	plen -= sizeof(*token);
-	token = kmalloc(sizeof(*token), GFP_KERNEL);
+	token = kzalloc(sizeof(*token), GFP_KERNEL);
 	if (!token)
 		return -ENOMEM;
 
-	token->kad = kmalloc(plen, GFP_KERNEL);
+	token->kad = kzalloc(plen, GFP_KERNEL);
 	if (!token->kad) {
 		kfree(token);
 		return -ENOMEM;
@@ -731,10 +750,10 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
 		goto error;
 
 	ret = -ENOMEM;
-	token = kmalloc(sizeof(*token), GFP_KERNEL);
+	token = kzalloc(sizeof(*token), GFP_KERNEL);
 	if (!token)
 		goto error;
-	token->kad = kmalloc(plen, GFP_KERNEL);
+	token->kad = kzalloc(plen, GFP_KERNEL);
 	if (!token->kad)
 		goto error_free;
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 34dc598440a..1bc698039ae 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -839,6 +839,7 @@ void dev_deactivate(struct net_device *dev)
 
 	list_add(&dev->unreg_list, &single);
 	dev_deactivate_many(&single);
+	list_del(&single);
 }
 
 static void dev_init_scheduler_queue(struct net_device *dev,
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 2cc46f0962c..b23428f3c0d 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2029,11 +2029,11 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
 			*errp = sctp_make_op_error_fixed(asoc, chunk);
 
 		if (*errp) {
-			sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
-					WORD_ROUND(ntohs(param.p->length)));
-			sctp_addto_chunk_fixed(*errp,
-					WORD_ROUND(ntohs(param.p->length)),
-					param.v);
+			if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
+					WORD_ROUND(ntohs(param.p->length))))
+				sctp_addto_chunk_fixed(*errp,
+						WORD_ROUND(ntohs(param.p->length)),
+						param.v);
 		} else {
 			/* If there is no memory for generating the ERROR
 			 * report as specified, an ABORT will be triggered
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 243fc09b164..3fc8624fcd1 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -252,23 +252,37 @@ static void rpc_set_active(struct rpc_task *task)
 
 /*
  * Mark an RPC call as having completed by clearing the 'active' bit
+ * and then waking up all tasks that were sleeping.
  */
-static void rpc_mark_complete_task(struct rpc_task *task)
+static int rpc_complete_task(struct rpc_task *task)
 {
-	smp_mb__before_clear_bit();
+	void *m = &task->tk_runstate;
+	wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE);
+	struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE);
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&wq->lock, flags);
 	clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE);
+	ret = atomic_dec_and_test(&task->tk_count);
+	if (waitqueue_active(wq))
+		__wake_up_locked_key(wq, TASK_NORMAL, &k);
+	spin_unlock_irqrestore(&wq->lock, flags);
+	return ret;
 }
 
 /*
  * Allow callers to wait for completion of an RPC call
+ *
+ * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit()
+ * to enforce taking of the wq->lock and hence avoid races with
+ * rpc_complete_task().
  */
 int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
 {
 	if (action == NULL)
 		action = rpc_wait_bit_killable;
-	return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
+	return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
 			action, TASK_KILLABLE);
 }
 EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
@@ -857,34 +871,67 @@ static void rpc_async_release(struct work_struct *work)
 	rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
 }
 
-void rpc_put_task(struct rpc_task *task)
+static void rpc_release_resources_task(struct rpc_task *task)
 {
-	if (!atomic_dec_and_test(&task->tk_count))
-		return;
-	/* Release resources */
 	if (task->tk_rqstp)
 		xprt_release(task);
 	if (task->tk_msg.rpc_cred)
 		put_rpccred(task->tk_msg.rpc_cred);
 	rpc_task_release_client(task);
-	if (task->tk_workqueue != NULL) {
+}
+
+static void rpc_final_put_task(struct rpc_task *task,
+		struct workqueue_struct *q)
+{
+	if (q != NULL) {
 		INIT_WORK(&task->u.tk_work, rpc_async_release);
-		queue_work(task->tk_workqueue, &task->u.tk_work);
+		queue_work(q, &task->u.tk_work);
 	} else
 		rpc_free_task(task);
 }
+
+static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q)
+{
+	if (atomic_dec_and_test(&task->tk_count)) {
+		rpc_release_resources_task(task);
+		rpc_final_put_task(task, q);
+	}
+}
+
+void rpc_put_task(struct rpc_task *task)
+{
+	rpc_do_put_task(task, NULL);
+}
 EXPORT_SYMBOL_GPL(rpc_put_task);
 
+void rpc_put_task_async(struct rpc_task *task)
+{
+	rpc_do_put_task(task, task->tk_workqueue);
+}
+EXPORT_SYMBOL_GPL(rpc_put_task_async);
+
 static void rpc_release_task(struct rpc_task *task)
 {
 	dprintk("RPC: %5u release task\n", task->tk_pid);
 
 	BUG_ON (RPC_IS_QUEUED(task));
 
-	/* Wake up anyone who is waiting for task completion */
-	rpc_mark_complete_task(task);
+	rpc_release_resources_task(task);
 
-	rpc_put_task(task);
+	/*
+	 * Note: at this point we have been removed from rpc_clnt->cl_tasks,
+	 * so it should be safe to use task->tk_count as a test for whether
+	 * or not any other processes still hold references to our rpc_task.
+	 */
+	if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) {
+		/* Wake up anyone who may be waiting for task completion */
+		if (!rpc_complete_task(task))
+			return;
+	} else {
+		if (!atomic_dec_and_test(&task->tk_count))
+			return;
+	}
+	rpc_final_put_task(task, task->tk_workqueue);
 }
 
 int rpciod_up(void)
@@ -908,7 +955,7 @@ static int rpciod_start(void)
 	 * Create the rpciod thread and wait for it to start.
 	 */
 	dprintk("RPC:       creating workqueue rpciod\n");
-	wq = alloc_workqueue("rpciod", WQ_RESCUER, 0);
+	wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0);
 	rpciod_workqueue = wq;
 	return rpciod_workqueue != NULL;
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9df1eadc912..1a10dcd999e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1335,6 +1335,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 					    p, 0, length, DMA_FROM_DEVICE);
 	if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
 		put_page(p);
+		svc_rdma_put_context(ctxt, 1);
 		return;
 	}
 	atomic_inc(&xprt->sc_dma_used);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c431f5a5796..be96d429b47 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1631,7 +1631,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
 	}
 	xs_reclassify_socket(family, sock);
 
-	if (xs_bind(transport, sock)) {
+	err = xs_bind(transport, sock);
+	if (err) {
 		sock_release(sock);
 		goto out;
 	}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dd419d28620..ba5b8c20849 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		 * Get the parent directory, calculate the hash for last
 		 * component.
 		 */
-		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
+		err = kern_path_parent(sunaddr->sun_path, &nd);
 		if (err)
 			goto out_mknod_parent;
 
@@ -1724,7 +1724,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	msg->msg_namelen = 0;
 
-	mutex_lock(&u->readlock);
+	err = mutex_lock_interruptible(&u->readlock);
+	if (err) {
+		err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
+		goto out;
+	}
 
 	skb = skb_recv_datagram(sk, flags, noblock, &err);
 	if (!skb) {
@@ -1864,7 +1868,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 		memset(&tmp_scm, 0, sizeof(tmp_scm));
 	}
 
-	mutex_lock(&u->readlock);
+	err = mutex_lock_interruptible(&u->readlock);
+	if (err) {
+		err = sock_intr_errno(timeo);
+		goto out;
+	}
 
 	do {
 		int chunk;
@@ -1895,11 +1903,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 			timeo = unix_stream_data_wait(sk, timeo);
 
-			if (signal_pending(current)) {
+			if (signal_pending(current)
+			    ||  mutex_lock_interruptible(&u->readlock)) {
 				err = sock_intr_errno(timeo);
 				goto out;
 			}
-			mutex_lock(&u->readlock);
+
 			continue;
  unlock:
 			unix_state_unlock(sk);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index f89f83bf828..b6f4b994eb3 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -104,7 +104,7 @@ struct sock *unix_get_socket(struct file *filp)
 	/*
 	 *	Socket ?
 	 */
-	if (S_ISSOCK(inode->i_mode)) {
+	if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
 		struct socket *sock = SOCKET_I(inode);
 		struct sock *s = sock->sk;
 
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 3e5dbd4e4cd..d112f038edf 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -802,11 +802,11 @@ int cfg80211_wext_siwfreq(struct net_device *dev,
 			return freq;
 		if (freq == 0)
 			return -EINVAL;
-		wdev_lock(wdev);
 		mutex_lock(&rdev->devlist_mtx);
+		wdev_lock(wdev);
 		err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT);
-		mutex_unlock(&rdev->devlist_mtx);
 		wdev_unlock(wdev);
+		mutex_unlock(&rdev->devlist_mtx);
 		return err;
 	default:
 		return -EOPNOTSUPP;