From f75e6745aa3084124ae1434fd7629853bdaf6798 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 21 Apr 2009 17:18:20 -0400
Subject: SUNRPC: Fix the problem of EADDRNOTAVAIL syslog floods on reconnect

See http://bugzilla.kernel.org/show_bug.cgi?id=13034

If the port gets into a TIME_WAIT state, then we cannot reconnect without
binding to a new port.

Tested-by: Petr Vandrovec <petr@vandrovec.name>
Tested-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/sunrpc/xprt.c     |  6 ++----
 net/sunrpc/xprtsock.c | 26 +++++++++++++++++++++-----
 2 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index a0bfe53f162..06ca058572f 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -672,10 +672,8 @@ xprt_init_autodisconnect(unsigned long data)
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
 		goto out_abort;
 	spin_unlock(&xprt->transport_lock);
-	if (xprt_connecting(xprt))
-		xprt_release_write(xprt, NULL);
-	else
-		queue_work(rpciod_workqueue, &xprt->task_cleanup);
+	set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+	queue_work(rpciod_workqueue, &xprt->task_cleanup);
 	return;
 out_abort:
 	spin_unlock(&xprt->transport_lock);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d40ff50887a..e1859614601 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -807,6 +807,9 @@ static void xs_reset_transport(struct sock_xprt *transport)
  *
  * This is used when all requests are complete; ie, no DRC state remains
  * on the server we want to save.
+ *
+ * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with
+ * xs_reset_transport() zeroing the socket from underneath a writer.
  */
 static void xs_close(struct rpc_xprt *xprt)
 {
@@ -824,6 +827,14 @@ static void xs_close(struct rpc_xprt *xprt)
 	xprt_disconnect_done(xprt);
 }
 
+static void xs_tcp_close(struct rpc_xprt *xprt)
+{
+	if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state))
+		xs_close(xprt);
+	else
+		xs_tcp_shutdown(xprt);
+}
+
 /**
  * xs_destroy - prepare to shutdown a transport
  * @xprt: doomed transport
@@ -1772,6 +1783,15 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
 			xprt, -status, xprt_connected(xprt),
 			sock->sk->sk_state);
 	switch (status) {
+	default:
+		printk("%s: connect returned unhandled error %d\n",
+			__func__, status);
+	case -EADDRNOTAVAIL:
+		/* We're probably in TIME_WAIT. Get rid of existing socket,
+		 * and retry
+		 */
+		set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+		xprt_force_disconnect(xprt);
 	case -ECONNREFUSED:
 	case -ECONNRESET:
 	case -ENETUNREACH:
@@ -1782,10 +1802,6 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
 		xprt_clear_connecting(xprt);
 		return;
 	}
-	/* get rid of existing socket, and retry */
-	xs_tcp_shutdown(xprt);
-	printk("%s: connect returned unhandled error %d\n",
-			__func__, status);
 out_eagain:
 	status = -EAGAIN;
 out:
@@ -1994,7 +2010,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
 	.buf_free		= rpc_free,
 	.send_request		= xs_tcp_send_request,
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
-	.close			= xs_tcp_shutdown,
+	.close			= xs_tcp_close,
 	.destroy		= xs_destroy,
 	.print_stats		= xs_tcp_print_stats,
 };
-- 
cgit v1.2.3


From 453ed90d1395a5281a8f1a0de5d8aabc66202e34 Mon Sep 17 00:00:00 2001
From: Latchesar Ionkov <lucho@ionkov.net>
Date: Sun, 5 Apr 2009 16:22:16 -0500
Subject: net/9p: set correct stat size when sending Twstat messages

The 9P2000 Twstat message requires the size of the stat structure to be
specified. Currently the 9p code writes zero instead of the actual size.
This behavior confuses some of the file servers that check if the size is
correct.

This patch adds a new function that calculcates the stat size and puts the
value in the appropriate place in the 9P message.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Reviewed-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index 1eb580c38fb..93f442aaa11 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1251,12 +1251,42 @@ error:
 }
 EXPORT_SYMBOL(p9_client_stat);
 
+static int p9_client_statsize(struct p9_wstat *wst, int optional)
+{
+	int ret;
+
+	/* size[2] type[2] dev[4] qid[13] */
+	/* mode[4] atime[4] mtime[4] length[8]*/
+	/* name[s] uid[s] gid[s] muid[s] */
+	ret = 2+2+4+13+4+4+4+8+2+2+2+2;
+
+	if (wst->name)
+		ret += strlen(wst->name);
+	if (wst->uid)
+		ret += strlen(wst->uid);
+	if (wst->gid)
+		ret += strlen(wst->gid);
+	if (wst->muid)
+		ret += strlen(wst->muid);
+
+	if (optional) {
+		ret += 2+4+4+4;	/* extension[s] n_uid[4] n_gid[4] n_muid[4] */
+		if (wst->extension)
+			ret += strlen(wst->extension);
+	}
+
+	return ret;
+}
+
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 {
 	int err;
 	struct p9_req_t *req;
 	struct p9_client *clnt;
 
+	err = 0;
+	clnt = fid->clnt;
+	wst->size = p9_client_statsize(wst, clnt->dotu);
 	P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid);
 	P9_DPRINTK(P9_DEBUG_9P,
 		"     sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
@@ -1268,10 +1298,8 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 		wst->atime, wst->mtime, (unsigned long long)wst->length,
 		wst->name, wst->uid, wst->gid, wst->muid, wst->extension,
 		wst->n_uid, wst->n_gid, wst->n_muid);
-	err = 0;
-	clnt = fid->clnt;
 
-	req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, 0, wst);
+	req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size, wst);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;
-- 
cgit v1.2.3


From 742b11a7ec60faa25d76c95c268041ab215c25ad Mon Sep 17 00:00:00 2001
From: Latchesar Ionkov <lucho@ionkov.net>
Date: Sun, 5 Apr 2009 16:26:41 -0500
Subject: net/9p: return error when p9_client_stat fails

p9_client_stat function doesn't return correct value if it fails.
p9_client_stat should return ERR_PTR of the error value when it fails.
Instead, it always returns a value to the allocated p9_wstat struct even
when it is not populated correctly.

This patch makes p9_client_stat to handle failure correctly.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Reviewed-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index 93f442aaa11..781d89a952e 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1244,10 +1244,14 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
 		ret->name, ret->uid, ret->gid, ret->muid, ret->extension,
 		ret->n_uid, ret->n_gid, ret->n_muid);
 
+	p9_free_req(clnt, req);
+	return ret;
+
 free_and_error:
 	p9_free_req(clnt, req);
 error:
-	return ret;
+	kfree(ret);
+	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(p9_client_stat);
 
-- 
cgit v1.2.3


From 1bab88b2310998de18b32529a27ea835d164254a Mon Sep 17 00:00:00 2001
From: Latchesar Ionkov <lucho@ionkov.net>
Date: Sun, 5 Apr 2009 16:28:59 -0500
Subject: net/9p: handle correctly interrupted 9P requests

Currently the 9p code crashes when a operation is interrupted, i.e. for
example when the user presses ^C while reading from a file.

This patch fixes the code that is responsible for interruption and flushing
of 9P operations.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
---
 net/9p/client.c       | 74 +++++++++++++--------------------------------------
 net/9p/trans_fd.c     | 14 ++++++----
 net/9p/trans_rdma.c   |  1 +
 net/9p/trans_virtio.c |  1 +
 4 files changed, 30 insertions(+), 60 deletions(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index 781d89a952e..dd43a8289b0 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -203,7 +203,6 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 	p9pdu_reset(req->tc);
 	p9pdu_reset(req->rc);
 
-	req->flush_tag = 0;
 	req->tc->tag = tag-1;
 	req->status = REQ_STATUS_ALLOC;
 
@@ -324,35 +323,9 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
  */
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req)
 {
-	struct p9_req_t *other_req;
-	unsigned long flags;
-
 	P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
-
-	if (req->status == REQ_STATUS_ERROR)
-		wake_up(req->wq);
-
-	if (req->flush_tag) { 			/* flush receive path */
-		P9_DPRINTK(P9_DEBUG_9P, "<<< RFLUSH %d\n", req->tc->tag);
-		spin_lock_irqsave(&c->lock, flags);
-		other_req = p9_tag_lookup(c, req->flush_tag);
-		if (other_req->status != REQ_STATUS_FLSH) /* stale flush */
-			spin_unlock_irqrestore(&c->lock, flags);
-		else {
-			other_req->status = REQ_STATUS_FLSHD;
-			spin_unlock_irqrestore(&c->lock, flags);
-			wake_up(other_req->wq);
-		}
-		p9_free_req(c, req);
-	} else { 				/* normal receive path */
-		P9_DPRINTK(P9_DEBUG_MUX, "normal: tag %d\n", req->tc->tag);
-		spin_lock_irqsave(&c->lock, flags);
-		if (req->status != REQ_STATUS_FLSHD)
-			req->status = REQ_STATUS_RCVD;
-		spin_unlock_irqrestore(&c->lock, flags);
-		wake_up(req->wq);
-		P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
-	}
+	wake_up(req->wq);
+	P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
 }
 EXPORT_SYMBOL(p9_client_cb);
 
@@ -486,9 +459,15 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	req->flush_tag = oldtag;
 
-	/* we don't free anything here because RPC isn't complete */
+	/* if we haven't received a response for oldreq,
+	   remove it from the list. */
+	spin_lock(&c->lock);
+	if (oldreq->status == REQ_STATUS_FLSH)
+		list_del(&oldreq->req_list);
+	spin_unlock(&c->lock);
+
+	p9_free_req(c, req);
 	return 0;
 }
 
@@ -509,7 +488,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 	struct p9_req_t *req;
 	unsigned long flags;
 	int sigpending;
-	int flushed = 0;
 
 	P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type);
 
@@ -546,42 +524,28 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 		goto reterr;
 	}
 
-	/* if it was a flush we just transmitted, return our tag */
-	if (type == P9_TFLUSH)
-		return req;
-again:
 	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag);
 	err = wait_event_interruptible(*req->wq,
 						req->status >= REQ_STATUS_RCVD);
-	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d (flushed=%d)\n",
-						req->wq, tag, err, flushed);
+	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n",
+						req->wq, tag, err);
 
 	if (req->status == REQ_STATUS_ERROR) {
 		P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
 		err = req->t_err;
-	} else if (err == -ERESTARTSYS && flushed) {
-		P9_DPRINTK(P9_DEBUG_MUX, "flushed - going again\n");
-		goto again;
-	} else if (req->status == REQ_STATUS_FLSHD) {
-		P9_DPRINTK(P9_DEBUG_MUX, "flushed - erestartsys\n");
-		err = -ERESTARTSYS;
 	}
 
-	if ((err == -ERESTARTSYS) && (c->status == Connected) && (!flushed)) {
+	if ((err == -ERESTARTSYS) && (c->status == Connected)) {
 		P9_DPRINTK(P9_DEBUG_MUX, "flushing\n");
-		spin_lock_irqsave(&c->lock, flags);
-		if (req->status == REQ_STATUS_SENT)
-			req->status = REQ_STATUS_FLSH;
-		spin_unlock_irqrestore(&c->lock, flags);
 		sigpending = 1;
-		flushed = 1;
 		clear_thread_flag(TIF_SIGPENDING);
 
-		if (c->trans_mod->cancel(c, req)) {
-			err = p9_client_flush(c, req);
-			if (err == 0)
-				goto again;
-		}
+		if (c->trans_mod->cancel(c, req))
+			p9_client_flush(c, req);
+
+		/* if we received the response anyway, don't signal error */
+		if (req->status == REQ_STATUS_RCVD)
+			err = 0;
 	}
 
 	if (sigpending) {
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index c613ed08a5e..a2a1814c7a8 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -213,8 +213,8 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
 	spin_unlock_irqrestore(&m->client->lock, flags);
 
 	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
-		list_del(&req->req_list);
 		P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req);
+		list_del(&req->req_list);
 		p9_client_cb(m->client, req);
 	}
 }
@@ -336,7 +336,8 @@ static void p9_read_work(struct work_struct *work)
 			"mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
 
 		m->req = p9_tag_lookup(m->client, tag);
-		if (!m->req) {
+		if (!m->req || (m->req->status != REQ_STATUS_SENT &&
+					m->req->status != REQ_STATUS_FLSH)) {
 			P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
 								 tag);
 			err = -EIO;
@@ -361,10 +362,11 @@ static void p9_read_work(struct work_struct *work)
 	if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
 		P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n");
 		spin_lock(&m->client->lock);
+		if (m->req->status != REQ_STATUS_ERROR)
+			m->req->status = REQ_STATUS_RCVD;
 		list_del(&m->req->req_list);
 		spin_unlock(&m->client->lock);
 		p9_client_cb(m->client, m->req);
-
 		m->rbuf = NULL;
 		m->rpos = 0;
 		m->rsize = 0;
@@ -454,6 +456,7 @@ static void p9_write_work(struct work_struct *work)
 		req = list_entry(m->unsent_req_list.next, struct p9_req_t,
 			       req_list);
 		req->status = REQ_STATUS_SENT;
+		P9_DPRINTK(P9_DEBUG_TRANS, "move req %p\n", req);
 		list_move_tail(&req->req_list, &m->req_list);
 
 		m->wbuf = req->tc->sdata;
@@ -683,12 +686,13 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 	P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
 
 	spin_lock(&client->lock);
-	list_del(&req->req_list);
 
 	if (req->status == REQ_STATUS_UNSENT) {
+		list_del(&req->req_list);
 		req->status = REQ_STATUS_FLSHD;
 		ret = 0;
-	}
+	} else if (req->status == REQ_STATUS_SENT)
+		req->status = REQ_STATUS_FLSH;
 
 	spin_unlock(&client->lock);
 
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 7fa0eb20b2f..ac4990041eb 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -295,6 +295,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
 		goto err_out;
 
 	req->rc = c->rc;
+	req->status = REQ_STATUS_RCVD;
 	p9_client_cb(client, req);
 
 	return;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 2d7781ec663..bb8579a141a 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -134,6 +134,7 @@ static void req_done(struct virtqueue *vq)
 		P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
 		P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
 		req = p9_tag_lookup(chan->client, rc->tag);
+		req->status = REQ_STATUS_RCVD;
 		p9_client_cb(chan->client, req);
 	}
 }
-- 
cgit v1.2.3


From 9b8de7479d0dbab1ed98b5b015d44232c9d3d08e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 21 Apr 2009 23:00:24 +0100
Subject: FRV: Fix the section attribute on UP DECLARE_PER_CPU()

In non-SMP mode, the variable section attribute specified by DECLARE_PER_CPU()
does not agree with that specified by DEFINE_PER_CPU().  This means that
architectures that have a small data section references relative to a base
register may throw up linkage errors due to too great a displacement between
where the base register points and the per-CPU variable.

On FRV, the .h declaration says that the variable is in the .sdata section, but
the .c definition says it's actually in the .data section.  The linker throws
up the following errors:

kernel/built-in.o: In function `release_task':
kernel/exit.c:78: relocation truncated to fit: R_FRV_GPREL12 against symbol `per_cpu__process_counts' defined in .data section in kernel/built-in.o
kernel/exit.c:78: relocation truncated to fit: R_FRV_GPREL12 against symbol `per_cpu__process_counts' defined in .data section in kernel/built-in.o

To fix this, DECLARE_PER_CPU() should simply apply the same section attribute
as does DEFINE_PER_CPU().  However, this is made slightly more complex by
virtue of the fact that there are several variants on DEFINE, so these need to
be matched by variants on DECLARE.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/rds/rds.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rds/rds.h b/net/rds/rds.h
index 619f0a30a4e..71794449ca4 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -638,7 +638,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *,
 void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
 
 /* stats.c */
-DECLARE_PER_CPU(struct rds_statistics, rds_stats);
+DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
 #define rds_stats_inc_which(which, member) do {		\
 	per_cpu(which, get_cpu()).member++;		\
 	put_cpu();					\
-- 
cgit v1.2.3


From d0687be7c7ae21461da4438d5fd059b48487bfe1 Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Fri, 3 Apr 2009 15:18:24 -0500
Subject: svcrdma: Fix dma map direction for rdma read targets

The nfs server rdma transport was mapping rdma read target pages for
TO_DEVICE instead of FROM_DEVICE.  This causes data corruption on non
cache-coherent systems if frmrs are used.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 629a28764da..42a6f9f2028 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -265,7 +265,7 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
 		frmr->page_list->page_list[page_no] =
 			ib_dma_map_single(xprt->sc_cm_id->device,
 					  page_address(rqstp->rq_arg.pages[page_no]),
-					  PAGE_SIZE, DMA_TO_DEVICE);
+					  PAGE_SIZE, DMA_FROM_DEVICE);
 		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
 					 frmr->page_list->page_list[page_no]))
 			goto fatal_err;
-- 
cgit v1.2.3


From 21515e46bc6a6279dd13f6c01898ada9720100a3 Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Wed, 29 Apr 2009 14:14:00 -0500
Subject: svcrdma: clean up error paths.

These fixes resolved crashes due to resource leak BUG_ON checks. The
resource leaks were detected by introducing asynchronous transport errors.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    | 3 +++
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 6c26a675435..8b510c5e877 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -183,6 +183,7 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
 
  fatal_err:
 	printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
+	vec->frmr = NULL;
 	svc_rdma_put_frmr(xprt, frmr);
 	return -EIO;
 }
@@ -516,6 +517,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		       "svcrdma: could not post a receive buffer, err=%d."
 		       "Closing transport %p.\n", ret, rdma);
 		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+		svc_rdma_put_frmr(rdma, vec->frmr);
 		svc_rdma_put_context(ctxt, 0);
 		return -ENOTCONN;
 	}
@@ -606,6 +608,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	return 0;
 
  err:
+	svc_rdma_unmap_dma(ctxt);
 	svc_rdma_put_frmr(rdma, vec->frmr);
 	svc_rdma_put_context(ctxt, 1);
 	return -EIO;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 3d810e7df3f..4b0c2fa15e0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -520,8 +520,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 	svc_xprt_get(&xprt->sc_xprt);
 	ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
 	if (ret) {
-		svc_xprt_put(&xprt->sc_xprt);
+		svc_rdma_unmap_dma(ctxt);
 		svc_rdma_put_context(ctxt, 1);
+		svc_xprt_put(&xprt->sc_xprt);
 	}
 	return ret;
 
-- 
cgit v1.2.3


From e81963b180ac502fda0326edf059b1e29cdef1a2 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 8 May 2009 12:45:26 -0700
Subject: ipv4: Make INET_LRO a bool instead of tristate.

This code is used as a library by several device drivers,
which select INET_LRO.

If some are modules and some are statically built into the
kernel, we get build failures if INET_LRO is modular.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index b2cf91e4cca..9d26a3da37e 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -407,7 +407,7 @@ config INET_XFRM_MODE_BEET
 	  If unsure, say Y.
 
 config INET_LRO
-	tristate "Large Receive Offload (ipv4/tcp)"
+	bool "Large Receive Offload (ipv4/tcp)"
 
 	---help---
 	  Support for Large Receive Offload (ipv4/tcp).
-- 
cgit v1.2.3


From be8be9eccbf2d908a7e56b3f7a71105cd88da06b Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Wed, 6 May 2009 15:02:29 +0000
Subject: ipvs: Fix IPv4 FWMARK virtual services
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes the use of fwmarks to denote IPv4 virtual services
which was unfortunately broken as a result of the integration
of IPv6 support into IPVS, which was included in 2.6.28.

The problem arises because fwmarks are stored in the 4th octet
of a union nf_inet_addr .all, however in the case of IPv4 only
the first octet, corresponding to .ip, is assigned and compared.

In other words, using .all = { 0, 0, 0, htonl(svc->fwmark) always
results in a value of 0 (32bits) being stored for IPv4. This means
that one fwmark can be used, as it ends up being mapped to 0, but things
break down when multiple fwmarks are used, as they all end up being mapped
to 0.

As fwmarks are 32bits a reasonable fix seems to be to just store the fwmark
in .ip, and comparing and storing .ip when fwmarks are used.

This patch makes the assumption that in calls to ip_vs_ct_in_get()
and ip_vs_sched_persist() if the proto parameter is IPPROTO_IP then
we are dealing with an fwmark. I believe this is valid as ip_vs_in()
does fairly strict filtering on the protocol and IPPROTO_IP should
not be used in these calls unless explicitly passed when making
these calls for fwmarks in ip_vs_sched_persist().

Tested-by: Fabien Duchêne <fabien.duchene@student.uclouvain.be>
Cc: Joseph Mack NA3T <jmack@wm7d.net>
Cc: Julius Volz <julius.volz@gmail.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/ip_vs_conn.c | 9 +++++++--
 net/netfilter/ipvs/ip_vs_core.c | 4 ++--
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 60aba45023f..77bfdfeb966 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -260,7 +260,10 @@ struct ip_vs_conn *ip_vs_ct_in_get
 	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
 		if (cp->af == af &&
 		    ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
-		    ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+		    /* protocol should only be IPPROTO_IP if
+		     * d_addr is a fwmark */
+		    ip_vs_addr_equal(protocol == IPPROTO_IP ? AF_UNSPEC : af,
+		                     d_addr, &cp->vaddr) &&
 		    s_port == cp->cport && d_port == cp->vport &&
 		    cp->flags & IP_VS_CONN_F_TEMPLATE &&
 		    protocol == cp->protocol) {
@@ -698,7 +701,9 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
 	cp->cport	   = cport;
 	ip_vs_addr_copy(af, &cp->vaddr, vaddr);
 	cp->vport	   = vport;
-	ip_vs_addr_copy(af, &cp->daddr, daddr);
+	/* proto should only be IPPROTO_IP if d_addr is a fwmark */
+	ip_vs_addr_copy(proto == IPPROTO_IP ? AF_UNSPEC : af,
+			&cp->daddr, daddr);
 	cp->dport          = dport;
 	cp->flags	   = flags;
 	spin_lock_init(&cp->lock);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index cb3e031335e..8dddb17a947 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -278,7 +278,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		 */
 		if (svc->fwmark) {
 			union nf_inet_addr fwmark = {
-				.all = { 0, 0, 0, htonl(svc->fwmark) }
+				.ip = htonl(svc->fwmark)
 			};
 
 			ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
@@ -306,7 +306,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 			 */
 			if (svc->fwmark) {
 				union nf_inet_addr fwmark = {
-					.all = { 0, 0, 0, htonl(svc->fwmark) }
+					.ip = htonl(svc->fwmark)
 				};
 
 				ct = ip_vs_conn_new(svc->af, IPPROTO_IP,
-- 
cgit v1.2.3


From 384943ec1bb462e410390ad8f108ff1474cd882d Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 8 May 2009 18:20:43 -0700
Subject: Bluetooth: Fix wrong module refcount when connection setup fails

The module refcount is increased by hci_dev_hold() call in hci_conn_add()
and decreased by hci_dev_put() call in del_conn(). In case the connection
setup fails, hci_dev_put() is never called.

Procedure to reproduce the issue:

  # hciconfig hci0 up
  # lsmod | grep btusb                   -> "used by" refcount = 1

  # hcitool cc <non-exisiting bdaddr>    -> will get timeout

  # lsmod | grep btusb                   -> "used by" refcount = 2
  # hciconfig hci0 down
  # lsmod | grep btusb                   -> "used by" refcount = 1
  # rmmod btusb                          -> ERROR: Module btusb is in use

The hci_dev_put() call got moved into del_conn() with the 2.6.25 kernel
to fix an issue with hci_dev going away before hci_conn. However that
change was wrong and introduced this problem.

When calling hci_conn_del() it has to call hci_dev_put() after freeing
the connection details. This handling should be fully symmetric. The
execution of del_conn() is done in a work queue and needs it own calls
to hci_dev_hold() and hci_dev_put() to ensure that the hci_dev stays
until the connection cleanup has been finished.

Based on a report by Bing Zhao <bzhao@marvell.com>

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Tested-by: Bing Zhao <bzhao@marvell.com>
---
 net/bluetooth/hci_conn.c  | 2 ++
 net/bluetooth/hci_sysfs.c | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 61309b26f27..85a1c6be2db 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -292,6 +292,8 @@ int hci_conn_del(struct hci_conn *conn)
 
 	hci_conn_del_sysfs(conn);
 
+	hci_dev_put(hdev);
+
 	return 0;
 }
 
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index a05d45eb3ba..4cc3624bd22 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -99,6 +99,8 @@ static void add_conn(struct work_struct *work)
 		BT_ERR("Failed to register connection device");
 		return;
 	}
+
+	hci_dev_hold(hdev);
 }
 
 /*
@@ -134,6 +136,7 @@ static void del_conn(struct work_struct *work)
 
 	device_del(&conn->dev);
 	put_device(&conn->dev);
+
 	hci_dev_put(hdev);
 }
 
-- 
cgit v1.2.3


From 1b0336bb36f88976f1210a65b62f6a3e9578ee7b Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 9 May 2009 12:04:08 -0700
Subject: Bluetooth: Don't use hci_acl_connect_cancel() for incoming
 connections

The connection setup phase takes around 2 seconds or longer and in
that time it is possible that the need for an ACL connection is no
longer present. If that happens then, the connection attempt will
be canceled.

This only applies to outgoing connections, but currently it can also
be triggered by incoming connection. Don't call hci_acl_connect_cancel()
on incoming connection since these have to be either accepted or rejected
in this state. Once they are successfully connected they need to be
fully disconnected anyway.

Also remove the wrong hci_acl_disconn() call for SCO and eSCO links
since at this stage they can't be disconnected either, because the
connection handle is still unknown.

Based on a report by Johan Hedberg <johan.hedberg@nokia.com>

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Tested-by: Johan Hedberg <johan.hedberg@nokia.com>
---
 net/bluetooth/hci_conn.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 85a1c6be2db..fa47d5d84f5 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -171,10 +171,8 @@ static void hci_conn_timeout(unsigned long arg)
 	switch (conn->state) {
 	case BT_CONNECT:
 	case BT_CONNECT2:
-		if (conn->type == ACL_LINK)
+		if (conn->type == ACL_LINK && conn->out)
 			hci_acl_connect_cancel(conn);
-		else
-			hci_acl_disconn(conn, 0x13);
 		break;
 	case BT_CONFIG:
 	case BT_CONNECTED:
-- 
cgit v1.2.3


From 3d7a9d1c7ee251a04095d43eec5a3f4ff3f710a8 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 9 May 2009 12:09:21 -0700
Subject: Bluetooth: Don't trigger disconnect timeout for security mode 3
 pairing

A remote device in security mode 3 that tries to connect will require
the pairing during the connection setup phase. The disconnect timeout
is now triggered within 10 milliseconds and causes the pairing to fail.

If a connection is not fully established and a PIN code request is
received, don't trigger the disconnect timeout. The either successful
or failing connection complete event will make sure that the timeout
is triggered at the right time.

The biggest problem with security mode 3 is that many Bluetooth 2.0
device and before use a temporary security mode 3 for dedicated
bonding.

Based on a report by Johan Hedberg <johan.hedberg@nokia.com>

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Tested-by: Johan Hedberg <johan.hedberg@nokia.com>
---
 net/bluetooth/hci_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 4e7cb88e5da..184ba0a88ec 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1493,7 +1493,7 @@ static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff
 	hci_dev_lock(hdev);
 
 	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
-	if (conn) {
+	if (conn && conn->state == BT_CONNECTED) {
 		hci_conn_hold(conn);
 		conn->disc_timeout = HCI_PAIRING_TIMEOUT;
 		hci_conn_put(conn);
-- 
cgit v1.2.3


From 621ad7c96aa138cfeab53cd4debc5a4e08b2189b Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Tue, 5 May 2009 15:18:26 -0400
Subject: mac80211: avoid NULL ptr deref when finding max_rates in PID and
 minstrel

"There is another problem with this piece of code. The sband will be NULL
after second iteration on single band device and cause null pointer
dereference. Everything is working with dual band card. Sorry, but i
don't know how to explain this clearly in English. I have looked on the
second patch for pid algorithm and found similar bug."

Reported-by: Karol Szuster <qflon@o2.pl>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rc80211_minstrel.c | 2 +-
 net/mac80211/rc80211_pid_algo.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 70df3dcc3cf..d9233ec5061 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -477,7 +477,7 @@ minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
 
 	for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
 		sband = hw->wiphy->bands[i];
-		if (sband->n_bitrates > max_rates)
+		if (sband && sband->n_bitrates > max_rates)
 			max_rates = sband->n_bitrates;
 	}
 
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 01d59a8e334..8bef9a1262f 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -378,7 +378,7 @@ static void *rate_control_pid_alloc(struct ieee80211_hw *hw,
 
 	for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
 		sband = hw->wiphy->bands[i];
-		if (sband->n_bitrates > max_rates)
+		if (sband && sband->n_bitrates > max_rates)
 			max_rates = sband->n_bitrates;
 	}
 
-- 
cgit v1.2.3


From 5e392739d6ab72f7c35040aa07f4097904bce6e7 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 11 May 2009 00:36:35 +0000
Subject: netpoll: don't dereference NULL dev from np

It looks like the dev in netpoll_poll can be NULL - at lease it's
checked at the function beginning. Thus the dev->netde_ops dereference
looks dangerous.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index b5873bdff61..64f51eec657 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -175,9 +175,13 @@ static void service_arp_queue(struct netpoll_info *npi)
 void netpoll_poll(struct netpoll *np)
 {
 	struct net_device *dev = np->dev;
-	const struct net_device_ops *ops = dev->netdev_ops;
+	const struct net_device_ops *ops;
+
+	if (!dev || !netif_running(dev))
+		return;
 
-	if (!dev || !netif_running(dev) || !ops->ndo_poll_controller)
+	ops = dev->netdev_ops;
+	if (!ops->ndo_poll_controller)
 		return;
 
 	/* Process pending work on NIC */
-- 
cgit v1.2.3


From 2513dfb83fc775364fe85803d3a84d7ebe5763a5 Mon Sep 17 00:00:00 2001
From: Chris Friesen <cfriesen@nortel.com>
Date: Sun, 17 May 2009 20:39:33 -0700
Subject: ipconfig: handle case of delayed DHCP server

If a DHCP server is delayed, it's possible for the client to receive the
DHCPOFFER after it has already sent out a new DHCPDISCOVER message from
a second interface.  The client then sends out a DHCPREQUEST from the
second interface, but the server doesn't recognize the device and
rejects the request.

This patch simply tracks the current device being configured and throws
away the OFFER if it is not intended for the current device.  A more
sophisticated approach would be to put the OFFER information into the
struct ic_device rather than storing it globally.

Signed-off-by: Chris Friesen <cfriesen@nortel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipconfig.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 90d22ae0a41..88bf051d0cb 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -139,6 +139,8 @@ __be32 ic_servaddr = NONE;	/* Boot server IP address */
 __be32 root_server_addr = NONE;	/* Address of NFS server */
 u8 root_server_path[256] = { 0, };	/* Path to mount as root */
 
+u32 ic_dev_xid;		/* Device under configuration */
+
 /* vendor class identifier */
 static char vendor_class_identifier[253] __initdata;
 
@@ -932,6 +934,13 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 		goto drop_unlock;
 	}
 
+	/* Is it a reply for the device we are configuring? */
+	if (b->xid != ic_dev_xid) {
+		if (net_ratelimit())
+			printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n");
+		goto drop_unlock;
+	}
+
 	/* Parse extensions */
 	if (ext_len >= 4 &&
 	    !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */
@@ -1115,6 +1124,9 @@ static int __init ic_dynamic(void)
 	get_random_bytes(&timeout, sizeof(timeout));
 	timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM);
 	for (;;) {
+		/* Track the device we are configuring */
+		ic_dev_xid = d->xid;
+
 #ifdef IPCONFIG_BOOTP
 		if (do_bootp && (d->able & IC_BOOTP))
 			ic_bootp_send_if(d, jiffies - start_jiffies);
-- 
cgit v1.2.3


From a598f6aebea2481531b0757ed90cfb0d8cf1d8f5 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Fri, 15 May 2009 06:10:13 +0000
Subject: bridge: relay bridge multicast pkgs if !STP

Currently the bridge catches all STP packets; even if STP is turned
off.  This prevents other systems (which do have STP turned on)
from being able to detect loops in the network.

With this patch, if STP is off, then any packet sent to the STP
multicast group address is forwarded to all ports.

Based on earlier patch by Joakim Tjernlund with changes
to go through forwarding (not local chain), and optimization
that only last octet needs to be checked.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_input.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 30b88777c3d..5ee1a3682bf 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -134,6 +134,10 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 		if (skb->protocol == htons(ETH_P_PAUSE))
 			goto drop;
 
+		/* If STP is turned off, then forward */
+		if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0)
+			goto forward;
+
 		if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
 			    NULL, br_handle_local_finish))
 			return NULL;	/* frame consumed by filter */
@@ -141,6 +145,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 			return skb;	/* continue processing */
 	}
 
+forward:
 	switch (p->state) {
 	case BR_STATE_FORWARDING:
 		rhook = rcu_dereference(br_should_route_hook);
-- 
cgit v1.2.3


From 4f0611af47e25807cf18cd2b4d4e94206c75b29e Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Fri, 15 May 2009 06:11:58 +0000
Subject: bridge: fix initial packet flood if !STP

If bridge is configured with no STP and forwarding delay of 0 (which
is typical for virtualization) then when link starts it will flood all
packets for the first 20 seconds.

This bug was introduced by a combination of earlier changes:
  * forwarding database uses hold time of zero to indicate
    user wants to always flood packets
  * optimzation of the case of forwarding delay of 0 avoids the initial
    timer tick

The fix is to just skip all the topology change detection code if
kernel STP is not being used.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_stp.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 6e63ec3f1fc..0660515f399 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -297,6 +297,9 @@ void br_topology_change_detection(struct net_bridge *br)
 {
 	int isroot = br_is_root_bridge(br);
 
+	if (br->stp_enabled != BR_KERNEL_STP)
+		return;
+
 	pr_info("%s: topology change detected, %s\n", br->dev->name,
 		isroot ? "propagating" : "sending tcn bpdu");
 
-- 
cgit v1.2.3


From 775273131810caa41dfc7f9e552ea5d8508caf40 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 10 May 2009 20:32:34 +0000
Subject: tcp: fix MSG_PEEK race check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 518a09ef11 (tcp: Fix recvmsg MSG_PEEK influence of
blocking behavior) lets the loop run longer than the race check
did previously expect, so we need to be more careful with this
check and consider the work we have been doing.

I tried my best to deal with urg hole madness too which happens
here:
	if (!sock_flag(sk, SOCK_URGINLINE)) {
		++*seq;
		...
by using additional offset by one but I certainly have very
little interest in testing that part.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Tested-by: Frans Pop <elendil@planet.nl>
Tested-by: Ian Zimmermann <itz@buug.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1d7f49c6f0c..7a0f0b27bf1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1321,6 +1321,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	struct task_struct *user_recv = NULL;
 	int copied_early = 0;
 	struct sk_buff *skb;
+	u32 urg_hole = 0;
 
 	lock_sock(sk);
 
@@ -1532,7 +1533,8 @@ do_prequeue:
 				}
 			}
 		}
-		if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) {
+		if ((flags & MSG_PEEK) &&
+		    (peek_seq - copied - urg_hole != tp->copied_seq)) {
 			if (net_ratelimit())
 				printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
 				       current->comm, task_pid_nr(current));
@@ -1553,6 +1555,7 @@ do_prequeue:
 				if (!urg_offset) {
 					if (!sock_flag(sk, SOCK_URGINLINE)) {
 						++*seq;
+						urg_hole++;
 						offset++;
 						used--;
 						if (!used)
-- 
cgit v1.2.3


From c0f84d0d4be3f7d818b4ffb04d27f9bae64397f0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 18 May 2009 15:12:31 -0700
Subject: sch_teql: should not dereference skb after ndo_start_xmit()

It is illegal to dereference a skb after a successful ndo_start_xmit()
call. We must store skb length in a local variable instead.

Bug was introduced in 2.6.27 by commit 0abf77e55a2459aa9905be4b226e4729d5b4f0cb
(net_sched: Add accessor function for packet length for qdiscs)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_teql.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index ec697cebb63..3b641829723 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -303,6 +303,8 @@ restart:
 		switch (teql_resolve(skb, skb_res, slave)) {
 		case 0:
 			if (__netif_tx_trylock(slave_txq)) {
+				unsigned int length = qdisc_pkt_len(skb);
+
 				if (!netif_tx_queue_stopped(slave_txq) &&
 				    !netif_tx_queue_frozen(slave_txq) &&
 				    slave_ops->ndo_start_xmit(skb, slave) == 0) {
@@ -310,8 +312,7 @@ restart:
 					master->slaves = NEXT_SLAVE(q);
 					netif_wake_queue(dev);
 					master->stats.tx_packets++;
-					master->stats.tx_bytes +=
-						qdisc_pkt_len(skb);
+					master->stats.tx_bytes += length;
 					return 0;
 				}
 				__netif_tx_unlock(slave_txq);
-- 
cgit v1.2.3


From 511e11e396dc596825ce04d53d7f6d579404bc01 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 18 May 2009 19:26:37 -0700
Subject: pkt_sched: gen_estimator: use 64 bit intermediate counters for bps

gen_estimator can overflow bps (bytes per second) with Gb links, while
it was designed with a u32 API, with a theorical limit of 34360Mbit
(2^32 bytes)

Using 64 bit intermediate avbps/brate counters can allow us to reach
this theorical limit.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/gen_estimator.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 9cc9f95b109..6d62d4618cf 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -66,9 +66,9 @@
 
    NOTES.
 
-   * The stored value for avbps is scaled by 2^5, so that maximal
-     rate is ~1Gbit, avpps is scaled by 2^10.
-
+   * avbps is scaled by 2^5, avpps is scaled by 2^10.
+   * both values are reported as 32 bit unsigned values. bps can
+     overflow for fast links : max speed being 34360Mbit/sec
    * Minimal interval is HZ/4=250msec (it is the greatest common divisor
      for HZ=100 and HZ=1024 8)), maximal interval
      is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
@@ -86,9 +86,9 @@ struct gen_estimator
 	spinlock_t		*stats_lock;
 	int			ewma_log;
 	u64			last_bytes;
+	u64			avbps;
 	u32			last_packets;
 	u32			avpps;
-	u32			avbps;
 	struct rcu_head		e_rcu;
 	struct rb_node		node;
 };
@@ -115,6 +115,7 @@ static void est_timer(unsigned long arg)
 	rcu_read_lock();
 	list_for_each_entry_rcu(e, &elist[idx].list, list) {
 		u64 nbytes;
+		u64 brate;
 		u32 npackets;
 		u32 rate;
 
@@ -125,9 +126,9 @@ static void est_timer(unsigned long arg)
 
 		nbytes = e->bstats->bytes;
 		npackets = e->bstats->packets;
-		rate = (nbytes - e->last_bytes)<<(7 - idx);
+		brate = (nbytes - e->last_bytes)<<(7 - idx);
 		e->last_bytes = nbytes;
-		e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
+		e->avbps += ((s64)(brate - e->avbps)) >> e->ewma_log;
 		e->rate_est->bps = (e->avbps+0xF)>>5;
 
 		rate = (npackets - e->last_packets)<<(12 - idx);
-- 
cgit v1.2.3