From 2f28c8b31dc501027d9aa6acf496c5941736312b Mon Sep 17 00:00:00 2001
From: Jim Garlick <garlick@llnl.gov>
Date: Wed, 29 May 2013 12:15:07 -0700
Subject: net/9p: add privport option to 9p tcp transport

If the privport option is specified, the TCP transport binds the local
address to a reserved port before connecting to the 9P server.

In some cases when 9P AUTH cannot be implemented, this is better than
nothing.

Signed-off-by: Jim Garlick <garlick@llnl.gov>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 02efb25c2957..3ffda1b3799b 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -63,6 +63,7 @@ struct p9_fd_opts {
 	int rfd;
 	int wfd;
 	u16 port;
+	int privport;
 };
 
 /**
@@ -87,12 +88,15 @@ struct p9_trans_fd {
 enum {
 	/* Options that take integer arguments */
 	Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
+	/* Options that take no arguments */
+	Opt_privport,
 };
 
 static const match_table_t tokens = {
 	{Opt_port, "port=%u"},
 	{Opt_rfdno, "rfdno=%u"},
 	{Opt_wfdno, "wfdno=%u"},
+	{Opt_privport, "privport"},
 	{Opt_err, NULL},
 };
 
@@ -161,6 +165,9 @@ static DEFINE_SPINLOCK(p9_poll_lock);
 static LIST_HEAD(p9_poll_pending_list);
 static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
 
+static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
+static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;
+
 static void p9_mux_poll_stop(struct p9_conn *m)
 {
 	unsigned long flags;
@@ -741,7 +748,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 		if (!*p)
 			continue;
 		token = match_token(p, tokens, args);
-		if (token != Opt_err) {
+		if ((token != Opt_err) && (token != Opt_privport)) {
 			r = match_int(&args[0], &option);
 			if (r < 0) {
 				p9_debug(P9_DEBUG_ERROR,
@@ -759,6 +766,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 		case Opt_wfdno:
 			opts->wfd = option;
 			break;
+		case Opt_privport:
+			opts->privport = 1;
+			break;
 		default:
 			continue;
 		}
@@ -898,6 +908,24 @@ static inline int valid_ipaddr4(const char *buf)
 	return 0;
 }
 
+static int p9_bind_privport(struct socket *sock)
+{
+	struct sockaddr_in cl;
+	int port, err = -EINVAL;
+
+	memset(&cl, 0, sizeof(cl));
+	cl.sin_family = AF_INET;
+	cl.sin_addr.s_addr = INADDR_ANY;
+	for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
+		cl.sin_port = htons((ushort)port);
+		err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl));
+		if (err != -EADDRINUSE)
+			break;
+	}
+	return err;
+}
+
+
 static int
 p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 {
@@ -926,6 +954,16 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 		return err;
 	}
 
+	if (opts.privport) {
+		err = p9_bind_privport(csocket);
+		if (err < 0) {
+			pr_err("%s (%d): problem binding to privport\n",
+			       __func__, task_pid_nr(current));
+			sock_release(csocket);
+			return err;
+		}
+	}
+
 	err = csocket->ops->connect(csocket,
 				    (struct sockaddr *)&sin_server,
 				    sizeof(struct sockaddr_in), 0);
-- 
cgit v1.2.3


From ea071aa1365eaf8a79b33bd8699cb0811dcddf34 Mon Sep 17 00:00:00 2001
From: Simon Derr <simon.derr@bull.net>
Date: Fri, 21 Jun 2013 15:32:34 +0200
Subject: 9P: Fix fcall allocation for rdma

The current code assumes that when a request in the request array
has a tc, it also has an rc.

This is normally true, but not always: when using RDMA, req->rc
will temporarily be set to NULL after the request has been sent.
That is usually OK though, as when the reply arrives, req->rc will be
reassigned to a sane value before the request is recycled.

But there is a catch: if the request is flushed, the reply will never
arrive, and req->rc will be NULL while req->tc is not.

This patch fixes p9_tag_alloc to take this into account.

Signed-off-by: Simon Derr <simon.derr@bull.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index 01f1779eba80..5828769d1f3d 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -258,27 +258,25 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
 	req = &c->reqs[row][col];
 	if (!req->tc) {
 		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
-		if (!req->wq) {
-			pr_err("Couldn't grow tag array\n");
-			return ERR_PTR(-ENOMEM);
-		}
+		if (!req->wq)
+			goto grow_failed;
+
 		init_waitqueue_head(req->wq);
 		req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
 				  GFP_NOFS);
+		if (!req->tc)
+			goto grow_failed;
+
+		req->tc->capacity = alloc_msize;
+		req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
+	}
+	if (!req->rc) {
 		req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
 				  GFP_NOFS);
-		if ((!req->tc) || (!req->rc)) {
-			pr_err("Couldn't grow tag array\n");
-			kfree(req->tc);
-			kfree(req->rc);
-			kfree(req->wq);
-			req->tc = req->rc = NULL;
-			req->wq = NULL;
-			return ERR_PTR(-ENOMEM);
-		}
-		req->tc->capacity = alloc_msize;
+		if (!req->rc)
+			goto grow_failed;
+
 		req->rc->capacity = alloc_msize;
-		req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
 		req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
 	}
 
@@ -288,7 +286,16 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
 	req->tc->tag = tag-1;
 	req->status = REQ_STATUS_ALLOC;
 
-	return &c->reqs[row][col];
+	return req;
+
+grow_failed:
+	pr_err("Couldn't grow tag array\n");
+	kfree(req->tc);
+	kfree(req->rc);
+	kfree(req->wq);
+	req->tc = req->rc = NULL;
+	req->wq = NULL;
+	return ERR_PTR(-ENOMEM);
 }
 
 /**
-- 
cgit v1.2.3


From 17b6fd9d6dfa0faed3a25a6045f7456821ea140a Mon Sep 17 00:00:00 2001
From: Simon Derr <simon.derr@bull.net>
Date: Fri, 21 Jun 2013 15:32:35 +0200
Subject: 9P/RDMA: rdma_request() needs not allocate req->rc

p9_tag_alloc() takes care of that.

Signed-off-by: Simon Derr <simon.derr@bull.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_rdma.c | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 2c69ddd691a1..b1dfdf2078ff 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -427,26 +427,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 		err = -ENOMEM;
 		goto err_close;
 	}
-
-	/*
-	 * If the request has a buffer, steal it, otherwise
-	 * allocate a new one.  Typically, requests should already
-	 * have receive buffers allocated and just swap them around
-	 */
-	if (!req->rc) {
-		req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
-				  GFP_NOFS);
-		if (req->rc) {
-			req->rc->sdata = (char *) req->rc +
-						sizeof(struct p9_fcall);
-			req->rc->capacity = client->msize;
-		}
-	}
 	rpl_context->rc = req->rc;
-	if (!rpl_context->rc) {
-		err = -ENOMEM;
-		goto err_free2;
-	}
 
 	/*
 	 * Post a receive buffer for this request. We need to ensure
-- 
cgit v1.2.3


From 5387320d4814aa1e40b50529d960a8f2b3340535 Mon Sep 17 00:00:00 2001
From: Simon Derr <simon.derr@bull.net>
Date: Fri, 21 Jun 2013 15:32:36 +0200
Subject: 9pnet: refactor struct p9_fcall alloc code

Signed-off-by: Simon Derr <simon.derr@bull.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index 5828769d1f3d..db5bf2480a33 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -204,6 +204,17 @@ free_and_return:
 	return ret;
 }
 
+struct p9_fcall *p9_fcall_alloc(int alloc_msize)
+{
+	struct p9_fcall *fc;
+	fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
+	if (!fc)
+		return NULL;
+	fc->capacity = alloc_msize;
+	fc->sdata = (char *) fc + sizeof(struct p9_fcall);
+	return fc;
+}
+
 /**
  * p9_tag_alloc - lookup/allocate a request by tag
  * @c: client session to lookup tag within
@@ -256,29 +267,19 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
 	col = tag % P9_ROW_MAXTAG;
 
 	req = &c->reqs[row][col];
-	if (!req->tc) {
+	if (!req->wq) {
 		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
 		if (!req->wq)
 			goto grow_failed;
-
 		init_waitqueue_head(req->wq);
-		req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
-				  GFP_NOFS);
-		if (!req->tc)
-			goto grow_failed;
-
-		req->tc->capacity = alloc_msize;
-		req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
 	}
-	if (!req->rc) {
-		req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
-				  GFP_NOFS);
-		if (!req->rc)
-			goto grow_failed;
 
-		req->rc->capacity = alloc_msize;
-		req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
-	}
+	if (!req->tc)
+		req->tc = p9_fcall_alloc(alloc_msize);
+	if (!req->rc)
+		req->rc = p9_fcall_alloc(alloc_msize);
+	if (!req->tc || !req->rc)
+		goto grow_failed;
 
 	p9pdu_reset(req->tc);
 	p9pdu_reset(req->rc);
-- 
cgit v1.2.3


From 3fcc62f4e8620fd5f85f957a5e708e69a20adb51 Mon Sep 17 00:00:00 2001
From: Simon Derr <simon.derr@bull.net>
Date: Fri, 21 Jun 2013 15:32:37 +0200
Subject: 9P/RDMA: increase P9_RDMA_MAXSIZE to 1MB

The current value is too low to get good performance.

Signed-off-by: Simon Derr <simon.derr@bull.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_rdma.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index b1dfdf2078ff..b8b66d38f5b0 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -57,9 +57,7 @@
 #define P9_RDMA_IRD		0
 #define P9_RDMA_ORD		0
 #define P9_RDMA_TIMEOUT		30000		/* 30 seconds */
-#define P9_RDMA_MAXSIZE		(4*4096)	/* Min SGE is 4, so we can
-						 * safely advertise a maxsize
-						 * of 64k */
+#define P9_RDMA_MAXSIZE		(1024*1024)	/* 1MB */
 
 /**
  * struct p9_trans_rdma - RDMA transport instance
-- 
cgit v1.2.3


From 47229ff85e5a0b0613df2288d212938aeb9687da Mon Sep 17 00:00:00 2001
From: Simon Derr <simon.derr@bull.net>
Date: Fri, 21 Jun 2013 15:32:38 +0200
Subject: 9P/RDMA: Protect against duplicate replies

A well-behaved server would not send the reply to a request twice.
But if it ever happens...
This additional check prevents the kernel from leaking memory,
and possibly from suffering nastier consequences, in that unlikely event.

Signed-off-by: Simon Derr <simon.derr@bull.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_rdma.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index b8b66d38f5b0..274a9c1d3c3d 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -294,6 +294,13 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
 	if (!req)
 		goto err_out;
 
+	/* Check that we have not yet received a reply for this request.
+	 */
+	if (unlikely(req->rc)) {
+		pr_err("Duplicate reply for request %d", tag);
+		goto err_out;
+	}
+
 	req->rc = c->rc;
 	req->status = REQ_STATUS_RCVD;
 	p9_client_cb(client, req);
-- 
cgit v1.2.3


From fd453d0ed6c1dacef8eff466df473d62d63db1e9 Mon Sep 17 00:00:00 2001
From: Simon Derr <simon.derr@bull.net>
Date: Fri, 21 Jun 2013 15:32:39 +0200
Subject: 9P/RDMA: Use a semaphore to protect the RQ

The current code keeps track of the number of buffers posted in the RQ,
and will prevent it from overflowing. But it does so by simply dropping
post requests (and leaking memory in the process).
When this happens, there will actually be too few buffers posted, and
soon the 9P server will complain about 'RNR retry counter exceeded'
errors.

Instead, use a semaphore, and block until the RQ is ready for another
buffer to be posted.

Signed-off-by: Simon Derr <simon.derr@bull.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_rdma.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 274a9c1d3c3d..ad8dc331574b 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -73,7 +73,7 @@
  * @sq_depth: The depth of the Send Queue
  * @sq_sem: Semaphore for the SQ
  * @rq_depth: The depth of the Receive Queue.
- * @rq_count: Count of requests in the Receive Queue.
+ * @rq_sem: Semaphore for the RQ
  * @addr: The remote peer's address
  * @req_lock: Protects the active request list
  * @cm_done: Completion event for connection management tracking
@@ -98,7 +98,7 @@ struct p9_trans_rdma {
 	int sq_depth;
 	struct semaphore sq_sem;
 	int rq_depth;
-	atomic_t rq_count;
+	struct semaphore rq_sem;
 	struct sockaddr_in addr;
 	spinlock_t req_lock;
 
@@ -341,8 +341,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
 
 		switch (c->wc_op) {
 		case IB_WC_RECV:
-			atomic_dec(&rdma->rq_count);
 			handle_recv(client, rdma, c, wc.status, wc.byte_len);
+			up(&rdma->rq_sem);
 			break;
 
 		case IB_WC_SEND:
@@ -441,12 +441,14 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	 * outstanding request, so we must keep a count to avoid
 	 * overflowing the RQ.
 	 */
-	if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
-		err = post_recv(client, rpl_context);
-		if (err)
-			goto err_free1;
-	} else
-		atomic_dec(&rdma->rq_count);
+	if (down_interruptible(&rdma->rq_sem))
+		goto error; /* FIXME : -EINTR instead */
+
+	err = post_recv(client, rpl_context);
+	if (err) {
+		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
+		goto err_free1;
+	}
 
 	/* remove posted receive buffer from request structure */
 	req->rc = NULL;
@@ -537,7 +539,7 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
 	spin_lock_init(&rdma->req_lock);
 	init_completion(&rdma->cm_done);
 	sema_init(&rdma->sq_sem, rdma->sq_depth);
-	atomic_set(&rdma->rq_count, 0);
+	sema_init(&rdma->rq_sem, rdma->rq_depth);
 
 	return rdma;
 }
-- 
cgit v1.2.3


From b530e252e291c27fdcb1b73c72ad17f75c8bdba6 Mon Sep 17 00:00:00 2001
From: Simon Derr <simon.derr@bull.net>
Date: Fri, 21 Jun 2013 15:32:40 +0200
Subject: 9P/RDMA: Do not free req->rc in error handling in rdma_request()

rdma_request() should never be in charge of freeing rc.

When an error occurs:
* Either the rc buffer has been post_recv()'ed,
  in which case kfree()'ing it is certainly a bad idea.
* Or it has not, and in that case req->rc still points to it,
  hence it need not be freed.

Signed-off-by: Simon Derr <simon.derr@bull.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_rdma.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index ad8dc331574b..1bd4c7150114 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -447,7 +447,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	err = post_recv(client, rpl_context);
 	if (err) {
 		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
-		goto err_free1;
+		goto err_free;
 	}
 
 	/* remove posted receive buffer from request structure */
@@ -457,7 +457,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	c = kmalloc(sizeof *c, GFP_NOFS);
 	if (!c) {
 		err = -ENOMEM;
-		goto err_free1;
+		goto err_free;
 	}
 	c->req = req;
 
@@ -486,13 +486,10 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 
  error:
 	kfree(c);
-	kfree(rpl_context->rc);
 	kfree(rpl_context);
 	p9_debug(P9_DEBUG_ERROR, "EIO\n");
 	return -EIO;
- err_free1:
-	kfree(rpl_context->rc);
- err_free2:
+ err_free:
 	kfree(rpl_context);
  err_close:
 	spin_lock_irqsave(&rdma->req_lock, flags);
-- 
cgit v1.2.3


From 2f52d07cb75d96fcbb5b9ab72938590fa9ffb19d Mon Sep 17 00:00:00 2001
From: Simon Derr <simon.derr@bull.net>
Date: Fri, 21 Jun 2013 15:32:41 +0200
Subject: 9P/RDMA: Improve error handling in rdma_request

Most importantly:
- do not free the recv context (rpl_context) after a successful post_recv()
- but do free the send context (c) after a failed send.

Signed-off-by: Simon Derr <simon.derr@bull.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_rdma.c | 44 ++++++++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 1bd4c7150114..926e72d00e57 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -430,7 +430,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
 	if (!rpl_context) {
 		err = -ENOMEM;
-		goto err_close;
+		goto recv_error;
 	}
 	rpl_context->rc = req->rc;
 
@@ -441,13 +441,15 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	 * outstanding request, so we must keep a count to avoid
 	 * overflowing the RQ.
 	 */
-	if (down_interruptible(&rdma->rq_sem))
-		goto error; /* FIXME : -EINTR instead */
+	if (down_interruptible(&rdma->rq_sem)) {
+		err = -EINTR;
+		goto recv_error;
+	}
 
 	err = post_recv(client, rpl_context);
 	if (err) {
 		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
-		goto err_free;
+		goto recv_error;
 	}
 
 	/* remove posted receive buffer from request structure */
@@ -457,15 +459,17 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	c = kmalloc(sizeof *c, GFP_NOFS);
 	if (!c) {
 		err = -ENOMEM;
-		goto err_free;
+		goto send_error;
 	}
 	c->req = req;
 
 	c->busa = ib_dma_map_single(rdma->cm_id->device,
 				    c->req->tc->sdata, c->req->tc->size,
 				    DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
-		goto error;
+	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
+		err = -EIO;
+		goto send_error;
+	}
 
 	sge.addr = c->busa;
 	sge.length = c->req->tc->size;
@@ -479,19 +483,27 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	wr.sg_list = &sge;
 	wr.num_sge = 1;
 
-	if (down_interruptible(&rdma->sq_sem))
-		goto error;
+	if (down_interruptible(&rdma->sq_sem)) {
+		err = -EINTR;
+		goto send_error;
+	}
 
-	return ib_post_send(rdma->qp, &wr, &bad_wr);
+	err = ib_post_send(rdma->qp, &wr, &bad_wr);
+	if (err)
+		goto send_error;
 
- error:
+	/* Success */
+	return 0;
+
+ /* Handle errors that happened during or while preparing the send: */
+ send_error:
 	kfree(c);
+	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
+	return err;
+
+ /* Handle errors that happened during or while preparing post_recv(): */
+ recv_error:
 	kfree(rpl_context);
-	p9_debug(P9_DEBUG_ERROR, "EIO\n");
-	return -EIO;
- err_free:
-	kfree(rpl_context);
- err_close:
 	spin_lock_irqsave(&rdma->req_lock, flags);
 	if (rdma->state < P9_RDMA_CLOSING) {
 		rdma->state = P9_RDMA_CLOSING;
-- 
cgit v1.2.3


From 1cff33069a4a1ac9ed080756113ecd17ad408282 Mon Sep 17 00:00:00 2001
From: Simon Derr <simon.derr@bull.net>
Date: Fri, 21 Jun 2013 15:32:42 +0200
Subject: 9P/RDMA: count posted buffers without a pending request

In rdma_request():

If an error occurs between posting the recv and the send,
there will be a reply context posted without a pending
request.
Since there is no way to "un-post" it, we remember it and
skip post_recv() for the next request.

Signed-off-by: Simon Derr <simon.derr@bull.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c     |  6 ++++--
 net/9p/trans_rdma.c | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index db5bf2480a33..d18a0b22f62c 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -656,8 +656,10 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
 		return PTR_ERR(req);
 
 
-	/* if we haven't received a response for oldreq,
-	   remove it from the list. */
+	/*
+	 * if we haven't received a response for oldreq,
+	 * remove it from the list.
+	 */
 	spin_lock(&c->lock);
 	if (oldreq->status == REQ_STATUS_FLSH)
 		list_del(&oldreq->req_list);
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 926e72d00e57..8f68df5d2973 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -74,6 +74,8 @@
  * @sq_sem: Semaphore for the SQ
  * @rq_depth: The depth of the Receive Queue.
  * @rq_sem: Semaphore for the RQ
+ * @excess_rc : Amount of posted Receive Contexts without a pending request.
+ *		See rdma_request()
  * @addr: The remote peer's address
  * @req_lock: Protects the active request list
  * @cm_done: Completion event for connection management tracking
@@ -99,6 +101,7 @@ struct p9_trans_rdma {
 	struct semaphore sq_sem;
 	int rq_depth;
 	struct semaphore rq_sem;
+	atomic_t excess_rc;
 	struct sockaddr_in addr;
 	spinlock_t req_lock;
 
@@ -426,6 +429,26 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	struct p9_rdma_context *c = NULL;
 	struct p9_rdma_context *rpl_context = NULL;
 
+	/* When an error occurs between posting the recv and the send,
+	 * there will be a receive context posted without a pending request.
+	 * Since there is no way to "un-post" it, we remember it and skip
+	 * post_recv() for the next request.
+	 * So here,
+	 * see if we are this `next request' and need to absorb an excess rc.
+	 * If yes, then drop and free our own, and do not recv_post().
+	 **/
+	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
+		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
+			/* Got one ! */
+			kfree(req->rc);
+			req->rc = NULL;
+			goto dont_need_post_recv;
+		} else {
+			/* We raced and lost. */
+			atomic_inc(&rdma->excess_rc);
+		}
+	}
+
 	/* Allocate an fcall for the reply */
 	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
 	if (!rpl_context) {
@@ -451,10 +474,10 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
 		goto recv_error;
 	}
-
 	/* remove posted receive buffer from request structure */
 	req->rc = NULL;
 
+dont_need_post_recv:
 	/* Post the request */
 	c = kmalloc(sizeof *c, GFP_NOFS);
 	if (!c) {
@@ -499,6 +522,11 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
  send_error:
 	kfree(c);
 	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
+
+	/* Ach.
+	 *  We did recv_post(), but not send. We have one recv_post in excess.
+	 */
+	atomic_inc(&rdma->excess_rc);
 	return err;
 
  /* Handle errors that happened during or while preparing post_recv(): */
@@ -549,6 +577,7 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
 	init_completion(&rdma->cm_done);
 	sema_init(&rdma->sq_sem, rdma->sq_depth);
 	sema_init(&rdma->rq_sem, rdma->rq_depth);
+	atomic_set(&rdma->excess_rc, 0);
 
 	return rdma;
 }
-- 
cgit v1.2.3


From 80b45261a0b263536b043c5ccfc4ba4fc27c2acc Mon Sep 17 00:00:00 2001
From: Simon Derr <simon.derr@bull.net>
Date: Fri, 21 Jun 2013 15:32:43 +0200
Subject: 9P: Add cancelled() to the transport functions.

RDMA needs to post a buffer for each incoming reply.
Hence it needs to keep count of these and needs to be
aware of whether a flushed request has received a reply
or not.

This patch adds the cancelled() callback to the transport modules.
It is called when RFLUSH has been received and the corresponding
request will never receive a reply.

Signed-off-by: Simon Derr <simon.derr@bull.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c     | 12 +++++++++---
 net/9p/trans_rdma.c | 11 +++++++++++
 2 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index d18a0b22f62c..8b93cae2d11d 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -658,12 +658,18 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
 
 	/*
 	 * if we haven't received a response for oldreq,
-	 * remove it from the list.
+	 * remove it from the list, and notify the transport
+	 * layer that the reply will never arrive.
 	 */
 	spin_lock(&c->lock);
-	if (oldreq->status == REQ_STATUS_FLSH)
+	if (oldreq->status == REQ_STATUS_FLSH) {
 		list_del(&oldreq->req_list);
-	spin_unlock(&c->lock);
+		spin_unlock(&c->lock);
+		if (c->trans_mod->cancelled)
+			c->trans_mod->cancelled(c, req);
+	} else {
+		spin_unlock(&c->lock);
+	}
 
 	p9_free_req(c, req);
 	return 0;
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 8f68df5d2973..928f2bb9bf8d 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -588,6 +588,17 @@ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
 	return 1;
 }
 
+/* A request has been fully flushed without a reply.
+ * That means we have posted one buffer in excess.
+ */
+static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
+{
+	struct p9_trans_rdma *rdma = client->trans;
+
+	atomic_inc(&rdma->excess_rc);
+	return 0;
+}
+
 /**
  * trans_create_rdma - Transport method for creating atransport instance
  * @client: client instance
-- 
cgit v1.2.3