summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRalph Campbell <ralph.campbell@qlogic.com>2008-12-01 20:59:08 -0800
committerRoland Dreier <rolandd@cisco.com>2008-12-01 20:59:08 -0800
commit7c37d74474c8ee8ddcd5a2d2a9571d4a1290c844 (patch)
treed44fb97adfa0b036d0a0db193b3273eff5616246
parent64f22fa17c1a531e682ebc882566856ea5718495 (diff)
downloadlinux-3.10-7c37d74474c8ee8ddcd5a2d2a9571d4a1290c844.tar.gz
linux-3.10-7c37d74474c8ee8ddcd5a2d2a9571d4a1290c844.tar.bz2
linux-3.10-7c37d74474c8ee8ddcd5a2d2a9571d4a1290c844.zip
IB/ipath: Improve UD loopback performance by allocating temp array only once
Receive work queue entries are checked for L_Key validity, and pointers to the memory region structure are saved in an allocated structure. For UD loopback packets, this structure is allocated and freed for each packet. This patch changes that to allocate/free during QP creation and destruction. Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c32
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c19
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h1
3 files changed, 26 insertions, 26 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 4715911101e..3a5a89b609c 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -745,6 +745,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
struct ipath_swqe *swq = NULL;
struct ipath_ibdev *dev;
size_t sz;
+ size_t sg_list_sz;
struct ib_qp *ret;
if (init_attr->create_flags) {
@@ -789,19 +790,31 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
goto bail;
}
sz = sizeof(*qp);
+ sg_list_sz = 0;
if (init_attr->srq) {
struct ipath_srq *srq = to_isrq(init_attr->srq);
- sz += sizeof(*qp->r_sg_list) *
- srq->rq.max_sge;
- } else
- sz += sizeof(*qp->r_sg_list) *
- init_attr->cap.max_recv_sge;
- qp = kmalloc(sz, GFP_KERNEL);
+ if (srq->rq.max_sge > 1)
+ sg_list_sz = sizeof(*qp->r_sg_list) *
+ (srq->rq.max_sge - 1);
+ } else if (init_attr->cap.max_recv_sge > 1)
+ sg_list_sz = sizeof(*qp->r_sg_list) *
+ (init_attr->cap.max_recv_sge - 1);
+ qp = kmalloc(sz + sg_list_sz, GFP_KERNEL);
if (!qp) {
ret = ERR_PTR(-ENOMEM);
goto bail_swq;
}
+ if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD ||
+ init_attr->qp_type == IB_QPT_SMI ||
+ init_attr->qp_type == IB_QPT_GSI)) {
+ qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
+ if (!qp->r_ud_sg_list) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_qp;
+ }
+ } else
+ qp->r_ud_sg_list = NULL;
if (init_attr->srq) {
sz = 0;
qp->r_rq.size = 0;
@@ -818,7 +831,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
qp->r_rq.size * sz);
if (!qp->r_rq.wq) {
ret = ERR_PTR(-ENOMEM);
- goto bail_qp;
+ goto bail_sg_list;
}
}
@@ -848,7 +861,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
if (err) {
ret = ERR_PTR(err);
vfree(qp->r_rq.wq);
- goto bail_qp;
+ goto bail_sg_list;
}
qp->ip = NULL;
qp->s_tx = NULL;
@@ -925,6 +938,8 @@ bail_ip:
vfree(qp->r_rq.wq);
ipath_free_qp(&dev->qp_table, qp);
free_qpn(&dev->qp_table, qp->ibqp.qp_num);
+bail_sg_list:
+ kfree(qp->r_ud_sg_list);
bail_qp:
kfree(qp);
bail_swq:
@@ -989,6 +1004,7 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
kref_put(&qp->ip->ref, ipath_release_mmap_info);
else
vfree(qp->r_rq.wq);
+ kfree(qp->r_ud_sg_list);
vfree(qp->s_wq);
kfree(qp);
return 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 729446f56aa..91c74cc797a 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -70,8 +70,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
goto done;
}
- rsge.sg_list = NULL;
-
/*
* Check that the qkey matches (except for QP0, see 9.6.1.4.1).
* Qkeys with the high order bit set mean use the
@@ -115,21 +113,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
rq = &qp->r_rq;
}
- if (rq->max_sge > 1) {
- /*
- * XXX We could use GFP_KERNEL if ipath_do_send()
- * was always called from the tasklet instead of
- * from ipath_post_send().
- */
- rsge.sg_list = kmalloc((rq->max_sge - 1) *
- sizeof(struct ipath_sge),
- GFP_ATOMIC);
- if (!rsge.sg_list) {
- dev->n_pkt_drops++;
- goto drop;
- }
- }
-
/*
* Get the next work request entry to find where to put the data.
* Note that it is safe to drop the lock after changing rq->tail
@@ -147,6 +130,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
goto drop;
}
wqe = get_rwqe_ptr(rq, tail);
+ rsge.sg_list = qp->r_ud_sg_list;
if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
spin_unlock_irqrestore(&rq->lock, flags);
dev->n_pkt_drops++;
@@ -242,7 +226,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
swqe->wr.send_flags & IB_SEND_SOLICITED);
drop:
- kfree(rsge.sg_list);
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
done:;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 9d12ae8a778..11e3f613df9 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -431,6 +431,7 @@ struct ipath_qp {
u32 s_lsn; /* limit sequence number (credit) */
struct ipath_swqe *s_wq; /* send work queue */
struct ipath_swqe *s_wqe;
+ struct ipath_sge *r_ud_sg_list;
struct ipath_rq r_rq; /* receive work queue */
struct ipath_sge r_sg_list[0]; /* verified SGEs */
};