diff options
author | Steve Wise <swise@opengridcomputing.com> | 2010-01-27 17:03:34 +0000 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2010-02-24 10:40:28 -0800 |
commit | e998f245c4b2d36ae2c35446e54ccbf1fb29d9de (patch) | |
tree | 9a0232f2d99d37f8d47ca1f7b4dbe5e9bcde5642 /drivers/infiniband | |
parent | 2542322485be45853cc72d542d8ed84fae82c981 (diff) | |
download | linux-3.10-e998f245c4b2d36ae2c35446e54ccbf1fb29d9de.tar.gz linux-3.10-e998f245c4b2d36ae2c35446e54ccbf1fb29d9de.tar.bz2 linux-3.10-e998f245c4b2d36ae2c35446e54ccbf1fb29d9de.zip |
RDMA/cxgb3: Doorbell overflow avoidance and recovery
T3 hardware doorbell FIFO overflows can cause application stalls due
to lost doorbell ring events. This has been seen when running large
NP IMB alltoall MPI jobs. The T3 hardware supports an xon/xoff-type
flow control mechanism to help avoid overflowing the HW doorbell FIFO.
This patch uses these interrupts to disable RDMA QP doorbell rings
when we near an overflow condition, and then turn them back on (and
ring all the active QP doorbells) when when the doorbell FIFO empties
out. In addition if an doorbell ring is dropped by the hardware, the
code will now recover.
Design:
cxgb3:
- enable these DB interrupts
- in the interrupt handler, schedule work tasks to call the ULPs event
handlers with the new events.
- ring all the qset txqs when an overflow is detected.
iw_cxgb3:
- disable db ringing on all active qps when we get the DB_FULL event
- enable db ringing on all active qps and ring all active dbs when we get
the DB_EMPTY event
- On DB_DROP event:
- disable db rings in the event handler
- delay-schedule a work task which rings and enables the dbs on
all active qps.
- in post_send and post_recv logic, don't ring the db if it's disabled.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/hw/cxgb3/cxio_wr.h | 17 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch.c | 79 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_qp.c | 9 |
4 files changed, 99 insertions, 8 deletions
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index a197a5b7ac7..15073b2da1c 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -730,7 +730,22 @@ struct t3_cq { static inline void cxio_set_wq_in_error(struct t3_wq *wq) { - wq->queue->wq_in_err.err = 1; + wq->queue->wq_in_err.err |= 1; +} + +static inline void cxio_disable_wq_db(struct t3_wq *wq) +{ + wq->queue->wq_in_err.err |= 2; +} + +static inline void cxio_enable_wq_db(struct t3_wq *wq) +{ + wq->queue->wq_in_err.err &= ~2; +} + +static inline int cxio_wq_db_enabled(struct t3_wq *wq) +{ + return !(wq->queue->wq_in_err.err & 2); } static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq) diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index b0ea0105ddf..d992543890e 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -65,6 +65,46 @@ struct cxgb3_client t3c_client = { static LIST_HEAD(dev_list); static DEFINE_MUTEX(dev_mutex); +static int disable_qp_db(int id, void *p, void *data) +{ + struct iwch_qp *qhp = p; + + cxio_disable_wq_db(&qhp->wq); + return 0; +} + +static int enable_qp_db(int id, void *p, void *data) +{ + struct iwch_qp *qhp = p; + + if (data) + ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, qhp->wq.qpid); + cxio_enable_wq_db(&qhp->wq); + return 0; +} + +static void disable_dbs(struct iwch_dev *rnicp) +{ + spin_lock_irq(&rnicp->lock); + idr_for_each(&rnicp->qpidr, disable_qp_db, NULL); + spin_unlock_irq(&rnicp->lock); +} + +static void enable_dbs(struct iwch_dev *rnicp, int ring_db) +{ + spin_lock_irq(&rnicp->lock); + idr_for_each(&rnicp->qpidr, enable_qp_db, + (void *)(unsigned long)ring_db); + spin_unlock_irq(&rnicp->lock); +} + +static void iwch_db_drop_task(struct work_struct *work) +{ + struct iwch_dev *rnicp = container_of(work, struct iwch_dev, + db_drop_task.work); + enable_dbs(rnicp, 1); +} + static void rnic_init(struct iwch_dev *rnicp) { PDBG("%s iwch_dev %p\n", __func__, rnicp); @@ -72,6 +112,7 @@ static void rnic_init(struct iwch_dev *rnicp) idr_init(&rnicp->qpidr); idr_init(&rnicp->mmidr); spin_lock_init(&rnicp->lock); + INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task); rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; @@ -147,6 +188,7 @@ static void close_rnic_dev(struct t3cdev *tdev) mutex_lock(&dev_mutex); list_for_each_entry_safe(dev, tmp, &dev_list, entry) { if (dev->rdev.t3cdev_p == tdev) { + cancel_delayed_work_sync(&dev->db_drop_task); list_del(&dev->entry); iwch_unregister_device(dev); cxio_rdev_close(&dev->rdev); @@ -165,7 +207,8 @@ static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id) struct cxio_rdev *rdev = tdev->ulp; struct iwch_dev *rnicp; struct ib_event event; - u32 portnum = port_id + 1; + u32 portnum = port_id + 1; + int dispatch = 0; if (!rdev) return; @@ -174,21 +217,49 @@ static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id) case OFFLOAD_STATUS_DOWN: { rdev->flags = CXIO_ERROR_FATAL; event.event = IB_EVENT_DEVICE_FATAL; + dispatch = 1; break; } case OFFLOAD_PORT_DOWN: { event.event = IB_EVENT_PORT_ERR; + dispatch = 1; break; } case OFFLOAD_PORT_UP: { event.event = IB_EVENT_PORT_ACTIVE; + dispatch = 1; + break; + } + case OFFLOAD_DB_FULL: { + disable_dbs(rnicp); + break; + } + case OFFLOAD_DB_EMPTY: { + enable_dbs(rnicp, 1); + break; + } + case OFFLOAD_DB_DROP: { + unsigned long delay = 1000; + unsigned short r; + + disable_dbs(rnicp); + get_random_bytes(&r, 2); + delay += r & 1023; + + /* + * delay is between 1000-2023 usecs. + */ + schedule_delayed_work(&rnicp->db_drop_task, + usecs_to_jiffies(delay)); break; } } - event.device = &rnicp->ibdev; - event.element.port_num = portnum; - ib_dispatch_event(&event); + if (dispatch) { + event.device = &rnicp->ibdev; + event.element.port_num = portnum; + ib_dispatch_event(&event); + } return; } diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index 84735506333..a1c44578e03 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -36,6 +36,7 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/idr.h> +#include <linux/workqueue.h> #include <rdma/ib_verbs.h> @@ -110,6 +111,7 @@ struct iwch_dev { struct idr mmidr; spinlock_t lock; struct list_head entry; + struct delayed_work db_drop_task; }; static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 3eb8cecf81d..b4d893de365 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -452,7 +452,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ++(qhp->wq.sq_wptr); } spin_unlock_irqrestore(&qhp->lock, flag); - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + if (cxio_wq_db_enabled(&qhp->wq)) + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); out: if (err) @@ -514,7 +515,8 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, num_wrs--; } spin_unlock_irqrestore(&qhp->lock, flag); - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + if (cxio_wq_db_enabled(&qhp->wq)) + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); out: if (err) @@ -597,7 +599,8 @@ int iwch_bind_mw(struct ib_qp *qp, ++(qhp->wq.sq_wptr); spin_unlock_irqrestore(&qhp->lock, flag); - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + if (cxio_wq_db_enabled(&qhp->wq)) + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); return err; } |