diff options
79 files changed, 3527 insertions, 466 deletions
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index fc0f2bd9ca8..4104ea2427c 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -889,6 +889,8 @@ retest: break; case IB_CM_ESTABLISHED: spin_unlock_irq(&cm_id_priv->lock); + if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) + break; ib_send_cm_dreq(cm_id, NULL, 0); goto retest; case IB_CM_DREQ_SENT: @@ -1008,7 +1010,6 @@ static void cm_format_req(struct cm_req_msg *req_msg, req_msg->service_id = param->service_id; req_msg->local_ca_guid = cm_id_priv->id.device->node_guid; cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); - cm_req_set_resp_res(req_msg, param->responder_resources); cm_req_set_init_depth(req_msg, param->initiator_depth); cm_req_set_remote_resp_timeout(req_msg, param->remote_cm_response_timeout); @@ -1017,12 +1018,16 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); cm_req_set_local_resp_timeout(req_msg, param->local_cm_response_timeout); - cm_req_set_retry_count(req_msg, param->retry_count); req_msg->pkey = param->primary_path->pkey; cm_req_set_path_mtu(req_msg, param->primary_path->mtu); - cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); - cm_req_set_srq(req_msg, param->srq); + + if (param->qp_type != IB_QPT_XRC_INI) { + cm_req_set_resp_res(req_msg, param->responder_resources); + cm_req_set_retry_count(req_msg, param->retry_count); + cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); + cm_req_set_srq(req_msg, param->srq); + } if (pri_path->hop_limit <= 1) { req_msg->primary_local_lid = pri_path->slid; @@ -1080,7 +1085,8 @@ static int cm_validate_req_param(struct ib_cm_req_param *param) if (!param->primary_path) return -EINVAL; - if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC) + if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC && + param->qp_type != IB_QPT_XRC_INI) 
return -EINVAL; if (param->private_data && @@ -1601,18 +1607,24 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg, cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); rep_msg->local_comm_id = cm_id_priv->id.local_id; rep_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); rep_msg->resp_resources = param->responder_resources; - rep_msg->initiator_depth = param->initiator_depth; cm_rep_set_target_ack_delay(rep_msg, cm_id_priv->av.port->cm_dev->ack_delay); cm_rep_set_failover(rep_msg, param->failover_accepted); - cm_rep_set_flow_ctrl(rep_msg, param->flow_control); cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); - cm_rep_set_srq(rep_msg, param->srq); rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; + if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) { + rep_msg->initiator_depth = param->initiator_depth; + cm_rep_set_flow_ctrl(rep_msg, param->flow_control); + cm_rep_set_srq(rep_msg, param->srq); + cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); + } else { + cm_rep_set_srq(rep_msg, 1); + cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num)); + } + if (param->private_data && param->private_data_len) memcpy(rep_msg->private_data, param->private_data, param->private_data_len); @@ -1660,7 +1672,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); - cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg); + cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; @@ -1731,7 +1743,7 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_rtu); -static void cm_format_rep_event(struct cm_work *work) +static void cm_format_rep_event(struct 
cm_work *work, enum ib_qp_type qp_type) { struct cm_rep_msg *rep_msg; struct ib_cm_rep_event_param *param; @@ -1740,7 +1752,7 @@ static void cm_format_rep_event(struct cm_work *work) param = &work->cm_event.param.rep_rcvd; param->remote_ca_guid = rep_msg->local_ca_guid; param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); - param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg)); + param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type)); param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg)); param->responder_resources = rep_msg->initiator_depth; param->initiator_depth = rep_msg->resp_resources; @@ -1808,7 +1820,7 @@ static int cm_rep_handler(struct cm_work *work) return -EINVAL; } - cm_format_rep_event(work); + cm_format_rep_event(work, cm_id_priv->qp_type); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { @@ -1823,7 +1835,7 @@ static int cm_rep_handler(struct cm_work *work) cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; - cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg); + cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); spin_lock(&cm.lock); /* Check for duplicate REP. 
*/ @@ -1850,7 +1862,7 @@ static int cm_rep_handler(struct cm_work *work) cm_id_priv->id.state = IB_CM_REP_RCVD; cm_id_priv->id.remote_id = rep_msg->local_comm_id; - cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg); + cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); cm_id_priv->initiator_depth = rep_msg->resp_resources; cm_id_priv->responder_resources = rep_msg->initiator_depth; cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); @@ -3492,7 +3504,8 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); - if (cm_id_priv->qp_type == IB_QPT_RC) { + if (cm_id_priv->qp_type == IB_QPT_RC || + cm_id_priv->qp_type == IB_QPT_XRC_TGT) { *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; qp_attr->max_dest_rd_atomic = @@ -3537,15 +3550,21 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) { *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); - if (cm_id_priv->qp_type == IB_QPT_RC) { - *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | - IB_QP_RNR_RETRY | + switch (cm_id_priv->qp_type) { + case IB_QPT_RC: + case IB_QPT_XRC_INI: + *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC; - qp_attr->timeout = cm_id_priv->av.timeout; qp_attr->retry_cnt = cm_id_priv->retry_count; qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; - qp_attr->max_rd_atomic = - cm_id_priv->initiator_depth; + qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; + /* fall through */ + case IB_QPT_XRC_TGT: + *qp_attr_mask |= IB_QP_TIMEOUT; + qp_attr->timeout = cm_id_priv->av.timeout; + break; + default: + break; } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_PATH_MIG_STATE; diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h 
index 7e63c08f697..505db2a59e7 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2011 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * @@ -86,7 +86,7 @@ struct cm_req_msg { __be16 pkey; /* path MTU:4, RDC exists:1, RNR retry count:3. */ u8 offset50; - /* max CM Retries:4, SRQ:1, rsvd:3 */ + /* max CM Retries:4, SRQ:1, extended transport type:3 */ u8 offset51; __be16 primary_local_lid; @@ -175,6 +175,11 @@ static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg) switch(transport_type) { case 0: return IB_QPT_RC; case 1: return IB_QPT_UC; + case 3: + switch (req_msg->offset51 & 0x7) { + case 1: return IB_QPT_XRC_TGT; + default: return 0; + } default: return 0; } } @@ -188,6 +193,12 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg, req_msg->offset40) & 0xFFFFFFF9) | 0x2); break; + case IB_QPT_XRC_INI: + req_msg->offset40 = cpu_to_be32((be32_to_cpu( + req_msg->offset40) & + 0xFFFFFFF9) | 0x6); + req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1; + break; default: req_msg->offset40 = cpu_to_be32(be32_to_cpu( req_msg->offset40) & @@ -527,6 +538,23 @@ static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn) (be32_to_cpu(rep_msg->offset12) & 0x000000FF)); } +static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg) +{ + return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8); +} + +static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn) +{ + rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) | + (be32_to_cpu(rep_msg->offset16) & 0x000000FF)); +} + +static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type) +{ + return (qp_type == IB_QPT_XRC_INI) ? 
+ cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg); +} + static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg) { return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 3e710455459..872b1842598 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -81,6 +81,7 @@ static DEFINE_IDR(sdp_ps); static DEFINE_IDR(tcp_ps); static DEFINE_IDR(udp_ps); static DEFINE_IDR(ipoib_ps); +static DEFINE_IDR(ib_ps); struct cma_device { struct list_head list; @@ -2250,6 +2251,9 @@ static int cma_get_port(struct rdma_id_private *id_priv) case RDMA_PS_IPOIB: ps = &ipoib_ps; break; + case RDMA_PS_IB: + ps = &ib_ps; + break; default: return -EPROTONOSUPPORT; } @@ -2585,7 +2589,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, req.service_id = cma_get_service_id(id_priv->id.ps, (struct sockaddr *) &route->addr.dst_addr); req.qp_num = id_priv->qp_num; - req.qp_type = IB_QPT_RC; + req.qp_type = id_priv->id.qp_type; req.starting_psn = id_priv->seq_num; req.responder_resources = conn_param->responder_resources; req.initiator_depth = conn_param->initiator_depth; @@ -2632,14 +2636,16 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, if (ret) goto out; - iw_param.ord = conn_param->initiator_depth; - iw_param.ird = conn_param->responder_resources; - iw_param.private_data = conn_param->private_data; - iw_param.private_data_len = conn_param->private_data_len; - if (id_priv->id.qp) + if (conn_param) { + iw_param.ord = conn_param->initiator_depth; + iw_param.ird = conn_param->responder_resources; + iw_param.private_data = conn_param->private_data; + iw_param.private_data_len = conn_param->private_data_len; + iw_param.qpn = id_priv->id.qp ? 
id_priv->qp_num : conn_param->qp_num; + } else { + memset(&iw_param, 0, sizeof iw_param); iw_param.qpn = id_priv->qp_num; - else - iw_param.qpn = conn_param->qp_num; + } ret = iw_cm_connect(cm_id, &iw_param); out: if (ret) { @@ -2781,14 +2787,20 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (id->qp_type == IB_QPT_UD) - ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, - conn_param->private_data, - conn_param->private_data_len); - else if (conn_param) - ret = cma_accept_ib(id_priv, conn_param); - else - ret = cma_rep_recv(id_priv); + if (id->qp_type == IB_QPT_UD) { + if (conn_param) + ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, + conn_param->private_data, + conn_param->private_data_len); + else + ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, + NULL, 0); + } else { + if (conn_param) + ret = cma_accept_ib(id_priv, conn_param); + else + ret = cma_rep_recv(id_priv); + } break; case RDMA_TRANSPORT_IWARP: ret = cma_accept_iw(id_priv, conn_param); @@ -3476,6 +3488,7 @@ static void __exit cma_cleanup(void) idr_destroy(&tcp_ps); idr_destroy(&udp_ps); idr_destroy(&ipoib_ps); + idr_destroy(&ib_ps); } module_init(cma_init); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index b4d8672a3e4..056389229ea 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1596,6 +1596,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv, mad->mad_hdr.class_version].class; if (!class) goto out; + if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >= + IB_MGMT_MAX_METHODS) + goto out; method = class->method_table[convert_mgmt_class( mad->mad_hdr.mgmt_class)]; if (method) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 9ab5df72df7..2b59b72b57f 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -185,17 +185,35 @@ static ssize_t rate_show(struct 
ib_port *p, struct port_attribute *unused, if (ret) return ret; + rate = (25 * attr.active_speed) / 10; + switch (attr.active_speed) { - case 2: speed = " DDR"; break; - case 4: speed = " QDR"; break; + case 2: + speed = " DDR"; + break; + case 4: + speed = " QDR"; + break; + case 8: + speed = " FDR10"; + rate = 10; + break; + case 16: + speed = " FDR"; + rate = 14; + break; + case 32: + speed = " EDR"; + rate = 25; + break; } - rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed; + rate *= ib_width_enum_to_int(attr.active_width); if (rate < 0) return -EINVAL; return sprintf(buf, "%d%s Gb/sec (%dX%s)\n", - rate / 10, rate % 10 ? ".5" : "", + rate, (attr.active_speed == 1) ? ".5" : "", ib_width_enum_to_int(attr.active_width), speed); } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 08f948df8fa..b8a0b4a7811 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1122,7 +1122,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) + if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; if (hdr.in + sizeof(hdr) > len) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 71be5eebd68..b69307f4f6d 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -276,7 +276,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, ucma_set_event_context(ctx, event, uevent); uevent->resp.event = event->event; uevent->resp.status = event->status; - if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB) + if (cm_id->qp_type == IB_QPT_UD) ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); else ucma_copy_conn_event(&uevent->resp.param.conn, @@ -377,6 +377,9 @@ static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_ case RDMA_PS_IPOIB: *qp_type = IB_QPT_UD; return 0; 
+ case RDMA_PS_IB: + *qp_type = cmd->qp_type; + return 0; default: return -EINVAL; } @@ -1270,7 +1273,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) + if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) return -EINVAL; if (hdr.in + sizeof(hdr) > len) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 8d261b6ea5f..07db22997e9 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -458,8 +458,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err; } - if (packet->mad.hdr.id < 0 || - packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { + if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { ret = -EINVAL; goto err; } @@ -703,7 +702,7 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); - if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { + if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { ret = -EINVAL; goto out; } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index a078e5624d2..5bcb2afd3dc 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -76,6 +76,8 @@ struct ib_uverbs_device { struct ib_device *ib_dev; int devnum; struct cdev cdev; + struct rb_root xrcd_tree; + struct mutex xrcd_tree_mutex; }; struct ib_uverbs_event_file { @@ -120,6 +122,16 @@ struct ib_uevent_object { u32 events_reported; }; +struct ib_uxrcd_object { + struct ib_uobject uobject; + atomic_t refcnt; +}; + +struct ib_usrq_object { + struct ib_uevent_object uevent; + struct ib_uxrcd_object *uxrcd; +}; + struct ib_uqp_object { struct ib_uevent_object uevent; struct list_head mcast_list; @@ -142,6 +154,7 @@ extern struct idr ib_uverbs_ah_idr; extern struct idr ib_uverbs_cq_idr; extern struct 
idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; +extern struct idr ib_uverbs_xrcd_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -161,6 +174,7 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); +void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ @@ -181,6 +195,7 @@ IB_UVERBS_DECLARE_CMD(poll_cq); IB_UVERBS_DECLARE_CMD(req_notify_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); +IB_UVERBS_DECLARE_CMD(open_qp); IB_UVERBS_DECLARE_CMD(query_qp); IB_UVERBS_DECLARE_CMD(modify_qp); IB_UVERBS_DECLARE_CMD(destroy_qp); @@ -195,5 +210,8 @@ IB_UVERBS_DECLARE_CMD(create_srq); IB_UVERBS_DECLARE_CMD(modify_srq); IB_UVERBS_DECLARE_CMD(query_srq); IB_UVERBS_DECLARE_CMD(destroy_srq); +IB_UVERBS_DECLARE_CMD(create_xsrq); +IB_UVERBS_DECLARE_CMD(open_xrcd); +IB_UVERBS_DECLARE_CMD(close_xrcd); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index c42699285f8..254f1649c73 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -47,6 +47,7 @@ static struct lock_class_key cq_lock_key; static struct lock_class_key qp_lock_key; static struct lock_class_key ah_lock_key; static struct lock_class_key srq_lock_key; +static struct lock_class_key xrcd_lock_key; #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do { \ @@ -255,6 +256,18 @@ static void put_srq_read(struct ib_srq *srq) put_uobj_read(srq->uobject); } +static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, + struct ib_uobject **uobj) +{ + *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0); + return *uobj ? 
(*uobj)->object : NULL; +} + +static void put_xrcd_read(struct ib_uobject *uobj) +{ + put_uobj_read(uobj); +} + ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -298,6 +311,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->qp_list); INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); + INIT_LIST_HEAD(&ucontext->xrcd_list); ucontext->closing = 0; resp.num_comp_vectors = file->device->num_comp_vectors; @@ -579,6 +593,310 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, return in_len; } +struct xrcd_table_entry { + struct rb_node node; + struct ib_xrcd *xrcd; + struct inode *inode; +}; + +static int xrcd_table_insert(struct ib_uverbs_device *dev, + struct inode *inode, + struct ib_xrcd *xrcd) +{ + struct xrcd_table_entry *entry, *scan; + struct rb_node **p = &dev->xrcd_tree.rb_node; + struct rb_node *parent = NULL; + + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->xrcd = xrcd; + entry->inode = inode; + + while (*p) { + parent = *p; + scan = rb_entry(parent, struct xrcd_table_entry, node); + + if (inode < scan->inode) { + p = &(*p)->rb_left; + } else if (inode > scan->inode) { + p = &(*p)->rb_right; + } else { + kfree(entry); + return -EEXIST; + } + } + + rb_link_node(&entry->node, parent, p); + rb_insert_color(&entry->node, &dev->xrcd_tree); + igrab(inode); + return 0; +} + +static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev, + struct inode *inode) +{ + struct xrcd_table_entry *entry; + struct rb_node *p = dev->xrcd_tree.rb_node; + + while (p) { + entry = rb_entry(p, struct xrcd_table_entry, node); + + if (inode < entry->inode) + p = p->rb_left; + else if (inode > entry->inode) + p = p->rb_right; + else + return entry; + } + + return NULL; +} + +static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode) +{ + struct xrcd_table_entry *entry; + + 
entry = xrcd_table_search(dev, inode); + if (!entry) + return NULL; + + return entry->xrcd; +} + +static void xrcd_table_delete(struct ib_uverbs_device *dev, + struct inode *inode) +{ + struct xrcd_table_entry *entry; + + entry = xrcd_table_search(dev, inode); + if (entry) { + iput(inode); + rb_erase(&entry->node, &dev->xrcd_tree); + kfree(entry); + } +} + +ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_open_xrcd cmd; + struct ib_uverbs_open_xrcd_resp resp; + struct ib_udata udata; + struct ib_uxrcd_object *obj; + struct ib_xrcd *xrcd = NULL; + struct file *f = NULL; + struct inode *inode = NULL; + int ret = 0; + int new_xrcd = 0; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + mutex_lock(&file->device->xrcd_tree_mutex); + + if (cmd.fd != -1) { + /* search for file descriptor */ + f = fget(cmd.fd); + if (!f) { + ret = -EBADF; + goto err_tree_mutex_unlock; + } + + inode = f->f_dentry->d_inode; + if (!inode) { + ret = -EBADF; + goto err_tree_mutex_unlock; + } + + xrcd = find_xrcd(file->device, inode); + if (!xrcd && !(cmd.oflags & O_CREAT)) { + /* no file descriptor. 
Need CREATE flag */ + ret = -EAGAIN; + goto err_tree_mutex_unlock; + } + + if (xrcd && cmd.oflags & O_EXCL) { + ret = -EINVAL; + goto err_tree_mutex_unlock; + } + } + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) { + ret = -ENOMEM; + goto err_tree_mutex_unlock; + } + + init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_key); + + down_write(&obj->uobject.mutex); + + if (!xrcd) { + xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev, + file->ucontext, &udata); + if (IS_ERR(xrcd)) { + ret = PTR_ERR(xrcd); + goto err; + } + + xrcd->inode = inode; + xrcd->device = file->device->ib_dev; + atomic_set(&xrcd->usecnt, 0); + mutex_init(&xrcd->tgt_qp_mutex); + INIT_LIST_HEAD(&xrcd->tgt_qp_list); + new_xrcd = 1; + } + + atomic_set(&obj->refcnt, 0); + obj->uobject.object = xrcd; + ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); + if (ret) + goto err_idr; + + memset(&resp, 0, sizeof resp); + resp.xrcd_handle = obj->uobject.id; + + if (inode) { + if (new_xrcd) { + /* create new inode/xrcd table entry */ + ret = xrcd_table_insert(file->device, inode, xrcd); + if (ret) + goto err_insert_xrcd; + } + atomic_inc(&xrcd->usecnt); + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_copy; + } + + if (f) + fput(f); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); + mutex_unlock(&file->mutex); + + obj->uobject.live = 1; + up_write(&obj->uobject.mutex); + + mutex_unlock(&file->device->xrcd_tree_mutex); + return in_len; + +err_copy: + if (inode) { + if (new_xrcd) + xrcd_table_delete(file->device, inode); + atomic_dec(&xrcd->usecnt); + } + +err_insert_xrcd: + idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); + +err_idr: + ib_dealloc_xrcd(xrcd); + +err: + put_uobj_write(&obj->uobject); + +err_tree_mutex_unlock: + if (f) + fput(f); + + mutex_unlock(&file->device->xrcd_tree_mutex); + + return ret; +} + +ssize_t ib_uverbs_close_xrcd(struct 
ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_close_xrcd cmd; + struct ib_uobject *uobj; + struct ib_xrcd *xrcd = NULL; + struct inode *inode = NULL; + struct ib_uxrcd_object *obj; + int live; + int ret = 0; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + mutex_lock(&file->device->xrcd_tree_mutex); + uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext); + if (!uobj) { + ret = -EINVAL; + goto out; + } + + xrcd = uobj->object; + inode = xrcd->inode; + obj = container_of(uobj, struct ib_uxrcd_object, uobject); + if (atomic_read(&obj->refcnt)) { + put_uobj_write(uobj); + ret = -EBUSY; + goto out; + } + + if (!inode || atomic_dec_and_test(&xrcd->usecnt)) { + ret = ib_dealloc_xrcd(uobj->object); + if (!ret) + uobj->live = 0; + } + + live = uobj->live; + if (inode && ret) + atomic_inc(&xrcd->usecnt); + + put_uobj_write(uobj); + + if (ret) + goto out; + + if (inode && !live) + xrcd_table_delete(file->device, inode); + + idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + ret = in_len; + +out: + mutex_unlock(&file->device->xrcd_tree_mutex); + return ret; +} + +void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, + struct ib_xrcd *xrcd) +{ + struct inode *inode; + + inode = xrcd->inode; + if (inode && !atomic_dec_and_test(&xrcd->usecnt)) + return; + + ib_dealloc_xrcd(xrcd); + + if (inode) + xrcd_table_delete(dev, inode); +} + ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -1052,9 +1370,12 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, struct ib_uverbs_create_qp_resp resp; struct ib_udata udata; struct ib_uqp_object *obj; - struct ib_pd *pd; - struct ib_cq *scq, *rcq; - struct ib_srq *srq; + struct ib_device *device; + struct ib_pd *pd = NULL; + struct ib_xrcd *xrcd = NULL; + struct ib_uobject 
*uninitialized_var(xrcd_uobj); + struct ib_cq *scq = NULL, *rcq = NULL; + struct ib_srq *srq = NULL; struct ib_qp *qp; struct ib_qp_init_attr attr; int ret; @@ -1076,15 +1397,39 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); down_write(&obj->uevent.uobject.mutex); - srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; - pd = idr_read_pd(cmd.pd_handle, file->ucontext); - scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); - rcq = cmd.recv_cq_handle == cmd.send_cq_handle ? - scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); + if (cmd.qp_type == IB_QPT_XRC_TGT) { + xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) { + ret = -EINVAL; + goto err_put; + } + device = xrcd->device; + } else { + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); + if (!pd || !scq) { + ret = -EINVAL; + goto err_put; + } - if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) { - ret = -EINVAL; - goto err_put; + if (cmd.qp_type == IB_QPT_XRC_INI) { + cmd.max_recv_wr = cmd.max_recv_sge = 0; + } else { + if (cmd.is_srq) { + srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (!srq || srq->srq_type != IB_SRQT_BASIC) { + ret = -EINVAL; + goto err_put; + } + } + rcq = (cmd.recv_cq_handle == cmd.send_cq_handle) ? + scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); + if (!rcq) { + ret = -EINVAL; + goto err_put; + } + } + device = pd->device; } attr.event_handler = ib_uverbs_qp_event_handler; @@ -1092,6 +1437,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, attr.send_cq = scq; attr.recv_cq = rcq; attr.srq = srq; + attr.xrcd = xrcd; attr.sq_sig_type = cmd.sq_sig_all ? 
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; attr.qp_type = cmd.qp_type; attr.create_flags = 0; @@ -1106,26 +1452,34 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); - qp = pd->device->create_qp(pd, &attr, &udata); + if (cmd.qp_type == IB_QPT_XRC_TGT) + qp = ib_create_qp(pd, &attr); + else + qp = device->create_qp(pd, &attr, &udata); + if (IS_ERR(qp)) { ret = PTR_ERR(qp); goto err_put; } - qp->device = pd->device; - qp->pd = pd; - qp->send_cq = attr.send_cq; - qp->recv_cq = attr.recv_cq; - qp->srq = attr.srq; - qp->uobject = &obj->uevent.uobject; - qp->event_handler = attr.event_handler; - qp->qp_context = attr.qp_context; - qp->qp_type = attr.qp_type; - atomic_inc(&pd->usecnt); - atomic_inc(&attr.send_cq->usecnt); - atomic_inc(&attr.recv_cq->usecnt); - if (attr.srq) - atomic_inc(&attr.srq->usecnt); + if (cmd.qp_type != IB_QPT_XRC_TGT) { + qp->real_qp = qp; + qp->device = device; + qp->pd = pd; + qp->send_cq = attr.send_cq; + qp->recv_cq = attr.recv_cq; + qp->srq = attr.srq; + qp->event_handler = attr.event_handler; + qp->qp_context = attr.qp_context; + qp->qp_type = attr.qp_type; + atomic_inc(&pd->usecnt); + atomic_inc(&attr.send_cq->usecnt); + if (attr.recv_cq) + atomic_inc(&attr.recv_cq->usecnt); + if (attr.srq) + atomic_inc(&attr.srq->usecnt); + } + qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); @@ -1147,9 +1501,13 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, goto err_copy; } - put_pd_read(pd); - put_cq_read(scq); - if (rcq != scq) + if (xrcd) + put_xrcd_read(xrcd_uobj); + if (pd) + put_pd_read(pd); + if (scq) + put_cq_read(scq); + if (rcq && rcq != scq) put_cq_read(rcq); if (srq) put_srq_read(srq); @@ -1171,6 +1529,8 @@ err_destroy: ib_destroy_qp(qp); err_put: + if (xrcd) + put_xrcd_read(xrcd_uobj); if (pd) put_pd_read(pd); if (scq) @@ -1184,6 +1544,98 @@ err_put: return ret; } +ssize_t 
ib_uverbs_open_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_open_qp cmd; + struct ib_uverbs_create_qp_resp resp; + struct ib_udata udata; + struct ib_uqp_object *obj; + struct ib_xrcd *xrcd; + struct ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_qp *qp; + struct ib_qp_open_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); + down_write(&obj->uevent.uobject.mutex); + + xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) { + ret = -EINVAL; + goto err_put; + } + + attr.event_handler = ib_uverbs_qp_event_handler; + attr.qp_context = file; + attr.qp_num = cmd.qpn; + attr.qp_type = cmd.qp_type; + + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + INIT_LIST_HEAD(&obj->mcast_list); + + qp = ib_open_qp(xrcd, &attr); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto err_put; + } + + qp->uobject = &obj->uevent.uobject; + + obj->uevent.uobject.object = qp; + ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + if (ret) + goto err_destroy; + + memset(&resp, 0, sizeof resp); + resp.qpn = qp->qp_num; + resp.qp_handle = obj->uevent.uobject.id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_remove; + } + + put_xrcd_read(xrcd_uobj); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); + mutex_unlock(&file->mutex); + + obj->uevent.uobject.live = 1; + + up_write(&obj->uevent.uobject.mutex); + + return in_len; + +err_remove: + idr_remove_uobj(&ib_uverbs_qp_idr, 
&obj->uevent.uobject); + +err_destroy: + ib_destroy_qp(qp); + +err_put: + put_xrcd_read(xrcd_uobj); + put_uobj_write(&obj->uevent.uobject); + return ret; +} + ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -1284,6 +1736,20 @@ out: return ret ? ret : in_len; } +/* Remove ignored fields set in the attribute mask */ +static int modify_qp_mask(enum ib_qp_type qp_type, int mask) +{ + switch (qp_type) { + case IB_QPT_XRC_INI: + return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER); + case IB_QPT_XRC_TGT: + return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY); + default: + return mask; + } +} + ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -1356,7 +1822,12 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; - ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata); + if (qp->real_qp == qp) { + ret = qp->device->modify_qp(qp, attr, + modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); + } else { + ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask)); + } put_qp_read(qp); @@ -1553,7 +2024,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, } resp.bad_wr = 0; - ret = qp->device->post_send(qp, wr, &bad_wr); + ret = qp->device->post_send(qp->real_qp, wr, &bad_wr); if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; @@ -1691,7 +2162,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, goto out; resp.bad_wr = 0; - ret = qp->device->post_recv(qp, wr, &bad_wr); + ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr); put_qp_read(qp); @@ -1975,107 +2446,199 @@ out_put: return ret ? 
ret : in_len; } -ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +int __uverbs_create_xsrq(struct ib_uverbs_file *file, + struct ib_uverbs_create_xsrq *cmd, + struct ib_udata *udata) { - struct ib_uverbs_create_srq cmd; struct ib_uverbs_create_srq_resp resp; - struct ib_udata udata; - struct ib_uevent_object *obj; + struct ib_usrq_object *obj; struct ib_pd *pd; struct ib_srq *srq; + struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_srq_init_attr attr; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; - init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key); - down_write(&obj->uobject.mutex); + init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_key); + down_write(&obj->uevent.uobject.mutex); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = idr_read_pd(cmd->pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err; } + if (cmd->srq_type == IB_SRQT_XRC) { + attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); + if (!attr.ext.xrc.cq) { + ret = -EINVAL; + goto err_put_pd; + } + + attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); + if (!attr.ext.xrc.xrcd) { + ret = -EINVAL; + goto err_put_cq; + } + + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); + atomic_inc(&obj->uxrcd->refcnt); + } + attr.event_handler = ib_uverbs_srq_event_handler; attr.srq_context = file; - attr.attr.max_wr = cmd.max_wr; - attr.attr.max_sge = cmd.max_sge; - attr.attr.srq_limit = cmd.srq_limit; + attr.srq_type = cmd->srq_type; + attr.attr.max_wr = cmd->max_wr; + attr.attr.max_sge = cmd->max_sge; + attr.attr.srq_limit = cmd->srq_limit; - 
obj->events_reported = 0; - INIT_LIST_HEAD(&obj->event_list); + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); - srq = pd->device->create_srq(pd, &attr, &udata); + srq = pd->device->create_srq(pd, &attr, udata); if (IS_ERR(srq)) { ret = PTR_ERR(srq); goto err_put; } - srq->device = pd->device; - srq->pd = pd; - srq->uobject = &obj->uobject; + srq->device = pd->device; + srq->pd = pd; + srq->srq_type = cmd->srq_type; + srq->uobject = &obj->uevent.uobject; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; + + if (cmd->srq_type == IB_SRQT_XRC) { + srq->ext.xrc.cq = attr.ext.xrc.cq; + srq->ext.xrc.xrcd = attr.ext.xrc.xrcd; + atomic_inc(&attr.ext.xrc.cq->usecnt); + atomic_inc(&attr.ext.xrc.xrcd->usecnt); + } + atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); - obj->uobject.object = srq; - ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject); + obj->uevent.uobject.object = srq; + ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); if (ret) goto err_destroy; memset(&resp, 0, sizeof resp); - resp.srq_handle = obj->uobject.id; + resp.srq_handle = obj->uevent.uobject.id; resp.max_wr = attr.attr.max_wr; resp.max_sge = attr.attr.max_sge; + if (cmd->srq_type == IB_SRQT_XRC) + resp.srqn = srq->ext.xrc.srq_num; - if (copy_to_user((void __user *) (unsigned long) cmd.response, + if (copy_to_user((void __user *) (unsigned long) cmd->response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } + if (cmd->srq_type == IB_SRQT_XRC) { + put_uobj_read(xrcd_uobj); + put_cq_read(attr.ext.xrc.cq); + } put_pd_read(pd); mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->srq_list); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list); mutex_unlock(&file->mutex); - obj->uobject.live = 1; + obj->uevent.uobject.live = 1; - up_write(&obj->uobject.mutex); + up_write(&obj->uevent.uobject.mutex); - return in_len; + return 0; err_copy: - idr_remove_uobj(&ib_uverbs_srq_idr, 
&obj->uobject); + idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); err_destroy: ib_destroy_srq(srq); err_put: + if (cmd->srq_type == IB_SRQT_XRC) { + atomic_dec(&obj->uxrcd->refcnt); + put_uobj_read(xrcd_uobj); + } + +err_put_cq: + if (cmd->srq_type == IB_SRQT_XRC) + put_cq_read(attr.ext.xrc.cq); + +err_put_pd: put_pd_read(pd); err: - put_uobj_write(&obj->uobject); + put_uobj_write(&obj->uevent.uobject); return ret; } +ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_srq cmd; + struct ib_uverbs_create_xsrq xcmd; + struct ib_uverbs_create_srq_resp resp; + struct ib_udata udata; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + xcmd.response = cmd.response; + xcmd.user_handle = cmd.user_handle; + xcmd.srq_type = IB_SRQT_BASIC; + xcmd.pd_handle = cmd.pd_handle; + xcmd.max_wr = cmd.max_wr; + xcmd.max_sge = cmd.max_sge; + xcmd.srq_limit = cmd.srq_limit; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + ret = __uverbs_create_xsrq(file, &xcmd, &udata); + if (ret) + return ret; + + return in_len; +} + +ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_create_xsrq cmd; + struct ib_uverbs_create_srq_resp resp; + struct ib_udata udata; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + ret = __uverbs_create_xsrq(file, &cmd, &udata); + if (ret) + return ret; + + return in_len; +} + ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) diff --git 
a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 56898b6578a..87963674637 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -72,6 +72,7 @@ DEFINE_IDR(ib_uverbs_ah_idr); DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); +DEFINE_IDR(ib_uverbs_xrcd_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -107,6 +108,10 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, + [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, + [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, + [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, + [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp }; static void ib_uverbs_add_one(struct ib_device *device); @@ -202,8 +207,12 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, container_of(uobj, struct ib_uqp_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_qp_idr, uobj); - ib_uverbs_detach_umcast(qp, uqp); - ib_destroy_qp(qp); + if (qp != qp->real_qp) { + ib_close_qp(qp); + } else { + ib_uverbs_detach_umcast(qp, uqp); + ib_destroy_qp(qp); + } ib_uverbs_release_uevent(file, &uqp->uevent); kfree(uqp); } @@ -241,6 +250,18 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uobj); } + mutex_lock(&file->device->xrcd_tree_mutex); + list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { + struct ib_xrcd *xrcd = uobj->object; + struct ib_uxrcd_object *uxrcd = + container_of(uobj, struct ib_uxrcd_object, uobject); + + idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); + ib_uverbs_dealloc_xrcd(file->device, xrcd); + kfree(uxrcd); + } + mutex_unlock(&file->device->xrcd_tree_mutex); + list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { 
struct ib_pd *pd = uobj->object; @@ -557,8 +578,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (hdr.in_words * 4 != count) return -EINVAL; - if (hdr.command < 0 || - hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || + if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || !uverbs_cmd_table[hdr.command]) return -EINVAL; @@ -741,6 +761,8 @@ static void ib_uverbs_add_one(struct ib_device *device) kref_init(&uverbs_dev->ref); init_completion(&uverbs_dev->comp); + uverbs_dev->xrcd_tree = RB_ROOT; + mutex_init(&uverbs_dev->xrcd_tree_mutex); spin_lock(&map_lock); devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index af7a8b08b2e..42517500b22 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -39,6 +39,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/string.h> +#include <linux/slab.h> #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> @@ -77,6 +78,31 @@ enum ib_rate mult_to_ib_rate(int mult) } EXPORT_SYMBOL(mult_to_ib_rate); +int ib_rate_to_mbps(enum ib_rate rate) +{ + switch (rate) { + case IB_RATE_2_5_GBPS: return 2500; + case IB_RATE_5_GBPS: return 5000; + case IB_RATE_10_GBPS: return 10000; + case IB_RATE_20_GBPS: return 20000; + case IB_RATE_30_GBPS: return 30000; + case IB_RATE_40_GBPS: return 40000; + case IB_RATE_60_GBPS: return 60000; + case IB_RATE_80_GBPS: return 80000; + case IB_RATE_120_GBPS: return 120000; + case IB_RATE_14_GBPS: return 14062; + case IB_RATE_56_GBPS: return 56250; + case IB_RATE_112_GBPS: return 112500; + case IB_RATE_168_GBPS: return 168750; + case IB_RATE_25_GBPS: return 25781; + case IB_RATE_100_GBPS: return 103125; + case IB_RATE_200_GBPS: return 206250; + case IB_RATE_300_GBPS: return 309375; + default: return -1; + } +} +EXPORT_SYMBOL(ib_rate_to_mbps); + enum rdma_transport_type rdma_node_get_transport(enum rdma_node_type node_type) { @@ -250,6 +276,13 @@ struct 
ib_srq *ib_create_srq(struct ib_pd *pd, srq->uobject = NULL; srq->event_handler = srq_init_attr->event_handler; srq->srq_context = srq_init_attr->srq_context; + srq->srq_type = srq_init_attr->srq_type; + if (srq->srq_type == IB_SRQT_XRC) { + srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; + srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq; + atomic_inc(&srq->ext.xrc.xrcd->usecnt); + atomic_inc(&srq->ext.xrc.cq->usecnt); + } atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); } @@ -279,16 +312,29 @@ EXPORT_SYMBOL(ib_query_srq); int ib_destroy_srq(struct ib_srq *srq) { struct ib_pd *pd; + enum ib_srq_type srq_type; + struct ib_xrcd *uninitialized_var(xrcd); + struct ib_cq *uninitialized_var(cq); int ret; if (atomic_read(&srq->usecnt)) return -EBUSY; pd = srq->pd; + srq_type = srq->srq_type; + if (srq_type == IB_SRQT_XRC) { + xrcd = srq->ext.xrc.xrcd; + cq = srq->ext.xrc.cq; + } ret = srq->device->destroy_srq(srq); - if (!ret) + if (!ret) { atomic_dec(&pd->usecnt); + if (srq_type == IB_SRQT_XRC) { + atomic_dec(&xrcd->usecnt); + atomic_dec(&cq->usecnt); + } + } return ret; } @@ -296,28 +342,123 @@ EXPORT_SYMBOL(ib_destroy_srq); /* Queue pairs */ +static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) +{ + struct ib_qp *qp = context; + + list_for_each_entry(event->element.qp, &qp->open_list, open_list) + event->element.qp->event_handler(event, event->element.qp->qp_context); +} + +static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) +{ + mutex_lock(&xrcd->tgt_qp_mutex); + list_add(&qp->xrcd_list, &xrcd->tgt_qp_list); + mutex_unlock(&xrcd->tgt_qp_mutex); +} + +static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, + void (*event_handler)(struct ib_event *, void *), + void *qp_context) +{ + struct ib_qp *qp; + unsigned long flags; + + qp = kzalloc(sizeof *qp, GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + qp->real_qp = real_qp; + atomic_inc(&real_qp->usecnt); + qp->device = real_qp->device; + qp->event_handler = 
event_handler; + qp->qp_context = qp_context; + qp->qp_num = real_qp->qp_num; + qp->qp_type = real_qp->qp_type; + + spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + list_add(&qp->open_list, &real_qp->open_list); + spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + + return qp; +} + +struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, + struct ib_qp_open_attr *qp_open_attr) +{ + struct ib_qp *qp, *real_qp; + + if (qp_open_attr->qp_type != IB_QPT_XRC_TGT) + return ERR_PTR(-EINVAL); + + qp = ERR_PTR(-EINVAL); + mutex_lock(&xrcd->tgt_qp_mutex); + list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) { + if (real_qp->qp_num == qp_open_attr->qp_num) { + qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, + qp_open_attr->qp_context); + break; + } + } + mutex_unlock(&xrcd->tgt_qp_mutex); + return qp; +} +EXPORT_SYMBOL(ib_open_qp); + struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { - struct ib_qp *qp; + struct ib_qp *qp, *real_qp; + struct ib_device *device; - qp = pd->device->create_qp(pd, qp_init_attr, NULL); + device = pd ? 
pd->device : qp_init_attr->xrcd->device; + qp = device->create_qp(pd, qp_init_attr, NULL); if (!IS_ERR(qp)) { - qp->device = pd->device; - qp->pd = pd; - qp->send_cq = qp_init_attr->send_cq; - qp->recv_cq = qp_init_attr->recv_cq; - qp->srq = qp_init_attr->srq; - qp->uobject = NULL; - qp->event_handler = qp_init_attr->event_handler; - qp->qp_context = qp_init_attr->qp_context; - qp->qp_type = qp_init_attr->qp_type; - atomic_inc(&pd->usecnt); - atomic_inc(&qp_init_attr->send_cq->usecnt); - atomic_inc(&qp_init_attr->recv_cq->usecnt); - if (qp_init_attr->srq) - atomic_inc(&qp_init_attr->srq->usecnt); + qp->device = device; + qp->real_qp = qp; + qp->uobject = NULL; + qp->qp_type = qp_init_attr->qp_type; + + if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { + qp->event_handler = __ib_shared_qp_event_handler; + qp->qp_context = qp; + qp->pd = NULL; + qp->send_cq = qp->recv_cq = NULL; + qp->srq = NULL; + qp->xrcd = qp_init_attr->xrcd; + atomic_inc(&qp_init_attr->xrcd->usecnt); + INIT_LIST_HEAD(&qp->open_list); + atomic_set(&qp->usecnt, 0); + + real_qp = qp; + qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, + qp_init_attr->qp_context); + if (!IS_ERR(qp)) + __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); + else + real_qp->device->destroy_qp(real_qp); + } else { + qp->event_handler = qp_init_attr->event_handler; + qp->qp_context = qp_init_attr->qp_context; + if (qp_init_attr->qp_type == IB_QPT_XRC_INI) { + qp->recv_cq = NULL; + qp->srq = NULL; + } else { + qp->recv_cq = qp_init_attr->recv_cq; + atomic_inc(&qp_init_attr->recv_cq->usecnt); + qp->srq = qp_init_attr->srq; + if (qp->srq) + atomic_inc(&qp_init_attr->srq->usecnt); + } + + qp->pd = pd; + qp->send_cq = qp_init_attr->send_cq; + qp->xrcd = NULL; + + atomic_inc(&pd->usecnt); + atomic_inc(&qp_init_attr->send_cq->usecnt); + } } return qp; @@ -326,8 +467,8 @@ EXPORT_SYMBOL(ib_create_qp); static const struct { int valid; - enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETHERTYPE + 1]; - enum ib_qp_attr_mask 
opt_param[IB_QPT_RAW_ETHERTYPE + 1]; + enum ib_qp_attr_mask req_param[IB_QPT_MAX]; + enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, @@ -343,6 +484,12 @@ static const struct { [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -365,6 +512,12 @@ static const struct { [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -384,6 +537,16 @@ static const struct { IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), + [IB_QPT_XRC_INI] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN), + [IB_QPT_XRC_TGT] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_MIN_RNR_TIMER), }, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | @@ -394,6 +557,12 @@ static const struct { [IB_QPT_RC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), + [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), + [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -414,6 +583,13 @@ static const struct { IB_QP_RNR_RETRY | IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC), + [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC), + [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT | + IB_QP_SQ_PSN), [IB_QPT_SMI] = IB_QP_SQ_PSN, 
[IB_QPT_GSI] = IB_QP_SQ_PSN, }, @@ -429,6 +605,15 @@ static const struct { IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | @@ -453,6 +638,15 @@ static const struct { IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER), + [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE | + IB_QP_MIN_RNR_TIMER), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | @@ -465,6 +659,8 @@ static const struct { [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? 
*/ [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY } @@ -487,6 +683,15 @@ static const struct { IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | @@ -515,6 +720,25 @@ static const struct { IB_QP_PKEY_INDEX | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_INI] = (IB_QP_PORT | + IB_QP_AV | + IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_PORT | + IB_QP_AV | + IB_QP_TIMEOUT | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -579,7 +803,7 @@ int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { - return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL); + return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_qp); @@ -589,11 +813,59 @@ int ib_query_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr) { return qp->device->query_qp ? 
- qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) : + qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : -ENOSYS; } EXPORT_SYMBOL(ib_query_qp); +int ib_close_qp(struct ib_qp *qp) +{ + struct ib_qp *real_qp; + unsigned long flags; + + real_qp = qp->real_qp; + if (real_qp == qp) + return -EINVAL; + + spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + list_del(&qp->open_list); + spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + + atomic_dec(&real_qp->usecnt); + kfree(qp); + + return 0; +} +EXPORT_SYMBOL(ib_close_qp); + +static int __ib_destroy_shared_qp(struct ib_qp *qp) +{ + struct ib_xrcd *xrcd; + struct ib_qp *real_qp; + int ret; + + real_qp = qp->real_qp; + xrcd = real_qp->xrcd; + + mutex_lock(&xrcd->tgt_qp_mutex); + ib_close_qp(qp); + if (atomic_read(&real_qp->usecnt) == 0) + list_del(&real_qp->xrcd_list); + else + real_qp = NULL; + mutex_unlock(&xrcd->tgt_qp_mutex); + + if (real_qp) { + ret = ib_destroy_qp(real_qp); + if (!ret) + atomic_dec(&xrcd->usecnt); + else + __ib_insert_xrcd_qp(xrcd, real_qp); + } + + return 0; +} + int ib_destroy_qp(struct ib_qp *qp) { struct ib_pd *pd; @@ -601,16 +873,25 @@ int ib_destroy_qp(struct ib_qp *qp) struct ib_srq *srq; int ret; - pd = qp->pd; - scq = qp->send_cq; - rcq = qp->recv_cq; - srq = qp->srq; + if (atomic_read(&qp->usecnt)) + return -EBUSY; + + if (qp->real_qp != qp) + return __ib_destroy_shared_qp(qp); + + pd = qp->pd; + scq = qp->send_cq; + rcq = qp->recv_cq; + srq = qp->srq; ret = qp->device->destroy_qp(qp); if (!ret) { - atomic_dec(&pd->usecnt); - atomic_dec(&scq->usecnt); - atomic_dec(&rcq->usecnt); + if (pd) + atomic_dec(&pd->usecnt); + if (scq) + atomic_dec(&scq->usecnt); + if (rcq) + atomic_dec(&rcq->usecnt); if (srq) atomic_dec(&srq->usecnt); } @@ -920,3 +1201,42 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) return qp->device->detach_mcast(qp, gid, lid); } EXPORT_SYMBOL(ib_detach_mcast); + +struct ib_xrcd 
*ib_alloc_xrcd(struct ib_device *device) +{ + struct ib_xrcd *xrcd; + + if (!device->alloc_xrcd) + return ERR_PTR(-ENOSYS); + + xrcd = device->alloc_xrcd(device, NULL, NULL); + if (!IS_ERR(xrcd)) { + xrcd->device = device; + xrcd->inode = NULL; + atomic_set(&xrcd->usecnt, 0); + mutex_init(&xrcd->tgt_qp_mutex); + INIT_LIST_HEAD(&xrcd->tgt_qp_list); + } + + return xrcd; +} +EXPORT_SYMBOL(ib_alloc_xrcd); + +int ib_dealloc_xrcd(struct ib_xrcd *xrcd) +{ + struct ib_qp *qp; + int ret; + + if (atomic_read(&xrcd->usecnt)) + return -EBUSY; + + while (!list_empty(&xrcd->tgt_qp_list)) { + qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list); + ret = ib_destroy_qp(qp); + if (ret) + return ret; + } + + return xrcd->device->dealloc_xrcd(xrcd); +} +EXPORT_SYMBOL(ib_dealloc_xrcd); diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index f101bb73be6..12f923d64e4 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -753,10 +753,7 @@ static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev) memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6); /* Print out the MAC address */ - pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n", - netdev->name, - netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2], - netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]); + pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr); #if 0 /* Disable network packets */ diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c index 71e0d845da3..abcc9e76962 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c @@ -46,6 +46,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, struct ib_event event; struct iwch_qp_attributes attrs; struct iwch_qp *qhp; + unsigned long flag; spin_lock(&rnicp->lock); qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe)); 
@@ -94,7 +95,9 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, if (qhp->ibqp.event_handler) (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); + spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); if (atomic_dec_and_test(&qhp->refcnt)) wake_up(&qhp->wait); @@ -107,6 +110,7 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) struct iwch_cq *chp; struct iwch_qp *qhp; u32 cqid = RSPQ_CQID(rsp_msg); + unsigned long flag; rnicp = (struct iwch_dev *) rdev_p->ulp; spin_lock(&rnicp->lock); @@ -170,7 +174,9 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) */ if (qhp->ep && SQ_TYPE(rsp_msg->cqe)) dst_confirm(qhp->ep->dst); + spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); break; case TPT_ERR_STAG: diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index c7d9411f295..37c224fc3ad 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -190,6 +190,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve chp->rhp = rhp; chp->ibcq.cqe = 1 << chp->cq.size_log2; spin_lock_init(&chp->lock); + spin_lock_init(&chp->comp_handler_lock); atomic_set(&chp->refcnt, 1); init_waitqueue_head(&chp->wait); if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) { diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index 9a342c9b220..87c14b0c5ac 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -103,6 +103,7 @@ struct iwch_cq { struct iwch_dev *rhp; struct t3_cq cq; spinlock_t lock; + spinlock_t comp_handler_lock; atomic_t refcnt; wait_queue_head_t 
wait; u32 __user *user_rptr_addr; diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index ecd313f359a..bea5839d89e 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -822,8 +822,11 @@ static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, *flag); - if (flushed) + if (flushed) { + spin_lock_irqsave(&rchp->comp_handler_lock, *flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, *flag); + } /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, *flag); @@ -833,8 +836,11 @@ static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, *flag); - if (flushed) + if (flushed) { + spin_lock_irqsave(&schp->comp_handler_lock, *flag); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, *flag); + } /* deref */ if (atomic_dec_and_test(&qhp->refcnt)) @@ -853,11 +859,15 @@ static void flush_qp(struct iwch_qp *qhp, unsigned long *flag) if (qhp->ibqp.uobject) { cxio_set_wq_in_error(&qhp->wq); cxio_set_cq_in_error(&rchp->cq); + spin_lock_irqsave(&rchp->comp_handler_lock, *flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, *flag); if (schp != rchp) { cxio_set_cq_in_error(&schp->cq); + spin_lock_irqsave(&schp->comp_handler_lock, *flag); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, *flag); } return; } diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 1720dc790d1..f35a935267e 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ 
b/drivers/infiniband/hw/cxgb4/cq.c @@ -185,7 +185,7 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) V_CQE_OPCODE(FW_RI_SEND) | V_CQE_TYPE(0) | V_CQE_SWCQE(1) | - V_CQE_QPID(wq->rq.qid)); + V_CQE_QPID(wq->sq.qid)); cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen)); cq->sw_queue[cq->sw_pidx] = cqe; t4_swcq_produce(cq); @@ -818,6 +818,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, chp->cq.size--; /* status page */ chp->ibcq.cqe = entries - 2; spin_lock_init(&chp->lock); + spin_lock_init(&chp->comp_handler_lock); atomic_set(&chp->refcnt, 1); init_waitqueue_head(&chp->wait); ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 40a13cc633a..6d0df6ec161 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -376,10 +376,8 @@ struct uld_ctx { struct c4iw_dev *dev; }; -static void c4iw_remove(struct uld_ctx *ctx) +static void c4iw_dealloc(struct uld_ctx *ctx) { - PDBG("%s c4iw_dev %p\n", __func__, ctx->dev); - c4iw_unregister_device(ctx->dev); c4iw_rdev_close(&ctx->dev->rdev); idr_destroy(&ctx->dev->cqidr); idr_destroy(&ctx->dev->qpidr); @@ -389,11 +387,30 @@ static void c4iw_remove(struct uld_ctx *ctx) ctx->dev = NULL; } +static void c4iw_remove(struct uld_ctx *ctx) +{ + PDBG("%s c4iw_dev %p\n", __func__, ctx->dev); + c4iw_unregister_device(ctx->dev); + c4iw_dealloc(ctx); +} + +static int rdma_supported(const struct cxgb4_lld_info *infop) +{ + return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 && + infop->vr->rq.size > 0 && infop->vr->qp.size > 0 && + infop->vr->cq.size > 0 && infop->vr->ocq.size > 0; +} + static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) { struct c4iw_dev *devp; int ret; + if (!rdma_supported(infop)) { + printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n", + pci_name(infop->pdev)); + return ERR_PTR(-ENOSYS); + } devp = 
(struct c4iw_dev *)ib_alloc_device(sizeof(*devp)); if (!devp) { printk(KERN_ERR MOD "Cannot allocate ib device\n"); @@ -414,7 +431,6 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) ret = c4iw_rdev_open(&devp->rdev); if (ret) { - mutex_unlock(&dev_mutex); printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret); ib_dealloc_device(&devp->ibdev); return ERR_PTR(ret); @@ -519,15 +535,24 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state) case CXGB4_STATE_UP: printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev)); if (!ctx->dev) { - int ret = 0; + int ret; ctx->dev = c4iw_alloc(&ctx->lldi); - if (!IS_ERR(ctx->dev)) - ret = c4iw_register_device(ctx->dev); - if (IS_ERR(ctx->dev) || ret) + if (IS_ERR(ctx->dev)) { + printk(KERN_ERR MOD + "%s: initialization failed: %ld\n", + pci_name(ctx->lldi.pdev), + PTR_ERR(ctx->dev)); + ctx->dev = NULL; + break; + } + ret = c4iw_register_device(ctx->dev); + if (ret) { printk(KERN_ERR MOD "%s: RDMA registration failed: %d\n", pci_name(ctx->lldi.pdev), ret); + c4iw_dealloc(ctx); + } } break; case CXGB4_STATE_DOWN: diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index c13041a0aeb..397cb36cf10 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -42,6 +42,7 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp, { struct ib_event event; struct c4iw_qp_attributes attrs; + unsigned long flag; if ((qhp->attr.state == C4IW_QP_STATE_ERROR) || (qhp->attr.state == C4IW_QP_STATE_TERMINATE)) { @@ -72,7 +73,9 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp, if (qhp->ibqp.event_handler) (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); + spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); } void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) @@ 
-183,11 +186,14 @@ out: int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) { struct c4iw_cq *chp; + unsigned long flag; chp = get_chp(dev, qid); - if (chp) + if (chp) { + spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); - else + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); + } else PDBG("%s unknown cqid 0x%x\n", __func__, qid); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 62cea0e2b15..1357c5bf209 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -309,6 +309,7 @@ struct c4iw_cq { struct c4iw_dev *rhp; struct t4_cq cq; spinlock_t lock; + spinlock_t comp_handler_lock; atomic_t refcnt; wait_queue_head_t wait; }; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index ec3ce675fdf..d6ccc7e8480 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -945,8 +945,11 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, flag); - if (flushed) + if (flushed) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } /* locking hierarchy: cq lock first, then qp lock. 
*/ spin_lock_irqsave(&schp->lock, flag); @@ -956,13 +959,17 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, flag); - if (flushed) + if (flushed) { + spin_lock_irqsave(&schp->comp_handler_lock, flag); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } } static void flush_qp(struct c4iw_qp *qhp) { struct c4iw_cq *rchp, *schp; + unsigned long flag; rchp = get_chp(qhp->rhp, qhp->attr.rcq); schp = get_chp(qhp->rhp, qhp->attr.scq); @@ -970,8 +977,16 @@ static void flush_qp(struct c4iw_qp *qhp) if (qhp->ibqp.uobject) { t4_set_wq_in_error(&qhp->wq); t4_set_cq_in_error(&rchp->cq); - if (schp != rchp) + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + if (schp != rchp) { t4_set_cq_in_error(&schp->cq); + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } return; } __flush_qp(qhp, rchp, schp); @@ -1211,6 +1226,8 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, disconnect = 1; c4iw_get_ep(&qhp->ep->com); } + if (qhp->ibqp.uobject) + t4_set_wq_in_error(&qhp->wq); ret = rdma_fini(rhp, qhp, ep); if (ret) goto err; @@ -1229,6 +1246,8 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, break; case C4IW_QP_STATE_ERROR: set_state(qhp, C4IW_QP_STATE_ERROR); + if (qhp->ibqp.uobject) + t4_set_wq_in_error(&qhp->wq); if (!internal) { abort = 1; disconnect = 1; diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index d9b1bb40f48..818d721fc44 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -125,7 +125,7 @@ int 
ehca_create_eq(struct ehca_shca *shca, tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, - IRQF_DISABLED, "ehca_eq", + 0, "ehca_eq", (void *)shca); if (ret < 0) ehca_err(ib_dev, "Can't map interrupt handler."); @@ -133,7 +133,7 @@ int ehca_create_eq(struct ehca_shca *shca, tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, - IRQF_DISABLED, "ehca_neq", + 0, "ehca_neq", (void *)shca); if (ret < 0) ehca_err(ib_dev, "Can't map interrupt handler."); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 32fb34201ab..964f8552079 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -977,6 +977,9 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, struct hcp_modify_qp_control_block *mqpcb; u64 hret, update_mask; + if (srq_init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + /* For common attributes, internal_create_qp() takes its info * out of qp_init_attr, so copy all common attrs there. 
*/ diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 7c1eebe8c7c..824a4d50883 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -34,6 +34,7 @@ #include <linux/pci.h> #include <linux/netdevice.h> #include <linux/slab.h> +#include <linux/stat.h> #include <linux/vmalloc.h> #include "ipath_kernel.h" diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index 386e2c717c5..26271984b71 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -107,6 +107,11 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, u32 sz; struct ib_srq *ret; + if (srq_init_attr->srq_type != IB_SRQT_BASIC) { + ret = ERR_PTR(-ENOSYS); + goto done; + } + if (srq_init_attr->attr.max_wr == 0) { ret = ERR_PTR(-EINVAL); goto done; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index fa643f4f4e2..77f3dbc0aaa 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -128,6 +128,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) + props->device_cap_flags |= IB_DEVICE_XRC; props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; @@ -181,8 +183,12 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) static int ib_link_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props, + struct ib_smp *in_mad, struct ib_smp *out_mad) { + int ext_active_speed; + int err; + props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); @@ -203,6 +209,39 @@ static 
int ib_link_query_port(struct ib_device *ibdev, u8 port, props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; + /* Check if extended speeds (EDR/FDR/...) are supported */ + if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { + ext_active_speed = out_mad->data[62] >> 4; + + switch (ext_active_speed) { + case 1: + props->active_speed = 16; /* FDR */ + break; + case 2: + props->active_speed = 32; /* EDR */ + break; + } + } + + /* If reported active speed is QDR, check if is FDR-10 */ + if (props->active_speed == 4) { + if (to_mdev(ibdev)->dev->caps.ext_port_cap[port] & + MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { + init_query_mad(in_mad); + in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, + NULL, NULL, in_mad, out_mad); + if (err) + return err; + + /* Checking LinkSpeedActive for FDR-10 */ + if (out_mad->data[15] & 0x1) + props->active_speed = 8; + } + } + return 0; } @@ -227,7 +266,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->pkey_tbl_len = 1; props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); - props->max_mtu = IB_MTU_2048; + props->max_mtu = IB_MTU_4096; props->subnet_timeout = 0; props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = 0; @@ -274,7 +313,7 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, goto out; err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? 
- ib_link_query_port(ibdev, port, props, out_mad) : + ib_link_query_port(ibdev, port, props, in_mad, out_mad) : eth_link_query_port(ibdev, port, props, out_mad); out: @@ -566,6 +605,57 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd) return 0; } +static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx4_ib_xrcd *xrcd; + int err; + + if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return ERR_PTR(-ENOSYS); + + xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL); + if (!xrcd) + return ERR_PTR(-ENOMEM); + + err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn); + if (err) + goto err1; + + xrcd->pd = ib_alloc_pd(ibdev); + if (IS_ERR(xrcd->pd)) { + err = PTR_ERR(xrcd->pd); + goto err2; + } + + xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0); + if (IS_ERR(xrcd->cq)) { + err = PTR_ERR(xrcd->cq); + goto err3; + } + + return &xrcd->ibxrcd; + +err3: + ib_dealloc_pd(xrcd->pd); +err2: + mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn); +err1: + kfree(xrcd); + return ERR_PTR(err); +} + +static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +{ + ib_destroy_cq(to_mxrcd(xrcd)->cq); + ib_dealloc_pd(to_mxrcd(xrcd)->pd); + mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); + kfree(xrcd); + + return 0; +} + static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) { struct mlx4_ib_qp *mqp = to_mqp(ibqp); @@ -1044,7 +1134,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | + (1ull << IB_USER_VERBS_CMD_OPEN_QP); ibdev->ib_dev.query_device = mlx4_ib_query_device; ibdev->ib_dev.query_port = mlx4_ib_query_port; @@ -1093,6 +1185,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.unmap_fmr = 
mlx4_ib_unmap_fmr; ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { + ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; + ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; + ibdev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | + (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + } + spin_lock_init(&iboe->lock); if (init_node_data(ibdev)) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e4bf2cff866..ed80345c99a 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -56,6 +56,13 @@ struct mlx4_ib_pd { u32 pdn; }; +struct mlx4_ib_xrcd { + struct ib_xrcd ibxrcd; + u32 xrcdn; + struct ib_pd *pd; + struct ib_cq *cq; +}; + struct mlx4_ib_cq_buf { struct mlx4_buf buf; struct mlx4_mtt mtt; @@ -138,6 +145,7 @@ struct mlx4_ib_qp { struct mlx4_mtt mtt; int buf_size; struct mutex mutex; + u16 xrcdn; u32 flags; u8 port; u8 alt_port; @@ -211,6 +219,11 @@ static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd) return container_of(ibpd, struct mlx4_ib_pd, ibpd); } +static inline struct mlx4_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd) +{ + return container_of(ibxrcd, struct mlx4_ib_xrcd, ibxrcd); +} + static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, struct mlx4_ib_cq, ibcq); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 3a91d9d8dc5..a16f0c8e6f3 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -302,15 +302,14 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags) } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - int is_user, int has_srq, struct mlx4_ib_qp *qp) + int is_user, int has_rq, struct mlx4_ib_qp *qp) { /* Sanity check RQ size before proceeding */ if (cap->max_recv_wr > dev->dev->caps.max_wqes || cap->max_recv_sge > dev->dev->caps.max_rq_sg) return -EINVAL; - if (has_srq) { - /* QPs 
attached to an SRQ should have no RQ */ + if (!has_rq) { if (cap->max_recv_wr) return -EINVAL; @@ -463,6 +462,14 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev, return 0; } +static int qp_has_rq(struct ib_qp_init_attr *attr) +{ + if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT) + return 0; + + return !attr->srq; +} + static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) @@ -479,7 +486,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp); + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp); if (err) goto err; @@ -513,7 +520,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_mtt; - if (!init_attr->srq) { + if (qp_has_rq(init_attr)) { err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), ucmd.db_addr, &qp->db); if (err) @@ -532,7 +539,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err; - if (!init_attr->srq) { + if (qp_has_rq(init_attr)) { err = mlx4_db_alloc(dev->dev, &qp->db, 0); if (err) goto err; @@ -575,6 +582,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_qpn; + if (init_attr->qp_type == IB_QPT_XRC_TGT) + qp->mqp.qpn |= (1 << 23); + /* * Hardware wants QPN written in big-endian order (after * shifting) for send doorbell. 
Precompute this value to save @@ -592,9 +602,8 @@ err_qpn: err_wrid: if (pd->uobject) { - if (!init_attr->srq) - mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), - &qp->db); + if (qp_has_rq(init_attr)) + mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); } else { kfree(qp->sq.wrid); kfree(qp->rq.wrid); @@ -610,7 +619,7 @@ err_buf: mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); err_db: - if (!pd->uobject && !init_attr->srq) + if (!pd->uobject && qp_has_rq(init_attr)) mlx4_db_free(dev->dev, &qp->db); err: @@ -671,6 +680,33 @@ static void del_gid_entries(struct mlx4_ib_qp *qp) } } +static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp) +{ + if (qp->ibqp.qp_type == IB_QPT_XRC_TGT) + return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd); + else + return to_mpd(qp->ibqp.pd); +} + +static void get_cqs(struct mlx4_ib_qp *qp, + struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq) +{ + switch (qp->ibqp.qp_type) { + case IB_QPT_XRC_TGT: + *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq); + *recv_cq = *send_cq; + break; + case IB_QPT_XRC_INI: + *send_cq = to_mcq(qp->ibqp.send_cq); + *recv_cq = *send_cq; + break; + default: + *send_cq = to_mcq(qp->ibqp.send_cq); + *recv_cq = to_mcq(qp->ibqp.recv_cq); + break; + } +} + static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, int is_user) { @@ -682,8 +718,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n", qp->mqp.qpn); - send_cq = to_mcq(qp->ibqp.send_cq); - recv_cq = to_mcq(qp->ibqp.recv_cq); + get_cqs(qp, &send_cq, &recv_cq); mlx4_ib_lock_cqs(send_cq, recv_cq); @@ -706,7 +741,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_mtt_cleanup(dev->dev, &qp->mtt); if (is_user) { - if (!qp->ibqp.srq) + if (qp->rq.wqe_cnt) mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context), &qp->db); ib_umem_release(qp->umem); @@ -714,7 +749,7 @@ static void 
destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, kfree(qp->sq.wrid); kfree(qp->rq.wrid); mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); - if (!qp->ibqp.srq) + if (qp->rq.wqe_cnt) mlx4_db_free(dev->dev, &qp->db); } @@ -725,10 +760,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { - struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_sqp *sqp; struct mlx4_ib_qp *qp; int err; + u16 xrcdn = 0; /* * We only support LSO and multicast loopback blocking, and @@ -739,10 +774,20 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); if (init_attr->create_flags && - (pd->uobject || init_attr->qp_type != IB_QPT_UD)) + (udata || init_attr->qp_type != IB_QPT_UD)) return ERR_PTR(-EINVAL); switch (init_attr->qp_type) { + case IB_QPT_XRC_TGT: + pd = to_mxrcd(init_attr->xrcd)->pd; + xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; + init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq; + /* fall through */ + case IB_QPT_XRC_INI: + if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return ERR_PTR(-ENOSYS); + init_attr->recv_cq = init_attr->send_cq; + /* fall through */ case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_UD: @@ -751,13 +796,14 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, if (!qp) return ERR_PTR(-ENOMEM); - err = create_qp_common(dev, pd, init_attr, udata, 0, qp); + err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, qp); if (err) { kfree(qp); return ERR_PTR(err); } qp->ibqp.qp_num = qp->mqp.qpn; + qp->xrcdn = xrcdn; break; } @@ -765,7 +811,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_GSI: { /* Userspace is not allowed to create special QPs: */ - if (pd->uobject) + if (udata) return ERR_PTR(-EINVAL); sqp = kzalloc(sizeof *sqp, GFP_KERNEL); @@ -774,8 +820,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, qp = &sqp->qp; - err = create_qp_common(dev, pd, init_attr, udata, - dev->dev->caps.sqp_start + 
+ err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, + to_mdev(pd->device)->dev->caps.sqp_start + (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) + init_attr->port_num - 1, qp); @@ -801,11 +847,13 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) { struct mlx4_ib_dev *dev = to_mdev(qp->device); struct mlx4_ib_qp *mqp = to_mqp(qp); + struct mlx4_ib_pd *pd; if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); - destroy_qp_common(dev, mqp, !!qp->pd->uobject); + pd = get_pd(mqp); + destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); if (is_sqp(dev, mqp)) kfree(to_msqp(mqp)); @@ -821,6 +869,8 @@ static int to_mlx4_st(enum ib_qp_type type) case IB_QPT_RC: return MLX4_QP_ST_RC; case IB_QPT_UC: return MLX4_QP_ST_UC; case IB_QPT_UD: return MLX4_QP_ST_UD; + case IB_QPT_XRC_INI: + case IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; case IB_QPT_SMI: case IB_QPT_GSI: return MLX4_QP_ST_MLX; default: return -1; @@ -959,6 +1009,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, { struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); + struct mlx4_ib_pd *pd; + struct mlx4_ib_cq *send_cq, *recv_cq; struct mlx4_qp_context *context; enum mlx4_qp_optpar optpar = 0; int sqd_event; @@ -1014,8 +1066,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; context->sq_size_stride |= qp->sq.wqe_shift - 4; - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { context->sq_size_stride |= !!qp->sq_no_prefetch << 7; + context->xrcd = cpu_to_be32((u32) qp->xrcdn); + } if (qp->ibqp.uobject) context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index); @@ -1079,8 +1133,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; } - context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn); - context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); + pd = get_pd(qp); + get_cqs(qp, &send_cq, 
&recv_cq); + context->pd = cpu_to_be32(pd->pdn); + context->cqn_send = cpu_to_be32(send_cq->mcq.cqn); + context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn); + context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); /* Set "fast registration enabled" for all kernel QPs */ if (!qp->ibqp.uobject) @@ -1106,8 +1164,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_SQ_PSN) context->next_send_psn = cpu_to_be32(attr->sq_psn); - context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn); - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { if (attr->max_dest_rd_atomic) context->params2 |= @@ -1130,8 +1186,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_RQ_PSN) context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); - context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn); - if (attr_mask & IB_QP_QKEY) { context->qkey = cpu_to_be32(attr->qkey); optpar |= MLX4_QP_OPTPAR_Q_KEY; @@ -1140,7 +1194,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (ibqp->srq) context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); - if (!ibqp->srq && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->db_rec_addr = cpu_to_be64(qp->db.dma); if (cur_state == IB_QPS_INIT && @@ -1225,17 +1279,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, * entries and reinitialize the QP. */ if (new_state == IB_QPS_RESET && !ibqp->uobject) { - mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn, + mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, ibqp->srq ? 
to_msrq(ibqp->srq): NULL); - if (ibqp->send_cq != ibqp->recv_cq) - mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL); + if (send_cq != recv_cq) + mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); qp->rq.head = 0; qp->rq.tail = 0; qp->sq.head = 0; qp->sq.tail = 0; qp->sq_next_wqe = 0; - if (!ibqp->srq) + if (qp->rq.wqe_cnt) *qp->db.db = 0; } @@ -1547,14 +1601,13 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr, __be16 *vlan) + struct ib_send_wr *wr) { memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); - *vlan = dseg->vlan; } static void set_mlx_icrc_seg(void *dseg) @@ -1657,7 +1710,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, __be32 uninitialized_var(lso_hdr_sz); __be32 blh; int i; - __be16 vlan = cpu_to_be16(0xffff); spin_lock_irqsave(&qp->sq.lock, flags); @@ -1761,7 +1813,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_QPT_UD: - set_datagram_seg(wqe, wr, &vlan); + set_datagram_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; @@ -1824,11 +1876,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? 
MLX4_WQE_CTRL_FENCE : 0) | size; - if (be16_to_cpu(vlan) < 0x1000) { - ctrl->ins_vlan = 1 << 6; - ctrl->vlan_tag = vlan; - } - /* * Make sure descriptor is fully written before * setting ownership bit (because HW can start diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 818b7ecace5..39542f3703b 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -76,6 +76,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, struct mlx4_ib_srq *srq; struct mlx4_wqe_srq_next_seg *next; struct mlx4_wqe_data_seg *scatter; + u32 cqn; + u16 xrcdn; int desc_size; int buf_size; int err; @@ -174,12 +176,18 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, } } - err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt, + cqn = (init_attr->srq_type == IB_SRQT_XRC) ? + to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0; + xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ? + to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn : + (u16) dev->dev->caps.reserved_xrcds; + err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt, srq->db.dma, &srq->msrq); if (err) goto err_wrid; srq->msrq.event = mlx4_ib_srq_event; + srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; if (pd->uobject) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 365fe0e1419..cb9a0b97680 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -438,6 +438,9 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd, struct mthca_srq *srq; int err; + if (init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + srq = kmalloc(sizeof *srq, GFP_KERNEL); if (!srq) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/nes/Makefile b/drivers/infiniband/hw/nes/Makefile index 35148513c47..97820c23ece 100644 --- a/drivers/infiniband/hw/nes/Makefile +++ 
b/drivers/infiniband/hw/nes/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o -iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o +iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o nes_mgt.o diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 2d668c69f6d..5965b3df8f2 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -84,7 +84,7 @@ module_param(send_first, int, 0644); MODULE_PARM_DESC(send_first, "Send RDMA Message First on Active Connection"); -unsigned int nes_drv_opt = 0; +unsigned int nes_drv_opt = NES_DRV_OPT_DISABLE_INT_MOD | NES_DRV_OPT_ENABLE_PAU; module_param(nes_drv_opt, int, 0644); MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters"); @@ -130,9 +130,6 @@ static struct notifier_block nes_net_notifier = { .notifier_call = nes_net_event }; - - - /** * nes_inetaddr_event */ @@ -321,6 +318,9 @@ void nes_rem_ref(struct ib_qp *ibqp) } if (atomic_dec_and_test(&nesqp->refcount)) { + if (nesqp->pau_mode) + nes_destroy_pau_qp(nesdev, nesqp); + /* Destroy the QP */ cqp_request = nes_get_cqp_request(nesdev); if (cqp_request == NULL) { diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 6fe79876009..568b4f11380 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -102,6 +102,7 @@ #define NES_DRV_OPT_NO_INLINE_DATA 0x00000080 #define NES_DRV_OPT_DISABLE_INT_MOD 0x00000100 #define NES_DRV_OPT_DISABLE_VIRT_WQ 0x00000200 +#define NES_DRV_OPT_ENABLE_PAU 0x00000400 #define NES_AEQ_EVENT_TIMEOUT 2500 #define NES_DISCONNECT_EVENT_TIMEOUT 2000 @@ -128,6 +129,7 @@ #define NES_DBG_IW_RX 0x00020000 #define NES_DBG_IW_TX 0x00040000 #define NES_DBG_SHUTDOWN 0x00080000 +#define NES_DBG_PAU 0x00100000 #define NES_DBG_RSVD1 0x10000000 #define NES_DBG_RSVD2 0x20000000 #define NES_DBG_RSVD3 0x40000000 @@ -162,6 +164,7 @@ do { \ #include "nes_context.h" #include "nes_user.h" #include 
"nes_cm.h" +#include "nes_mgt.h" extern int max_mtu; #define max_frame_len (max_mtu+ETH_HLEN) @@ -202,6 +205,8 @@ extern atomic_t cm_nodes_created; extern atomic_t cm_nodes_destroyed; extern atomic_t cm_accel_dropped_pkts; extern atomic_t cm_resets_recvd; +extern atomic_t pau_qps_created; +extern atomic_t pau_qps_destroyed; extern u32 int_mod_timer_init; extern u32 int_mod_cq_depth_256; @@ -273,6 +278,14 @@ struct nes_device { u8 link_recheck; }; +/* Receive skb private area - must fit in skb->cb area */ +struct nes_rskb_cb { + u64 busaddr; + u32 maplen; + u32 seqnum; + u8 *data_start; + struct nes_qp *nesqp; +}; static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad) { @@ -305,8 +318,8 @@ set_wqe_32bit_value(__le32 *wqe_words, u32 index, u32 value) static inline void nes_fill_init_cqp_wqe(struct nes_hw_cqp_wqe *cqp_wqe, struct nes_device *nesdev) { - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_CTX_LOW_IDX, - (u64)((unsigned long) &nesdev->cqp)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 0; diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 7dc43ea7d4e..dfce9ea98a3 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -156,6 +156,15 @@ atomic_t cm_connecteds; atomic_t cm_connect_reqs; atomic_t cm_rejects; +int nes_add_ref_cm_node(struct nes_cm_node *cm_node) +{ + return add_ref_cm_node(cm_node); +} + +int nes_rem_ref_cm_node(struct nes_cm_node *cm_node) +{ + return rem_ref_cm_node(cm_node->cm_core, cm_node); +} /** * create_event @@ -2559,9 +2568,13 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, } add_ref_cm_node(cm_node); } else if (cm_node->state == NES_CM_STATE_TSA) { - rem_ref_cm_node(cm_core, cm_node); - 
atomic_inc(&cm_accel_dropped_pkts); - dev_kfree_skb_any(skb); + if (cm_node->nesqp->pau_mode) + nes_queue_mgt_skbs(skb, nesvnic, cm_node->nesqp); + else { + rem_ref_cm_node(cm_core, cm_node); + atomic_inc(&cm_accel_dropped_pkts); + dev_kfree_skb_any(skb); + } break; } skb_reset_network_header(skb); diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 85f53d9521b..bdfa1fbb35f 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -461,5 +461,7 @@ int nes_destroy_listen(struct iw_cm_id *); int nes_cm_recv(struct sk_buff *, struct net_device *); int nes_cm_start(void); int nes_cm_stop(void); +int nes_add_ref_cm_node(struct nes_cm_node *cm_node); +int nes_rem_ref_cm_node(struct nes_cm_node *cm_node); #endif /* NES_CM_H */ diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index be36cbeae63..7c0ff19ce38 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -110,6 +110,14 @@ static unsigned char *nes_tcp_state_str[] = { }; #endif +static inline void print_ip(struct nes_cm_node *cm_node) +{ + unsigned char *rem_addr; + if (cm_node) { + rem_addr = (unsigned char *)&cm_node->rem_addr; + printk(KERN_ERR PFX "Remote IP addr: %pI4\n", rem_addr); + } +} /** * nes_nic_init_timer_defaults @@ -1555,6 +1563,7 @@ static void nes_replenish_nic_rq(struct nes_vnic *nesvnic) struct nes_hw_nic_rq_wqe *nic_rqe; struct nes_hw_nic *nesnic; struct nes_device *nesdev; + struct nes_rskb_cb *cb; u32 rx_wqes_posted = 0; nesnic = &nesvnic->nic; @@ -1580,6 +1589,9 @@ static void nes_replenish_nic_rq(struct nes_vnic *nesvnic) bus_address = pci_map_single(nesdev->pcidev, skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->busaddr = bus_address; + cb->maplen = nesvnic->max_frame_size; nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_head]; nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = @@ -1669,6 +1681,7 
@@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) u32 cqp_head; u32 counter; u32 wqe_count; + struct nes_rskb_cb *cb; u8 jumbomode=0; /* Allocate fragment, SQ, RQ, and CQ; Reuse CEQ based on the PCI function */ @@ -1845,6 +1858,9 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) pmem = pci_map_single(nesdev->pcidev, skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->busaddr = pmem; + cb->maplen = nesvnic->max_frame_size; nic_rqe = &nesvnic->nic.rq_vbase[counter]; nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size); @@ -1873,6 +1889,13 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) jumbomode = 1; nes_nic_init_timer_defaults(nesdev, jumbomode); } + if ((nesdev->nesadapter->allow_unaligned_fpdus) && + (nes_init_mgt_qp(nesdev, netdev, nesvnic))) { + nes_debug(NES_DBG_INIT, "%s: Out of memory for pau nic\n", netdev->name); + nes_destroy_nic_qp(nesvnic); + return -ENOMEM; + } + nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr; nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; @@ -1895,28 +1918,29 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic) struct nes_device *nesdev = nesvnic->nesdev; struct nes_hw_cqp_wqe *cqp_wqe; struct nes_hw_nic_sq_wqe *nic_sqe; - struct nes_hw_nic_rq_wqe *nic_rqe; __le16 *wqe_fragment_length; u16 wqe_fragment_index; - u64 wqe_frag; u32 cqp_head; u32 wqm_cfg0; unsigned long flags; + struct sk_buff *rx_skb; + struct nes_rskb_cb *cb; int ret; + if (nesdev->nesadapter->allow_unaligned_fpdus) + nes_destroy_mgt(nesvnic); + /* clear wqe stall before destroying NIC QP */ wqm_cfg0 = nes_read_indexed(nesdev, NES_IDX_WQM_CONFIG0); nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0 & 0xFFFF7FFF); /* Free remaining NIC receive buffers */ while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) { - nic_rqe = 
&nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail]; - wqe_frag = (u64)le32_to_cpu( - nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]); - wqe_frag |= ((u64)le32_to_cpu( - nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX]))<<32; - pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag, - nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + rx_skb = nesvnic->nic.rx_skb[nesvnic->nic.rq_tail]; + cb = (struct nes_rskb_cb *)&rx_skb->cb[0]; + pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, + PCI_DMA_FROMDEVICE); + dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]); nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1); } @@ -2775,6 +2799,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) struct nes_hw_nic_sq_wqe *nic_sqe; struct sk_buff *skb; struct sk_buff *rx_skb; + struct nes_rskb_cb *cb; __le16 *wqe_fragment_length; u32 head; u32 cq_size; @@ -2859,6 +2884,8 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) bus_address += ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32; pci_unmap_single(nesdev->pcidev, bus_address, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&rx_skb->cb[0]; + cb->busaddr = 0; /* rx_skb->tail = rx_skb->data + rx_pkt_size; */ /* rx_skb->len = rx_pkt_size; */ rx_skb->len = 0; /* TODO: see if this is necessary */ @@ -2983,6 +3010,7 @@ skip_rx_indicate0: } + /** * nes_cqp_ce_handler */ @@ -2997,6 +3025,8 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) u32 cq_size; u32 cqe_count=0; u32 error_code; + u32 opcode; + u32 ctx_index; /* u32 counter; */ head = cq->cq_head; @@ -3007,12 +3037,9 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) /* nes_debug(NES_DBG_CQP, "head=%u cqe_words=%08X\n", head, le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])); */ - if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) { - u64temp = 
(((u64)(le32_to_cpu(cq->cq_vbase[head]. - cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) | - ((u64)(le32_to_cpu(cq->cq_vbase[head]. - cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]))); - cqp = *((struct nes_hw_cqp **)&u64temp); + opcode = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]); + if (opcode & NES_CQE_VALID) { + cqp = &nesdev->cqp; error_code = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX]); if (error_code) { @@ -3021,15 +3048,14 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])&0x3f, (u16)(error_code >> 16), (u16)error_code); - nes_debug(NES_DBG_CQP, "cqp: qp_id=%u, sq_head=%u, sq_tail=%u\n", - cqp->qp_id, cqp->sq_head, cqp->sq_tail); } - u64temp = (((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail]. - wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) | - ((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail]. - wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX]))); - cqp_request = *((struct nes_cqp_request **)&u64temp); + u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head]. + cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) | + ((u64)(le32_to_cpu(cq->cq_vbase[head]. 
+ cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]))); + + cqp_request = (struct nes_cqp_request *)(unsigned long)u64temp; if (cqp_request) { if (cqp_request->waiting) { /* nes_debug(NES_DBG_CQP, "%s: Waking up requestor\n"); */ @@ -3075,9 +3101,15 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) cqp_wqe = &nesdev->cqp.sq_vbase[head]; memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe)); barrier(); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = + + opcode = cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]; + if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT) + ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX; + else + ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX; + cqp_wqe->wqe_words[ctx_index] = cpu_to_le32((u32)((unsigned long)cqp_request)); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = + cqp_wqe->wqe_words[ctx_index + 1] = cpu_to_le32((u32)(upper_32_bits((unsigned long)cqp_request))); nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) put on CQPs SQ wqe%u.\n", cqp_request, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, head); @@ -3093,7 +3125,6 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) nes_read32(nesdev->regs+NES_CQE_ALLOC); } - static u8 *locate_mpa(u8 *pkt, u32 aeq_info) { if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) { @@ -3553,9 +3584,9 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]); if (aeq_info & NES_AEQE_QP) { - if ((!nes_is_resource_allocated(nesadapter, nesadapter->allocated_qps, - aeqe_cq_id)) || - (atomic_read(&nesqp->close_timer_started))) + if (!nes_is_resource_allocated(nesadapter, + nesadapter->allocated_qps, + aeqe_cq_id)) return; } @@ -3566,8 +3597,7 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, if (atomic_inc_return(&nesqp->close_timer_started) == 1) { if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) && - (nesqp->ibqp_state == 
IB_QPS_RTS) && - ((nesadapter->eeprom_version >> 16) != NES_A0)) { + (nesqp->ibqp_state == IB_QPS_RTS)) { spin_lock_irqsave(&nesqp->lock, flags); nesqp->hw_iwarp_state = iwarp_state; nesqp->hw_tcp_state = tcp_state; @@ -3594,9 +3624,10 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, return; } spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; spin_unlock_irqrestore(&nesqp->lock, flags); - nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_CLOSING, 0, 0); nes_cm_disconn(nesqp); break; @@ -3694,7 +3725,9 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP: printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n", nesqp->hwqp.qp_id, async_event_id); - nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL); + print_ip(nesqp->cm_node); + if (!atomic_read(&nesqp->close_timer_started)) + nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL); break; case NES_AEQE_AEID_CQ_OPERATION_ERROR: diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index c3241479ec0..0b590e152c6 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -47,6 +47,11 @@ #define NES_MULTICAST_PF_MAX 8 #define NES_A0 3 +#define NES_ENABLE_PAU 0x07000001 +#define NES_DISABLE_PAU 0x07000000 +#define NES_PAU_COUNTER 10 +#define NES_CQP_OPCODE_MASK 0x3f + enum pci_regs { NES_INT_STAT = 0x0000, NES_INT_MASK = 0x0004, @@ -73,8 +78,10 @@ enum indexed_regs { NES_IDX_QP_CONTROL = 0x0040, NES_IDX_FLM_CONTROL = 0x0080, NES_IDX_INT_CPU_STATUS = 0x00a0, + NES_IDX_GPR_TRIGGER = 0x00bc, NES_IDX_GPIO_CONTROL = 0x00f0, NES_IDX_GPIO_DATA = 0x00f4, + NES_IDX_GPR2 = 0x010c, NES_IDX_TCP_CONFIG0 = 0x01e4, NES_IDX_TCP_TIMER_CONFIG = 0x01ec, NES_IDX_TCP_NOW = 0x01f0, @@ -202,6 +209,7 @@ 
enum nes_cqp_opcodes { NES_CQP_REGISTER_SHARED_STAG = 0x0c, NES_CQP_DEALLOCATE_STAG = 0x0d, NES_CQP_MANAGE_ARP_CACHE = 0x0f, + NES_CQP_DOWNLOAD_SEGMENT = 0x10, NES_CQP_SUSPEND_QPS = 0x11, NES_CQP_UPLOAD_CONTEXT = 0x13, NES_CQP_CREATE_CEQ = 0x16, @@ -210,7 +218,8 @@ enum nes_cqp_opcodes { NES_CQP_DESTROY_AEQ = 0x1b, NES_CQP_LMI_ACCESS = 0x20, NES_CQP_FLUSH_WQES = 0x22, - NES_CQP_MANAGE_APBVT = 0x23 + NES_CQP_MANAGE_APBVT = 0x23, + NES_CQP_MANAGE_QUAD_HASH = 0x25 }; enum nes_cqp_wqe_word_idx { @@ -222,6 +231,14 @@ enum nes_cqp_wqe_word_idx { NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX = 5, }; +enum nes_cqp_wqe_word_download_idx { /* format differs from other cqp ops */ + NES_CQP_WQE_DL_OPCODE_IDX = 0, + NES_CQP_WQE_DL_COMP_CTX_LOW_IDX = 1, + NES_CQP_WQE_DL_COMP_CTX_HIGH_IDX = 2, + NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX = 3 + /* For index values 4-15 use NES_NIC_SQ_WQE_ values */ +}; + enum nes_cqp_cq_wqeword_idx { NES_CQP_CQ_WQE_PBL_LOW_IDX = 6, NES_CQP_CQ_WQE_PBL_HIGH_IDX = 7, @@ -242,6 +259,7 @@ enum nes_cqp_stag_wqeword_idx { NES_CQP_STAG_WQE_PBL_LEN_IDX = 14 }; +#define NES_CQP_OP_LOGICAL_PORT_SHIFT 26 #define NES_CQP_OP_IWARP_STATE_SHIFT 28 #define NES_CQP_OP_TERMLEN_SHIFT 28 @@ -599,6 +617,7 @@ enum nes_nic_sq_wqe_bits { enum nes_nic_cqe_word_idx { NES_NIC_CQE_ACCQP_ID_IDX = 0, + NES_NIC_CQE_HASH_RCVNXT = 1, NES_NIC_CQE_TAG_PKT_TYPE_IDX = 2, NES_NIC_CQE_MISC_IDX = 3, }; @@ -1005,6 +1024,11 @@ struct nes_arp_entry { #define NES_NIC_CQ_DOWNWARD_TREND 16 #define NES_PFT_SIZE 48 +#define NES_MGT_WQ_COUNT 32 +#define NES_MGT_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_32) | (NES_NIC_CTX_SQ_SIZE_32)) +#define NES_MGT_QP_OFFSET 36 +#define NES_MGT_QP_COUNT 4 + struct nes_hw_tune_timer { /* u16 cq_count; */ u16 threshold_low; @@ -1118,6 +1142,7 @@ struct nes_adapter { u32 et_rate_sample_interval; u32 timer_int_limit; u32 wqm_quanta; + u8 allow_unaligned_fpdus; /* Adapter base MAC address */ u32 mac_addr_low; @@ -1251,6 +1276,14 @@ struct nes_vnic { enum ib_event_type delayed_event; enum 
ib_event_type last_dispatched_event; spinlock_t port_ibevent_lock; + u32 mgt_mem_size; + void *mgt_vbase; + dma_addr_t mgt_pbase; + struct nes_vnic_mgt *mgtvnic[NES_MGT_QP_COUNT]; + struct task_struct *mgt_thread; + wait_queue_head_t mgt_wait_queue; + struct sk_buff_head mgt_skb_list; + }; struct nes_ib_device { diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c new file mode 100644 index 00000000000..b3b2a240c6e --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -0,0 +1,1162 @@ +/* + * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/kthread.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <net/tcp.h> +#include "nes.h" +#include "nes_mgt.h" + +atomic_t pau_qps_created; +atomic_t pau_qps_destroyed; + +static void nes_replenish_mgt_rq(struct nes_vnic_mgt *mgtvnic) +{ + unsigned long flags; + dma_addr_t bus_address; + struct sk_buff *skb; + struct nes_hw_nic_rq_wqe *nic_rqe; + struct nes_hw_mgt *nesmgt; + struct nes_device *nesdev; + struct nes_rskb_cb *cb; + u32 rx_wqes_posted = 0; + + nesmgt = &mgtvnic->mgt; + nesdev = mgtvnic->nesvnic->nesdev; + spin_lock_irqsave(&nesmgt->rq_lock, flags); + if (nesmgt->replenishing_rq != 0) { + if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) && + (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) { + atomic_set(&mgtvnic->rx_skb_timer_running, 1); + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */ + add_timer(&mgtvnic->rq_wqes_timer); + } else { + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + } + return; + } + nesmgt->replenishing_rq = 1; + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + do { + skb = dev_alloc_skb(mgtvnic->nesvnic->max_frame_size); + if (skb) { + skb->dev = mgtvnic->nesvnic->netdev; + + bus_address = pci_map_single(nesdev->pcidev, + skb->data, mgtvnic->nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->busaddr = bus_address; + cb->maplen = mgtvnic->nesvnic->max_frame_size; + + nic_rqe = &nesmgt->rq_vbase[mgtvnic->mgt.rq_head]; + nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = + cpu_to_le32(mgtvnic->nesvnic->max_frame_size); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = + cpu_to_le32((u32)bus_address); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = + cpu_to_le32((u32)((u64)bus_address >> 32)); + nesmgt->rx_skb[nesmgt->rq_head] = skb; 
+ nesmgt->rq_head++; + nesmgt->rq_head &= nesmgt->rq_size - 1; + atomic_dec(&mgtvnic->rx_skbs_needed); + barrier(); + if (++rx_wqes_posted == 255) { + nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id); + rx_wqes_posted = 0; + } + } else { + spin_lock_irqsave(&nesmgt->rq_lock, flags); + if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) && + (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) { + atomic_set(&mgtvnic->rx_skb_timer_running, 1); + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */ + add_timer(&mgtvnic->rq_wqes_timer); + } else { + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + } + break; + } + } while (atomic_read(&mgtvnic->rx_skbs_needed)); + barrier(); + if (rx_wqes_posted) + nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id); + nesmgt->replenishing_rq = 0; +} + +/** + * nes_mgt_rq_wqes_timeout + */ +static void nes_mgt_rq_wqes_timeout(unsigned long parm) +{ + struct nes_vnic_mgt *mgtvnic = (struct nes_vnic_mgt *)parm; + + atomic_set(&mgtvnic->rx_skb_timer_running, 0); + if (atomic_read(&mgtvnic->rx_skbs_needed)) + nes_replenish_mgt_rq(mgtvnic); +} + +/** + * nes_mgt_free_skb - unmap and free skb + */ +static void nes_mgt_free_skb(struct nes_device *nesdev, struct sk_buff *skb, u32 dir) +{ + struct nes_rskb_cb *cb; + + cb = (struct nes_rskb_cb *)&skb->cb[0]; + pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, dir); + cb->busaddr = 0; + dev_kfree_skb_any(skb); +} + +/** + * nes_download_callback - handle download completions + */ +static void nes_download_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request) +{ + struct pau_fpdu_info *fpdu_info = cqp_request->cqp_callback_pointer; + struct nes_qp *nesqp = fpdu_info->nesqp; + struct sk_buff *skb; + int i; + + for (i = 0; i < fpdu_info->frag_cnt; i++) { + skb = fpdu_info->frags[i].skb; + if (fpdu_info->frags[i].cmplt) { + 
nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE); + nes_rem_ref_cm_node(nesqp->cm_node); + } + } + + if (fpdu_info->hdr_vbase) + pci_free_consistent(nesdev->pcidev, fpdu_info->hdr_len, + fpdu_info->hdr_vbase, fpdu_info->hdr_pbase); + kfree(fpdu_info); +} + +/** + * nes_get_seq - Get the seq, ack_seq and window from the packet + */ +static u32 nes_get_seq(struct sk_buff *skb, u32 *ack, u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd) +{ + struct nes_rskb_cb *cb = (struct nes_rskb_cb *)&skb->cb[0]; + struct iphdr *iph = (struct iphdr *)(cb->data_start + ETH_HLEN); + struct tcphdr *tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + + *ack = be32_to_cpu(tcph->ack_seq); + *wnd = be16_to_cpu(tcph->window); + *fin_rcvd = tcph->fin; + *rst_rcvd = tcph->rst; + return be32_to_cpu(tcph->seq); +} + +/** + * nes_get_next_skb - Get the next skb based on where current skb is in the queue + */ +static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp *nesqp, + struct sk_buff *skb, u32 nextseq, u32 *ack, + u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd) +{ + u32 seq; + bool processacks; + struct sk_buff *old_skb; + + if (skb) { + /* Continue processing fpdu */ + if (skb->next == (struct sk_buff *)&nesqp->pau_list) + goto out; + skb = skb->next; + processacks = false; + } else { + /* Starting a new one */ + if (skb_queue_empty(&nesqp->pau_list)) + goto out; + skb = skb_peek(&nesqp->pau_list); + processacks = true; + } + + while (1) { + seq = nes_get_seq(skb, ack, wnd, fin_rcvd, rst_rcvd); + if (seq == nextseq) { + if (skb->len || processacks) + break; + } else if (after(seq, nextseq)) { + goto out; + } + + if (skb->next == (struct sk_buff *)&nesqp->pau_list) + goto out; + + old_skb = skb; + skb = skb->next; + skb_unlink(old_skb, &nesqp->pau_list); + nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE); + nes_rem_ref_cm_node(nesqp->cm_node); + } + return skb; + +out: + return NULL; +} + +/** + * get_fpdu_info - Find the next complete fpdu and return its fragments. 
+ */ +static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, + struct pau_fpdu_info **pau_fpdu_info) +{ + struct sk_buff *skb; + struct iphdr *iph; + struct tcphdr *tcph; + struct nes_rskb_cb *cb; + struct pau_fpdu_info *fpdu_info = NULL; + struct pau_fpdu_frag frags[MAX_FPDU_FRAGS]; + unsigned long flags; + u32 fpdu_len = 0; + u32 tmp_len; + int frag_cnt = 0; + u32 tot_len; + u32 frag_tot; + u32 ack; + u32 fin_rcvd; + u32 rst_rcvd; + u16 wnd; + int i; + int rc = 0; + + *pau_fpdu_info = NULL; + + spin_lock_irqsave(&nesqp->pau_lock, flags); + skb = nes_get_next_skb(nesdev, nesqp, NULL, nesqp->pau_rcv_nxt, &ack, &wnd, &fin_rcvd, &rst_rcvd); + if (!skb) { + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + goto out; + } + cb = (struct nes_rskb_cb *)&skb->cb[0]; + if (skb->len) { + fpdu_len = be16_to_cpu(*(__be16 *) skb->data) + MPA_FRAMING; + fpdu_len = (fpdu_len + 3) & 0xfffffffc; + tmp_len = fpdu_len; + + /* See if we have all of the fpdu */ + frag_tot = 0; + memset(&frags, 0, sizeof frags); + for (i = 0; i < MAX_FPDU_FRAGS; i++) { + frags[i].physaddr = cb->busaddr; + frags[i].physaddr += skb->data - cb->data_start; + frags[i].frag_len = min(tmp_len, skb->len); + frags[i].skb = skb; + frags[i].cmplt = (skb->len == frags[i].frag_len); + frag_tot += frags[i].frag_len; + frag_cnt++; + + tmp_len -= frags[i].frag_len; + if (tmp_len == 0) + break; + + skb = nes_get_next_skb(nesdev, nesqp, skb, + nesqp->pau_rcv_nxt + frag_tot, &ack, &wnd, &fin_rcvd, &rst_rcvd); + if (!skb) { + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + goto out; + } else if (rst_rcvd) { + /* rst received in the middle of fpdu */ + for (; i >= 0; i--) { + skb_unlink(frags[i].skb, &nesqp->pau_list); + nes_mgt_free_skb(nesdev, frags[i].skb, PCI_DMA_TODEVICE); + } + cb = (struct nes_rskb_cb *)&skb->cb[0]; + frags[0].physaddr = cb->busaddr; + frags[0].physaddr += skb->data - cb->data_start; + frags[0].frag_len = skb->len; + frags[0].skb = skb; + frags[0].cmplt = true; + frag_cnt = 1; 
+ break; + } + + cb = (struct nes_rskb_cb *)&skb->cb[0]; + } + } else { + /* no data */ + frags[0].physaddr = cb->busaddr; + frags[0].frag_len = 0; + frags[0].skb = skb; + frags[0].cmplt = true; + frag_cnt = 1; + } + + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + + /* Found one */ + fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC); + if (fpdu_info == NULL) { + nes_debug(NES_DBG_PAU, "Failed to alloc a fpdu_info.\n"); + rc = -ENOMEM; + goto out; + } + + fpdu_info->cqp_request = nes_get_cqp_request(nesdev); + if (fpdu_info->cqp_request == NULL) { + nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n"); + rc = -ENOMEM; + goto out; + } + + cb = (struct nes_rskb_cb *)&frags[0].skb->cb[0]; + iph = (struct iphdr *)(cb->data_start + ETH_HLEN); + tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + fpdu_info->hdr_len = (((unsigned char *)tcph) + 4 * (tcph->doff)) - cb->data_start; + fpdu_info->data_len = fpdu_len; + tot_len = fpdu_info->hdr_len + fpdu_len - ETH_HLEN; + + if (frags[0].cmplt) { + fpdu_info->hdr_pbase = cb->busaddr; + fpdu_info->hdr_vbase = NULL; + } else { + fpdu_info->hdr_vbase = pci_alloc_consistent(nesdev->pcidev, + fpdu_info->hdr_len, &fpdu_info->hdr_pbase); + if (!fpdu_info->hdr_vbase) { + nes_debug(NES_DBG_PAU, "Unable to allocate memory for pau first frag\n"); + rc = -ENOMEM; + goto out; + } + + /* Copy hdrs, adjusting len and seqnum */ + memcpy(fpdu_info->hdr_vbase, cb->data_start, fpdu_info->hdr_len); + iph = (struct iphdr *)(fpdu_info->hdr_vbase + ETH_HLEN); + tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + } + + iph->tot_len = cpu_to_be16(tot_len); + iph->saddr = cpu_to_be32(0x7f000001); + + tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt); + tcph->ack_seq = cpu_to_be32(ack); + tcph->window = cpu_to_be16(wnd); + + nesqp->pau_rcv_nxt += fpdu_len + fin_rcvd; + + memcpy(fpdu_info->frags, frags, sizeof(fpdu_info->frags)); + fpdu_info->frag_cnt = frag_cnt; + fpdu_info->nesqp = nesqp; + *pau_fpdu_info = fpdu_info; + + /* Update 
skb's for next pass */ + for (i = 0; i < frag_cnt; i++) { + cb = (struct nes_rskb_cb *)&frags[i].skb->cb[0]; + skb_pull(frags[i].skb, frags[i].frag_len); + + if (frags[i].skb->len == 0) { + /* Pull skb off the list - it will be freed in the callback */ + spin_lock_irqsave(&nesqp->pau_lock, flags); + skb_unlink(frags[i].skb, &nesqp->pau_list); + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + } else { + /* Last skb still has data so update the seq */ + iph = (struct iphdr *)(cb->data_start + ETH_HLEN); + tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt); + } + } + +out: + if (rc) { + if (fpdu_info) { + if (fpdu_info->cqp_request) + nes_put_cqp_request(nesdev, fpdu_info->cqp_request); + kfree(fpdu_info); + } + } + return rc; +} + +/** + * forward_fpdu - send complete fpdus, one at a time + */ +static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + struct nes_device *nesdev = nesvnic->nesdev; + struct pau_fpdu_info *fpdu_info; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + u64 u64tmp; + u32 u32tmp; + int rc; + + while (1) { + rc = get_fpdu_info(nesdev, nesqp, &fpdu_info); + if (fpdu_info == NULL) + return rc; + + cqp_request = fpdu_info->cqp_request; + cqp_wqe = &cqp_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_OPCODE_IDX, + NES_CQP_DOWNLOAD_SEGMENT | + (((u32)nesvnic->logical_port) << NES_CQP_OP_LOGICAL_PORT_SHIFT)); + + u32tmp = fpdu_info->hdr_len << 16; + u32tmp |= fpdu_info->hdr_len + (u32)fpdu_info->data_len; + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX, + u32tmp); + + u32tmp = (fpdu_info->frags[1].frag_len << 16) | fpdu_info->frags[0].frag_len; + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_2_1_IDX, + u32tmp); + + u32tmp = (fpdu_info->frags[3].frag_len << 16) | fpdu_info->frags[2].frag_len; + set_wqe_32bit_value(cqp_wqe->wqe_words, 
NES_NIC_SQ_WQE_LENGTH_4_3_IDX, + u32tmp); + + u64tmp = (u64)fpdu_info->hdr_pbase; + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX, + lower_32_bits(u64tmp)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_HIGH_IDX, + upper_32_bits(u64tmp >> 32)); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, + lower_32_bits(fpdu_info->frags[0].physaddr)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_HIGH_IDX, + upper_32_bits(fpdu_info->frags[0].physaddr)); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_LOW_IDX, + lower_32_bits(fpdu_info->frags[1].physaddr)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_HIGH_IDX, + upper_32_bits(fpdu_info->frags[1].physaddr)); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_LOW_IDX, + lower_32_bits(fpdu_info->frags[2].physaddr)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_HIGH_IDX, + upper_32_bits(fpdu_info->frags[2].physaddr)); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_LOW_IDX, + lower_32_bits(fpdu_info->frags[3].physaddr)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_HIGH_IDX, + upper_32_bits(fpdu_info->frags[3].physaddr)); + + cqp_request->cqp_callback_pointer = fpdu_info; + cqp_request->callback = 1; + cqp_request->cqp_callback = nes_download_callback; + + atomic_set(&cqp_request->refcount, 1); + nes_post_cqp_request(nesdev, cqp_request); + } + + return 0; +} + +static void process_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + int again = 1; + unsigned long flags; + + do { + /* Ignore rc - if it failed, tcp retries will cause it to try again */ + forward_fpdus(nesvnic, nesqp); + + spin_lock_irqsave(&nesqp->pau_lock, flags); + if (nesqp->pau_pending) { + nesqp->pau_pending = 0; + } else { + nesqp->pau_busy = 0; + again = 0; + } + + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + } while (again); +} + +/** + * queue_fpdus - Handle 
fpdu's that hw passed up to sw + */ +static void queue_fpdus(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + struct sk_buff *tmpskb; + struct nes_rskb_cb *cb; + struct iphdr *iph; + struct tcphdr *tcph; + unsigned char *tcph_end; + u32 rcv_nxt; + u32 rcv_wnd; + u32 seqnum; + u32 len; + bool process_it = false; + unsigned long flags; + + /* Move data ptr to after tcp header */ + iph = (struct iphdr *)skb->data; + tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + seqnum = be32_to_cpu(tcph->seq); + tcph_end = (((char *)tcph) + (4 * tcph->doff)); + + len = be16_to_cpu(iph->tot_len); + if (skb->len > len) + skb_trim(skb, len); + skb_pull(skb, tcph_end - skb->data); + + /* Initialize tracking values */ + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->seqnum = seqnum; + + /* Make sure data is in the receive window */ + rcv_nxt = nesqp->pau_rcv_nxt; + rcv_wnd = le32_to_cpu(nesqp->nesqp_context->rcv_wnd); + if (!between(seqnum, rcv_nxt, (rcv_nxt + rcv_wnd))) { + nes_mgt_free_skb(nesvnic->nesdev, skb, PCI_DMA_TODEVICE); + nes_rem_ref_cm_node(nesqp->cm_node); + return; + } + + spin_lock_irqsave(&nesqp->pau_lock, flags); + + if (nesqp->pau_busy) + nesqp->pau_pending = 1; + else + nesqp->pau_busy = 1; + + /* Queue skb by sequence number */ + if (skb_queue_len(&nesqp->pau_list) == 0) { + skb_queue_head(&nesqp->pau_list, skb); + } else { + tmpskb = nesqp->pau_list.next; + while (tmpskb != (struct sk_buff *)&nesqp->pau_list) { + cb = (struct nes_rskb_cb *)&tmpskb->cb[0]; + if (before(seqnum, cb->seqnum)) + break; + tmpskb = tmpskb->next; + } + skb_insert(tmpskb, skb, &nesqp->pau_list); + } + if (nesqp->pau_state == PAU_READY) + process_it = true; + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + + if (process_it) + process_fpdus(nesvnic, nesqp); + + return; +} + +/** + * mgt_thread - Handle mgt skbs in a safe context + */ +static int mgt_thread(void *context) +{ + struct nes_vnic *nesvnic = context; + struct sk_buff *skb; + struct nes_rskb_cb 
*cb; + + while (!kthread_should_stop()) { + wait_event_interruptible(nesvnic->mgt_wait_queue, + skb_queue_len(&nesvnic->mgt_skb_list) || kthread_should_stop()); + while ((skb_queue_len(&nesvnic->mgt_skb_list)) && !kthread_should_stop()) { + skb = skb_dequeue(&nesvnic->mgt_skb_list); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->data_start = skb->data - ETH_HLEN; + cb->busaddr = pci_map_single(nesvnic->nesdev->pcidev, cb->data_start, + nesvnic->max_frame_size, PCI_DMA_TODEVICE); + queue_fpdus(skb, nesvnic, cb->nesqp); + } + } + + /* Closing down so delete any entries on the queue */ + while (skb_queue_len(&nesvnic->mgt_skb_list)) { + skb = skb_dequeue(&nesvnic->mgt_skb_list); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + nes_rem_ref_cm_node(cb->nesqp->cm_node); + dev_kfree_skb_any(skb); + } + return 0; +} + +/** + * nes_queue_skbs - Queue skb so it can be handled in a thread context + */ +void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + struct nes_rskb_cb *cb; + + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->nesqp = nesqp; + skb_queue_tail(&nesvnic->mgt_skb_list, skb); + wake_up_interruptible(&nesvnic->mgt_wait_queue); +} + +void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp) +{ + struct sk_buff *skb; + unsigned long flags; + atomic_inc(&pau_qps_destroyed); + + /* Free packets that have not yet been forwarded */ + /* Lock is acquired by skb_dequeue when removing the skb */ + spin_lock_irqsave(&nesqp->pau_lock, flags); + while (skb_queue_len(&nesqp->pau_list)) { + skb = skb_dequeue(&nesqp->pau_list); + nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE); + nes_rem_ref_cm_node(nesqp->cm_node); + } + spin_unlock_irqrestore(&nesqp->pau_lock, flags); +} + +static void nes_chg_qh_handler(struct nes_device *nesdev, struct nes_cqp_request *cqp_request) +{ + struct pau_qh_chg *qh_chg = cqp_request->cqp_callback_pointer; + struct nes_cqp_request *new_request; + struct nes_hw_cqp_wqe *cqp_wqe; + struct 
nes_adapter *nesadapter; + struct nes_qp *nesqp; + struct nes_v4_quad nes_quad; + u32 crc_value; + u64 u64temp; + + nesadapter = nesdev->nesadapter; + nesqp = qh_chg->nesqp; + + /* Should we handle the bad completion */ + if (cqp_request->major_code) { + printk(KERN_ERR PFX "Invalid cqp_request major_code=0x%x\n", + cqp_request->major_code); + WARN_ON(1); + } + + switch (nesqp->pau_state) { + case PAU_DEL_QH: + /* Old hash code deleted, now set the new one */ + nesqp->pau_state = PAU_ADD_LB_QH; + new_request = nes_get_cqp_request(nesdev); + if (new_request == NULL) { + nes_debug(NES_DBG_PAU, "Failed to get a new_request.\n"); + WARN_ON(1); + return; + } + + memset(&nes_quad, 0, sizeof(nes_quad)); + nes_quad.DstIpAdrIndex = + cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + nes_quad.SrcIpadr = cpu_to_be32(0x7f000001); + nes_quad.TcpPorts[0] = swab16(nesqp->nesqp_context->tcpPorts[1]); + nes_quad.TcpPorts[1] = swab16(nesqp->nesqp_context->tcpPorts[0]); + + /* Produce hash key */ + crc_value = get_crc_value(&nes_quad); + nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); + nes_debug(NES_DBG_PAU, "new HTE Index = 0x%08X, CRC = 0x%08X\n", + nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask); + + nesqp->hte_index &= nesadapter->hte_index_mask; + nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); + nesqp->nesqp_context->ip0 = cpu_to_le32(0x7f000001); + nesqp->nesqp_context->rcv_nxt = cpu_to_le32(nesqp->pau_rcv_nxt); + + cqp_wqe = &new_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, + NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH | + NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); + u64temp = (u64)nesqp->nesqp_context_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + + nes_debug(NES_DBG_PAU, "Waiting for CQP completion 
for adding the quad hash.\n"); + + new_request->cqp_callback_pointer = qh_chg; + new_request->callback = 1; + new_request->cqp_callback = nes_chg_qh_handler; + atomic_set(&new_request->refcount, 1); + nes_post_cqp_request(nesdev, new_request); + break; + + case PAU_ADD_LB_QH: + /* Start processing the queued fpdu's */ + nesqp->pau_state = PAU_READY; + process_fpdus(qh_chg->nesvnic, qh_chg->nesqp); + kfree(qh_chg); + break; + } +} + +/** + * nes_change_quad_hash - post the CQP request that deletes a QP's quad hash entry + * + * Moves the QP to PAU_DEL_QH and registers nes_chg_qh_handler as the + * completion callback, handing it a freshly allocated pau_qh_chg. + * Returns 0 on success, -ENOMEM if the request or qh_chg cannot be allocated. + */ +static int nes_change_quad_hash(struct nes_device *nesdev, + struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + struct nes_cqp_request *cqp_request = NULL; + struct pau_qh_chg *qh_chg = NULL; + u64 u64temp; + struct nes_hw_cqp_wqe *cqp_wqe; + int ret = 0; + + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n"); + ret = -ENOMEM; + goto chg_qh_err; + } + + qh_chg = kmalloc(sizeof *qh_chg, GFP_ATOMIC); + if (qh_chg == NULL) { + nes_debug(NES_DBG_PAU, "Failed to allocate qh_chg.\n"); + ret = -ENOMEM; + goto chg_qh_err; + } + qh_chg->nesdev = nesdev; + qh_chg->nesvnic = nesvnic; + qh_chg->nesqp = nesqp; + nesqp->pau_state = PAU_DEL_QH; + + cqp_wqe = &cqp_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, + NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH | NES_CQP_QP_DEL_HTE | + NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); + u64temp = (u64)nesqp->nesqp_context_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + + nes_debug(NES_DBG_PAU, "Waiting for CQP completion for deleting the quad hash.\n"); + + cqp_request->cqp_callback_pointer = qh_chg; + cqp_request->callback = 1; + cqp_request->cqp_callback = nes_chg_qh_handler; + atomic_set(&cqp_request->refcount, 1); + nes_post_cqp_request(nesdev, cqp_request); + + return ret; 
+ +chg_qh_err: + kfree(qh_chg); + if (cqp_request) + nes_put_cqp_request(nesdev, cqp_request); + return ret; +} + +/** + * nes_mgt_ce_handler + * This management code deals with any packed and unaligned (pau) fpdu's + * that the hardware cannot handle. + */ +static void nes_mgt_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) +{ + struct nes_vnic_mgt *mgtvnic = container_of(cq, struct nes_vnic_mgt, mgt_cq); + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 head; + u32 cq_size; + u32 cqe_count = 0; + u32 cqe_misc; + u32 qp_id = 0; + u32 skbs_needed; + unsigned long context; + struct nes_qp *nesqp; + struct sk_buff *rx_skb; + struct nes_rskb_cb *cb; + + head = cq->cq_head; + cq_size = cq->cq_size; + + while (1) { + cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]); + if (!(cqe_misc & NES_NIC_CQE_VALID)) + break; + + nesqp = NULL; + if (cqe_misc & NES_NIC_CQE_ACCQP_VALID) { + qp_id = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_ACCQP_ID_IDX]); + qp_id &= 0x001fffff; + /* qp_table is indexed from NES_FIRST_QPN; a smaller id would wrap the index */ + if (qp_id >= NES_FIRST_QPN && qp_id < nesadapter->max_qp) { + context = (unsigned long)nesadapter->qp_table[qp_id - NES_FIRST_QPN]; + nesqp = (struct nes_qp *)context; + } + } + + if (nesqp) { + if (nesqp->pau_mode == false) { + nesqp->pau_mode = true; /* First time for this qp */ + nesqp->pau_rcv_nxt = le32_to_cpu( + cq->cq_vbase[head].cqe_words[NES_NIC_CQE_HASH_RCVNXT]); + skb_queue_head_init(&nesqp->pau_list); + spin_lock_init(&nesqp->pau_lock); + atomic_inc(&pau_qps_created); + nes_change_quad_hash(nesdev, mgtvnic->nesvnic, nesqp); + } + + rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail]; + rx_skb->len = 0; + skb_put(rx_skb, cqe_misc & 0x0000ffff); + rx_skb->protocol = eth_type_trans(rx_skb, mgtvnic->nesvnic->netdev); + cb = (struct nes_rskb_cb *)&rx_skb->cb[0]; + pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, PCI_DMA_FROMDEVICE); + cb->busaddr = 0; + mgtvnic->mgt.rq_tail++; + mgtvnic->mgt.rq_tail &= mgtvnic->mgt.rq_size - 1; + + 
nes_add_ref_cm_node(nesqp->cm_node); + nes_queue_mgt_skbs(rx_skb, mgtvnic->nesvnic, nesqp); + } else { + printk(KERN_ERR PFX "Invalid QP %d for packed/unaligned handling\n", qp_id); + } + + cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0; + cqe_count++; + if (++head >= cq_size) + head = 0; + + if (cqe_count == 255) { + /* Replenish mgt CQ */ + nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16)); + nesdev->currcq_count += cqe_count; + cqe_count = 0; + } + + skbs_needed = atomic_inc_return(&mgtvnic->rx_skbs_needed); + if (skbs_needed > (mgtvnic->mgt.rq_size >> 1)) + nes_replenish_mgt_rq(mgtvnic); + } + + cq->cq_head = head; + nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | + cq->cq_number | (cqe_count << 16)); + nes_read32(nesdev->regs + NES_CQE_ALLOC); + nesdev->currcq_count += cqe_count; +} + +/** + * nes_init_mgt_qp + */ +int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic) +{ + struct nes_vnic_mgt *mgtvnic; + u32 counter; + void *vmem; + dma_addr_t pmem; + struct nes_hw_cqp_wqe *cqp_wqe; + u32 cqp_head; + unsigned long flags; + struct nes_hw_nic_qp_context *mgt_context; + u64 u64temp; + struct nes_hw_nic_rq_wqe *mgt_rqe; + struct sk_buff *skb; + u32 wqe_count; + struct nes_rskb_cb *cb; + u32 mgt_mem_size; + void *mgt_vbase; + dma_addr_t mgt_pbase; + int i; + int ret; + + /* Allocate space the all mgt QPs once */ + mgtvnic = kzalloc(NES_MGT_QP_COUNT * sizeof(struct nes_vnic_mgt), GFP_KERNEL); + if (mgtvnic == NULL) { + nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt structure\n"); + return -ENOMEM; + } + + /* Allocate fragment, RQ, and CQ; Reuse CEQ based on the PCI function */ + /* We are not sending from this NIC so sq is not allocated */ + mgt_mem_size = 256 + + (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)) + + (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_cqe)) + + sizeof(struct nes_hw_nic_qp_context); + mgt_mem_size = (mgt_mem_size + 
PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); + mgt_vbase = pci_alloc_consistent(nesdev->pcidev, NES_MGT_QP_COUNT * mgt_mem_size, &mgt_pbase); + if (!mgt_vbase) { + kfree(mgtvnic); + nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt host descriptor rings\n"); + return -ENOMEM; + } + + nesvnic->mgt_mem_size = NES_MGT_QP_COUNT * mgt_mem_size; + nesvnic->mgt_vbase = mgt_vbase; + nesvnic->mgt_pbase = mgt_pbase; + + skb_queue_head_init(&nesvnic->mgt_skb_list); + init_waitqueue_head(&nesvnic->mgt_wait_queue); + nesvnic->mgt_thread = kthread_run(mgt_thread, nesvnic, "nes_mgt_thread"); + + for (i = 0; i < NES_MGT_QP_COUNT; i++) { + mgtvnic->nesvnic = nesvnic; + mgtvnic->mgt.qp_id = nesdev->mac_index + NES_MGT_QP_OFFSET + i; + memset(mgt_vbase, 0, mgt_mem_size); + nes_debug(NES_DBG_INIT, "Allocated mgt QP structures at %p (phys = %016lX), size = %u.\n", + mgt_vbase, (unsigned long)mgt_pbase, mgt_mem_size); + + vmem = (void *)(((unsigned long)mgt_vbase + (256 - 1)) & + ~(unsigned long)(256 - 1)); + pmem = (dma_addr_t)(((unsigned long long)mgt_pbase + (256 - 1)) & + ~(unsigned long long)(256 - 1)); + + spin_lock_init(&mgtvnic->mgt.rq_lock); + + /* setup the RQ */ + mgtvnic->mgt.rq_vbase = vmem; + mgtvnic->mgt.rq_pbase = pmem; + mgtvnic->mgt.rq_head = 0; + mgtvnic->mgt.rq_tail = 0; + mgtvnic->mgt.rq_size = NES_MGT_WQ_COUNT; + + /* setup the CQ */ + vmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)); + pmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)); + + mgtvnic->mgt_cq.cq_number = mgtvnic->mgt.qp_id; + mgtvnic->mgt_cq.cq_vbase = vmem; + mgtvnic->mgt_cq.cq_pbase = pmem; + mgtvnic->mgt_cq.cq_head = 0; + mgtvnic->mgt_cq.cq_size = NES_MGT_WQ_COUNT; + + mgtvnic->mgt_cq.ce_handler = nes_mgt_ce_handler; + + /* Send CreateCQ request to CQP */ + spin_lock_irqsave(&nesdev->cqp.lock, flags); + cqp_head = nesdev->cqp.sq_head; + + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = 
cpu_to_le32( + NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | + ((u32)mgtvnic->mgt_cq.cq_size << 16)); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32( + mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16)); + u64temp = (u64)mgtvnic->mgt_cq.cq_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; + u64temp = (unsigned long)&mgtvnic->mgt_cq; + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1)); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = + cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + /* Send CreateQP request to CQP */ + mgt_context = (void *)(&mgtvnic->mgt_cq.cq_vbase[mgtvnic->mgt_cq.cq_size]); + mgt_context->context_words[NES_NIC_CTX_MISC_IDX] = + cpu_to_le32((u32)NES_MGT_CTX_SIZE | + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12)); + nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n", + nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE), + nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE)); + if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0) + mgt_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE); + + u64temp = (u64)mgtvnic->mgt.rq_pbase; + mgt_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp); + mgt_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); + u64temp = (u64)mgtvnic->mgt.rq_pbase; + mgt_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp); + mgt_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = 
cpu_to_le32(NES_CQP_CREATE_QP | + NES_CQP_QP_TYPE_NIC); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(mgtvnic->mgt.qp_id); + u64temp = (u64)mgtvnic->mgt_cq.cq_pbase + + (mgtvnic->mgt_cq.cq_size * sizeof(struct nes_hw_nic_cqe)); + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + nesdev->cqp.sq_head = cqp_head; + + barrier(); + + /* Ring doorbell (2 WQEs) */ + nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id); + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + nes_debug(NES_DBG_INIT, "Waiting for create MGT QP%u to complete.\n", + mgtvnic->mgt.qp_id); + + ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_INIT, "Create MGT QP%u completed, wait_event_timeout ret = %u.\n", + mgtvnic->mgt.qp_id, ret); + if (!ret) { + nes_debug(NES_DBG_INIT, "MGT QP%u create timeout expired\n", mgtvnic->mgt.qp_id); + if (i == 0) { + pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase, + nesvnic->mgt_pbase); + kfree(mgtvnic); + } else { + nes_destroy_mgt(nesvnic); + } + return -EIO; + } + + /* Populate the RQ */ + for (counter = 0; counter < (NES_MGT_WQ_COUNT - 1); counter++) { + skb = dev_alloc_skb(nesvnic->max_frame_size); + if (!skb) { + nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name); + return -ENOMEM; + } + + skb->dev = netdev; + + pmem = pci_map_single(nesdev->pcidev, skb->data, + nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->busaddr = pmem; + cb->maplen = nesvnic->max_frame_size; + + mgt_rqe = &mgtvnic->mgt.rq_vbase[counter]; + mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32((u32)nesvnic->max_frame_size); + mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; + mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem); + 
mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32)); + mgtvnic->mgt.rx_skb[counter] = skb; + } + + init_timer(&mgtvnic->rq_wqes_timer); + mgtvnic->rq_wqes_timer.function = nes_mgt_rq_wqes_timeout; + mgtvnic->rq_wqes_timer.data = (unsigned long)mgtvnic; + + wqe_count = NES_MGT_WQ_COUNT - 1; + mgtvnic->mgt.rq_head = wqe_count; + barrier(); + do { + counter = min(wqe_count, ((u32)255)); + wqe_count -= counter; + nes_write32(nesdev->regs + NES_WQE_ALLOC, (counter << 24) | mgtvnic->mgt.qp_id); + } while (wqe_count); + + nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | + mgtvnic->mgt_cq.cq_number); + nes_read32(nesdev->regs + NES_CQE_ALLOC); + + mgt_vbase += mgt_mem_size; + mgt_pbase += mgt_mem_size; + nesvnic->mgtvnic[i] = mgtvnic++; + } + return 0; +} + + +void nes_destroy_mgt(struct nes_vnic *nesvnic) +{ + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_vnic_mgt *mgtvnic; + struct nes_vnic_mgt *first_mgtvnic; + unsigned long flags; + struct nes_hw_cqp_wqe *cqp_wqe; + u32 cqp_head; + struct sk_buff *rx_skb; + int i; + int ret; + + kthread_stop(nesvnic->mgt_thread); + + /* Free remaining NIC receive buffers */ + first_mgtvnic = nesvnic->mgtvnic[0]; + for (i = 0; i < NES_MGT_QP_COUNT; i++) { + mgtvnic = nesvnic->mgtvnic[i]; + if (mgtvnic == NULL) + continue; + + while (mgtvnic->mgt.rq_head != mgtvnic->mgt.rq_tail) { + rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail]; + nes_mgt_free_skb(nesdev, rx_skb, PCI_DMA_FROMDEVICE); + mgtvnic->mgt.rq_tail++; + mgtvnic->mgt.rq_tail &= (mgtvnic->mgt.rq_size - 1); + } + + spin_lock_irqsave(&nesdev->cqp.lock, flags); + + /* Destroy NIC QP */ + cqp_head = nesdev->cqp.sq_head; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + mgtvnic->mgt.qp_id); + + if 
(++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + + /* Destroy NIC CQ */ + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_DESTROY_CQ | ((u32)mgtvnic->mgt_cq.cq_size << 16))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + (mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16))); + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + + nesdev->cqp.sq_head = cqp_head; + barrier(); + + /* Ring doorbell (2 WQEs) */ + nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id); + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u," + " cqp.sq_tail=%u, cqp.sq_size=%u\n", + cqp_head, nesdev->cqp.sq_head, + nesdev->cqp.sq_tail, nesdev->cqp.sq_size); + + ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head), + NES_EVENT_TIMEOUT); + + nes_debug(NES_DBG_SHUTDOWN, "Destroy MGT QP returned, wait_event_timeout ret = %u, cqp_head=%u," + " cqp.sq_head=%u, cqp.sq_tail=%u\n", + ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail); + if (!ret) + nes_debug(NES_DBG_SHUTDOWN, "MGT QP%u destroy timeout expired\n", + mgtvnic->mgt.qp_id); + + nesvnic->mgtvnic[i] = NULL; + } + + if (nesvnic->mgt_vbase) { + pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase, + nesvnic->mgt_pbase); + nesvnic->mgt_vbase = NULL; + nesvnic->mgt_pbase = 0; + } + + kfree(first_mgtvnic); +} diff --git a/drivers/infiniband/hw/nes/nes_mgt.h b/drivers/infiniband/hw/nes/nes_mgt.h new file mode 100644 index 00000000000..8c8af254555 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_mgt.h @@ -0,0 +1,97 @@ +/* +* Copyright (c) 2010 Intel-NE, Inc. All rights reserved. +* +* This software is available to you under a choice of one of two +* licenses. 
You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. +* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+*/ + +#ifndef __NES_MGT_H +#define __NES_MGT_H + +#define MPA_FRAMING 6 /* length is 2 bytes, crc is 4 bytes */ + +int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic); +void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp); +void nes_destroy_mgt(struct nes_vnic *nesvnic); +void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp); + +struct nes_hw_mgt { + struct nes_hw_nic_rq_wqe *rq_vbase; /* virtual address of rq */ + dma_addr_t rq_pbase; /* PCI memory for host rings */ + struct sk_buff *rx_skb[NES_NIC_WQ_SIZE]; + u16 qp_id; + u16 sq_head; + u16 rq_head; + u16 rq_tail; + u16 rq_size; + u8 replenishing_rq; + u8 reserved; + spinlock_t rq_lock; +}; + +struct nes_vnic_mgt { + struct nes_vnic *nesvnic; + struct nes_hw_mgt mgt; + struct nes_hw_nic_cq mgt_cq; + atomic_t rx_skbs_needed; + struct timer_list rq_wqes_timer; + atomic_t rx_skb_timer_running; +}; + +#define MAX_FPDU_FRAGS 4 +struct pau_fpdu_frag { + struct sk_buff *skb; + u64 physaddr; + u32 frag_len; + bool cmplt; +}; + +struct pau_fpdu_info { + struct nes_qp *nesqp; + struct nes_cqp_request *cqp_request; + void *hdr_vbase; + dma_addr_t hdr_pbase; + int hdr_len; + u16 data_len; + u16 frag_cnt; + struct pau_fpdu_frag frags[MAX_FPDU_FRAGS]; +}; + +enum pau_qh_state { + PAU_DEL_QH, + PAU_ADD_LB_QH, + PAU_READY +}; + +struct pau_qh_chg { + struct nes_device *nesdev; + struct nes_vnic *nesvnic; + struct nes_qp *nesqp; +}; + +#endif /* __NES_MGT_H */ diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 9d7ffebff21..64f91d84036 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1090,6 +1090,8 @@ static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = { "LRO aggregated", "LRO flushed", "LRO no_desc", + "PAU CreateQPs", + "PAU DestroyQPs", }; #define NES_ETHTOOL_STAT_COUNT ARRAY_SIZE(nes_ethtool_stringset) @@ -1305,6 +1307,8 @@ 
static void nes_netdev_get_ethtool_stats(struct net_device *netdev, target_stat_values[++index] = nesvnic->lro_mgr.stats.aggregated; target_stat_values[++index] = nesvnic->lro_mgr.stats.flushed; target_stat_values[++index] = nesvnic->lro_mgr.stats.no_desc; + target_stat_values[++index] = atomic_read(&pau_qps_created); + target_stat_values[++index] = atomic_read(&pau_qps_destroyed); } /** diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index f9c417c6b3b..cd10968bfa2 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -51,13 +51,34 @@ #include "nes.h" - - static u16 nes_read16_eeprom(void __iomem *addr, u16 offset); u32 mh_detected; u32 mh_pauses_sent; +u32 nes_set_pau(struct nes_device *nesdev) +{ + u32 ret = 0; + u32 counter; + + nes_write_indexed(nesdev, NES_IDX_GPR2, NES_ENABLE_PAU); + nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1); + + for (counter = 0; counter < NES_PAU_COUNTER; counter++) { + udelay(30); + if (!nes_read_indexed(nesdev, NES_IDX_GPR2)) { + printk(KERN_INFO PFX "PAU is supported.\n"); + break; + } + nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1); + } + if (counter == NES_PAU_COUNTER) { + printk(KERN_INFO PFX "PAU is not supported.\n"); + return -EPERM; + } + return ret; +} + /** * nes_read_eeprom_values - */ @@ -187,6 +208,11 @@ int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesada if (((major_ver == 3) && (minor_ver >= 16)) || (major_ver > 3)) nesadapter->send_term_ok = 1; + if (nes_drv_opt & NES_DRV_OPT_ENABLE_PAU) { + if (!nes_set_pau(nesdev)) + nesadapter->allow_unaligned_fpdus = 1; + } + nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) + (u32)((u8)eeprom_data); @@ -594,6 +620,7 @@ void nes_put_cqp_request(struct nes_device *nesdev, nes_free_cqp_request(nesdev, cqp_request); } + /** * nes_post_cqp_request */ @@ -604,6 +631,8 @@ void nes_post_cqp_request(struct nes_device *nesdev, unsigned long flags; u32 
cqp_head; u64 u64temp; + u32 opcode; + int ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX; spin_lock_irqsave(&nesdev->cqp.lock, flags); @@ -614,17 +643,20 @@ void nes_post_cqp_request(struct nes_device *nesdev, nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe)); + opcode = le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]); + if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT) + ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX; barrier(); u64temp = (unsigned long)cqp_request; - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_SCRATCH_LOW_IDX, - u64temp); + set_wqe_64bit_value(cqp_wqe->wqe_words, ctx_index, u64temp); nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ," - " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u," - " waiting = %d, refcount = %d.\n", - le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, - le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request, - nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size, - cqp_request->waiting, atomic_read(&cqp_request->refcount)); + " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u," + " waiting = %d, refcount = %d.\n", + opcode & NES_CQP_OPCODE_MASK, + le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request, + nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size, + cqp_request->waiting, atomic_read(&cqp_request->refcount)); + barrier(); /* Ring doorbell (1 WQEs) */ @@ -645,7 +677,6 @@ void nes_post_cqp_request(struct nes_device *nesdev, return; } - /** * nes_arp_table */ diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 9f2f7d4b119..5095bc41c6c 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1458,7 +1458,7 @@ static int nes_destroy_qp(struct ib_qp *ibqp) struct ib_qp_attr attr; struct iw_cm_id *cm_id; struct 
iw_cm_event cm_event; - int ret; + int ret = 0; atomic_inc(&sw_qps_destroyed); nesqp->destroyed = 1; @@ -1511,7 +1511,6 @@ static int nes_destroy_qp(struct ib_qp *ibqp) if ((nesqp->nesrcq) && (nesqp->nesrcq != nesqp->nesscq)) nes_clean_cq(nesqp, nesqp->nesrcq); } - nes_rem_ref(&nesqp->ibqp); return 0; } @@ -2338,8 +2337,10 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, skip_pages = ((u32)region->offset) >> 12; - if (ib_copy_from_udata(&req, udata, sizeof(req))) + if (ib_copy_from_udata(&req, udata, sizeof(req))) { + ib_umem_release(region); return ERR_PTR(-EFAULT); + } nes_debug(NES_DBG_MR, "Memory Registration type = %08X.\n", req.reg_type); switch (req.reg_type) { @@ -2631,6 +2632,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return &nesmr->ibmr; } + ib_umem_release(region); return ERR_PTR(-ENOSYS); } diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h index 854316d6694..fe6b6e92fa9 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -155,6 +155,7 @@ struct nes_qp { u32 mmap_sq_db_index; u32 mmap_rq_db_index; spinlock_t lock; + spinlock_t pau_lock; struct nes_qp_context *nesqp_context; dma_addr_t nesqp_context_pbase; void *pbl_vbase; @@ -162,6 +163,8 @@ struct nes_qp { struct page *page; struct timer_list terminate_timer; enum ib_event_type terminate_eventtype; + struct sk_buff_head pau_list; + u32 pau_rcv_nxt; u16 active_conn:1; u16 skip_lsmm:1; u16 user_mode:1; @@ -169,7 +172,8 @@ struct nes_qp { u16 flush_issued:1; u16 destroyed:1; u16 sig_all:1; - u16 rsvd:9; + u16 pau_mode:1; + u16 rsvd:8; u16 private_data_len; u16 term_sq_flush_code; u16 term_rq_flush_code; @@ -177,5 +181,8 @@ struct nes_qp { u8 hw_tcp_state; u8 term_flags; u8 sq_kmapped; + u8 pau_busy; + u8 pau_pending; + u8 pau_state; }; #endif /* NES_VERBS_H */ diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 
c9624ea8720..b881bdc401f 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -171,7 +171,9 @@ struct qib_ctxtdata { /* how many alloc_pages() chunks in rcvegrbuf_pages */ u32 rcvegrbuf_chunks; /* how many egrbufs per chunk */ - u32 rcvegrbufs_perchunk; + u16 rcvegrbufs_perchunk; + /* ilog2 of above */ + u16 rcvegrbufs_perchunk_shift; /* order for rcvegrbuf_pages */ size_t rcvegrbuf_size; /* rcvhdrq size (for freeing) */ @@ -221,6 +223,9 @@ struct qib_ctxtdata { /* ctxt rcvhdrq head offset */ u32 head; u32 pkt_count; + /* lookaside fields */ + struct qib_qp *lookaside_qp; + u32 lookaside_qpn; /* QPs waiting for context processing */ struct list_head qp_wait_list; }; @@ -807,6 +812,10 @@ struct qib_devdata { * supports, less gives more pio bufs/ctxt, etc. */ u32 cfgctxts; + /* + * number of ctxts available for PSM open + */ + u32 freectxts; /* * hint that we should update pioavailshadow before @@ -936,7 +945,9 @@ struct qib_devdata { /* chip address space used by 4k pio buffers */ u32 align4k; /* size of each rcvegrbuffer */ - u32 rcvegrbufsize; + u16 rcvegrbufsize; + /* log2 of above */ + u16 rcvegrbufsize_shift; /* localbus width (1, 2,4,8,16,32) from config space */ u32 lbus_width; /* localbus speed in MHz */ diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 23e584f4c36..9a9047f385a 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -279,10 +279,10 @@ bail: */ static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail) { - const u32 chunk = etail / rcd->rcvegrbufs_perchunk; - const u32 idx = etail % rcd->rcvegrbufs_perchunk; + const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift; + const u32 idx = etail & ((u32)rcd->rcvegrbufs_perchunk - 1); - return rcd->rcvegrbuf[chunk] + idx * rcd->dd->rcvegrbufsize; + return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift); } /* @@ -310,7 +310,6 @@ static u32 
qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, u32 opcode; u32 psn; int diff; - unsigned long flags; /* Sanity check packet */ if (tlen < 24) @@ -365,7 +364,6 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, switch (qp->ibqp.qp_type) { case IB_QPT_RC: - spin_lock_irqsave(&qp->s_lock, flags); ruc_res = qib_ruc_check_hdr( ibp, hdr, @@ -373,11 +371,8 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, qp, be32_to_cpu(ohdr->bth[0])); if (ruc_res) { - spin_unlock_irqrestore(&qp->s_lock, - flags); goto unlock; } - spin_unlock_irqrestore(&qp->s_lock, flags); /* Only deal with RDMA Writes for now */ if (opcode < @@ -547,6 +542,15 @@ move_along: updegr = 0; } } + /* + * Notify qib_destroy_qp() if it is waiting + * for lookaside_qp to finish. + */ + if (rcd->lookaside_qp) { + if (atomic_dec_and_test(&rcd->lookaside_qp->refcount)) + wake_up(&rcd->lookaside_qp->wait); + rcd->lookaside_qp = NULL; + } rcd->head = l; rcd->pkt_count += i; diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 26253039d2c..77633666f81 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1284,6 +1284,7 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, strlcpy(rcd->comm, current->comm, sizeof(rcd->comm)); ctxt_fp(fp) = rcd; qib_stats.sps_ctxts++; + dd->freectxts--; ret = 0; goto bail; @@ -1792,6 +1793,7 @@ static int qib_close(struct inode *in, struct file *fp) if (dd->pageshadow) unlock_expected_tids(rcd); qib_stats.sps_ctxts--; + dd->freectxts++; } mutex_unlock(&qib_mutex); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index d8ca0a0b970..781a802a321 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3273,6 +3273,8 @@ static int init_6120_variables(struct qib_devdata *dd) /* we always allocate at least 2048 
bytes for eager buffers */ ret = ib_mtu_enum_to_int(qib_ibmtu); dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; + BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); + dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_6120_tidtemplate(dd); diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index e1f947446c2..3f1d562ba89 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -4085,6 +4085,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ ret = ib_mtu_enum_to_int(qib_ibmtu); dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; + BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); + dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_7220_tidtemplate(dd); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 5ea9ece23b3..efd0a110091 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2310,12 +2310,15 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) val = ppd->cpspec->ibcctrl_a | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE << QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + ppd->cpspec->ibcctrl_a = val; /* * Reset the PCS interface to the serdes (and also ibc, which is still * in reset from above). Writes new value of ibcctrl_a as last step. 
*/ qib_7322_mini_pcs_reset(ppd); qib_write_kreg(dd, kr_scratch, 0ULL); + /* clear the linkinit cmds */ + ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, LinkInitCmd); if (!ppd->cpspec->ibcctrl_b) { unsigned lse = ppd->link_speed_enabled; @@ -2387,11 +2390,6 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) qib_write_kreg_port(ppd, krp_rcvctrl, ppd->p_rcvctrl); spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); - /* Hold the link state machine for mezz boards */ - if (IS_QMH(dd) || IS_QME(dd)) - qib_set_ib_7322_lstate(ppd, 0, - QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); - /* Also enable IBSTATUSCHG interrupt. */ val = qib_read_kreg_port(ppd, krp_errmask); qib_write_kreg_port(ppd, krp_errmask, @@ -2853,9 +2851,8 @@ static irqreturn_t qib_7322intr(int irq, void *data) for (i = 0; i < dd->first_user_ctxt; i++) { if (ctxtrbits & rmask) { ctxtrbits &= ~rmask; - if (dd->rcd[i]) { + if (dd->rcd[i]) qib_kreceive(dd->rcd[i], NULL, &npkts); - } } rmask <<= 1; } @@ -5230,6 +5227,8 @@ static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) QIBL_IB_AUTONEG_INPROG))) set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled); if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) { + struct qib_qsfp_data *qd = + &ppd->cpspec->qsfp_data; /* unlock the Tx settings, speed may change */ qib_write_kreg_port(ppd, krp_tx_deemph_override, SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, @@ -5237,6 +5236,12 @@ static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) qib_cancel_sends(ppd); /* on link down, ensure sane pcs state */ qib_7322_mini_pcs_reset(ppd); + /* schedule the qsfp refresh which should turn the link + off */ + if (ppd->dd->flags & QIB_HAS_QSFP) { + qd->t_insert = get_jiffies_64(); + schedule_work(&qd->work); + } spin_lock_irqsave(&ppd->sdma_lock, flags); if (__qib_sdma_running(ppd)) __qib_sdma_process_event(ppd, @@ -5587,43 +5592,79 @@ static void qsfp_7322_event(struct work_struct *work) struct qib_qsfp_data *qd; struct qib_pportdata *ppd; 
u64 pwrup; + unsigned long flags; int ret; u32 le2; qd = container_of(work, struct qib_qsfp_data, work); ppd = qd->ppd; - pwrup = qd->t_insert + msecs_to_jiffies(QSFP_PWR_LAG_MSEC); + pwrup = qd->t_insert + + msecs_to_jiffies(QSFP_PWR_LAG_MSEC - QSFP_MODPRS_LAG_MSEC); - /* - * Some QSFP's not only do not respond until the full power-up - * time, but may behave badly if we try. So hold off responding - * to insertion. - */ - while (1) { - u64 now = get_jiffies_64(); - if (time_after64(now, pwrup)) - break; - msleep(20); - } - ret = qib_refresh_qsfp_cache(ppd, &qd->cache); - /* - * Need to change LE2 back to defaults if we couldn't - * read the cable type (to handle cable swaps), so do this - * even on failure to read cable information. We don't - * get here for QME, so IS_QME check not needed here. - */ - if (!ret && !ppd->dd->cspec->r1) { - if (QSFP_IS_ACTIVE_FAR(qd->cache.tech)) - le2 = LE2_QME; - else if (qd->cache.atten[1] >= qib_long_atten && - QSFP_IS_CU(qd->cache.tech)) - le2 = LE2_5m; - else + /* Delay for 20 msecs to allow ModPrs resistor to setup */ + mdelay(QSFP_MODPRS_LAG_MSEC); + + if (!qib_qsfp_mod_present(ppd)) { + ppd->cpspec->qsfp_data.modpresent = 0; + /* Set the physical link to disabled */ + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_LINKV; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } else { + /* + * Some QSFP's not only do not respond until the full power-up + * time, but may behave badly if we try. So hold off responding + * to insertion. + */ + while (1) { + u64 now = get_jiffies_64(); + if (time_after64(now, pwrup)) + break; + msleep(20); + } + + ret = qib_refresh_qsfp_cache(ppd, &qd->cache); + + /* + * Need to change LE2 back to defaults if we couldn't + * read the cable type (to handle cable swaps), so do this + * even on failure to read cable information. We don't + * get here for QME, so IS_QME check not needed here. 
+ */ + if (!ret && !ppd->dd->cspec->r1) { + if (QSFP_IS_ACTIVE_FAR(qd->cache.tech)) + le2 = LE2_QME; + else if (qd->cache.atten[1] >= qib_long_atten && + QSFP_IS_CU(qd->cache.tech)) + le2 = LE2_5m; + else + le2 = LE2_DEFAULT; + } else le2 = LE2_DEFAULT; - } else - le2 = LE2_DEFAULT; - ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7)); - init_txdds_table(ppd, 0); + ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7)); + /* + * We always change parameters, since we can choose + * values for cables without eeproms, and the cable may have + * changed from a cable with full or partial eeprom content + * to one with partial or no content. + */ + init_txdds_table(ppd, 0); + /* The physical link is being re-enabled only when the + * previous state was DISABLED and the VALID bit is not + * set. This should only happen when the cable has been + * physically pulled. */ + if (!ppd->cpspec->qsfp_data.modpresent && + (ppd->lflags & (QIBL_LINKV | QIBL_IB_LINK_DISABLED))) { + ppd->cpspec->qsfp_data.modpresent = 1; + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_SLEEP); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_LINKV; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } + } } /* @@ -5727,7 +5768,8 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change) /* now change the IBC and serdes, overriding generic */ init_txdds_table(ppd, 1); /* Re-enable the physical state machine on mezz boards - * now that the correct settings have been set. */ + * now that the correct settings have been set. 
+ * QSFP boards are handled by the QSFP event handler */ if (IS_QMH(dd) || IS_QME(dd)) qib_set_ib_7322_lstate(ppd, 0, QLOGIC_IB_IBCC_LINKINITCMD_SLEEP); @@ -6205,6 +6247,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ dd->rcvegrbufsize = max(mtu, 2048); + BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); + dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_7322_tidtemplate(dd); @@ -7147,7 +7191,8 @@ static void find_best_ent(struct qib_pportdata *ppd, } } - /* Lookup serdes setting by cable type and attenuation */ + /* Active cables don't have attenuation so we only set SERDES + * settings to account for the attenuation of the board traces. */ if (!override && QSFP_IS_ACTIVE(qd->tech)) { *sdr_dds = txdds_sdr + ppd->dd->board_atten; *ddr_dds = txdds_ddr + ppd->dd->board_atten; @@ -7464,12 +7509,6 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd) u32 le_val, rxcaldone; int chan, chan_done = (1 << SERDES_CHANS) - 1; - /* - * Initialize the Tx DDS tables. Also done every QSFP event, - * for adapters with QSFP - */ - init_txdds_table(ppd, 0); - /* Clear cmode-override, may be set from older driver */ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 0 << 14, 1 << 14); @@ -7655,6 +7694,12 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd) /* VGA output common mode */ ibsd_wr_allchans(ppd, 12, (3 << 2), BMASK(3, 2)); + /* + * Initialize the Tx DDS tables. 
Also done every QSFP event, + * for adapters with QSFP + */ + init_txdds_table(ppd, 0); + return 0; } diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index a01f3fce8eb..b093a0b53b2 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -183,6 +183,9 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt + rcd->rcvegrbufs_perchunk - 1) / rcd->rcvegrbufs_perchunk; + BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk)); + rcd->rcvegrbufs_perchunk_shift = + ilog2(rcd->rcvegrbufs_perchunk); } return rcd; } @@ -398,6 +401,7 @@ static void enable_chip(struct qib_devdata *dd) if (rcd) dd->f_rcvctrl(rcd->ppd, rcvmask, i); } + dd->freectxts = dd->cfgctxts - dd->first_user_ctxt; } static void verify_interrupt(unsigned long opaque) @@ -581,10 +585,6 @@ int qib_init(struct qib_devdata *dd, int reinit) continue; } - /* let link come up, and enable IBC */ - spin_lock_irqsave(&ppd->lflags_lock, flags); - ppd->lflags &= ~QIBL_IB_LINK_DISABLED; - spin_unlock_irqrestore(&ppd->lflags_lock, flags); portok++; } diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index e16751f8639..7e7e16fbee9 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -34,6 +34,7 @@ #include <linux/err.h> #include <linux/vmalloc.h> +#include <linux/jhash.h> #include "qib.h" @@ -204,6 +205,13 @@ static void free_qpn(struct qib_qpn_table *qpt, u32 qpn) clear_bit(qpn & BITS_PER_PAGE_MASK, map->page); } +static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn) +{ + return jhash_1word(qpn, dev->qp_rnd) & + (dev->qp_table_size - 1); +} + + /* * Put the QP into the hash table. * The hash table holds a reference to the QP. 
@@ -211,22 +219,23 @@ static void free_qpn(struct qib_qpn_table *qpt, u32 qpn) static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) { struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - unsigned n = qp->ibqp.qp_num % dev->qp_table_size; unsigned long flags; + unsigned n = qpn_hash(dev, qp->ibqp.qp_num); spin_lock_irqsave(&dev->qpt_lock, flags); + atomic_inc(&qp->refcount); if (qp->ibqp.qp_num == 0) - ibp->qp0 = qp; + rcu_assign_pointer(ibp->qp0, qp); else if (qp->ibqp.qp_num == 1) - ibp->qp1 = qp; + rcu_assign_pointer(ibp->qp1, qp); else { qp->next = dev->qp_table[n]; - dev->qp_table[n] = qp; + rcu_assign_pointer(dev->qp_table[n], qp); } - atomic_inc(&qp->refcount); spin_unlock_irqrestore(&dev->qpt_lock, flags); + synchronize_rcu(); } /* @@ -236,29 +245,32 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) { struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct qib_qp *q, **qpp; + unsigned n = qpn_hash(dev, qp->ibqp.qp_num); unsigned long flags; - qpp = &dev->qp_table[qp->ibqp.qp_num % dev->qp_table_size]; - spin_lock_irqsave(&dev->qpt_lock, flags); if (ibp->qp0 == qp) { - ibp->qp0 = NULL; atomic_dec(&qp->refcount); + rcu_assign_pointer(ibp->qp0, NULL); } else if (ibp->qp1 == qp) { - ibp->qp1 = NULL; atomic_dec(&qp->refcount); - } else + rcu_assign_pointer(ibp->qp1, NULL); + } else { + struct qib_qp *q, **qpp; + + qpp = &dev->qp_table[n]; for (; (q = *qpp) != NULL; qpp = &q->next) if (q == qp) { - *qpp = qp->next; - qp->next = NULL; atomic_dec(&qp->refcount); + rcu_assign_pointer(*qpp, qp->next); + qp->next = NULL; break; } + } spin_unlock_irqrestore(&dev->qpt_lock, flags); + synchronize_rcu(); } /** @@ -280,21 +292,24 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) if (!qib_mcast_tree_empty(ibp)) qp_inuse++; - if (ibp->qp0) + rcu_read_lock(); + if (rcu_dereference(ibp->qp0)) qp_inuse++; - if (ibp->qp1) + if 
(rcu_dereference(ibp->qp1)) qp_inuse++; + rcu_read_unlock(); } spin_lock_irqsave(&dev->qpt_lock, flags); for (n = 0; n < dev->qp_table_size; n++) { qp = dev->qp_table[n]; - dev->qp_table[n] = NULL; + rcu_assign_pointer(dev->qp_table[n], NULL); for (; qp; qp = qp->next) qp_inuse++; } spin_unlock_irqrestore(&dev->qpt_lock, flags); + synchronize_rcu(); return qp_inuse; } @@ -309,25 +324,28 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) */ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) { - struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; - unsigned long flags; - struct qib_qp *qp; + struct qib_qp *qp = NULL; - spin_lock_irqsave(&dev->qpt_lock, flags); + if (unlikely(qpn <= 1)) { + rcu_read_lock(); + if (qpn == 0) + qp = rcu_dereference(ibp->qp0); + else + qp = rcu_dereference(ibp->qp1); + } else { + struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; + unsigned n = qpn_hash(dev, qpn); - if (qpn == 0) - qp = ibp->qp0; - else if (qpn == 1) - qp = ibp->qp1; - else - for (qp = dev->qp_table[qpn % dev->qp_table_size]; qp; - qp = qp->next) + rcu_read_lock(); + for (qp = dev->qp_table[n]; rcu_dereference(qp); qp = qp->next) if (qp->ibqp.qp_num == qpn) break; + } if (qp) - atomic_inc(&qp->refcount); + if (unlikely(!atomic_inc_not_zero(&qp->refcount))) + qp = NULL; - spin_unlock_irqrestore(&dev->qpt_lock, flags); + rcu_read_unlock(); return qp; } @@ -765,8 +783,10 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } } - if (attr_mask & IB_QP_PATH_MTU) + if (attr_mask & IB_QP_PATH_MTU) { qp->path_mtu = pmtu; + qp->pmtu = ib_mtu_enum_to_int(pmtu); + } if (attr_mask & IB_QP_RETRY_CNT) { qp->s_retry_cnt = attr->retry_cnt; @@ -781,8 +801,12 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MIN_RNR_TIMER) qp->r_min_rnr_timer = attr->min_rnr_timer; - if (attr_mask & IB_QP_TIMEOUT) + if (attr_mask & IB_QP_TIMEOUT) { qp->timeout = attr->timeout; + qp->timeout_jiffies = + usecs_to_jiffies((4096UL 
* (1UL << qp->timeout)) / + 1000UL); + } if (attr_mask & IB_QP_QKEY) qp->qkey = attr->qkey; @@ -1013,6 +1037,10 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, ret = ERR_PTR(-ENOMEM); goto bail_swq; } + RCU_INIT_POINTER(qp->next, NULL); + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); if (init_attr->srq) sz = 0; else { diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c index 3374a52232c..e06c4ed383f 100644 --- a/drivers/infiniband/hw/qib/qib_qsfp.c +++ b/drivers/infiniband/hw/qib/qib_qsfp.c @@ -273,18 +273,12 @@ int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp) int ret; int idx; u16 cks; - u32 mask; u8 peek[4]; /* ensure sane contents on invalid reads, for cable swaps */ memset(cp, 0, sizeof(*cp)); - mask = QSFP_GPIO_MOD_PRS_N; - if (ppd->hw_pidx) - mask <<= QSFP_GPIO_PORT2_SHIFT; - - ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0); - if (ret & mask) { + if (!qib_qsfp_mod_present(ppd)) { ret = -ENODEV; goto bail; } @@ -444,6 +438,19 @@ const char * const qib_qsfp_devtech[16] = { static const char *pwr_codes = "1.5W2.0W2.5W3.5W"; +int qib_qsfp_mod_present(struct qib_pportdata *ppd) +{ + u32 mask; + int ret; + + mask = QSFP_GPIO_MOD_PRS_N << + (ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT); + ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0); + + return !((ret & mask) >> + ((ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT) + 3)); +} + /* * Initialize structures that control access to QSFP. Called once per port * on cards that support QSFP. @@ -452,7 +459,6 @@ void qib_qsfp_init(struct qib_qsfp_data *qd, void (*fevent)(struct work_struct *)) { u32 mask, highs; - int pins; struct qib_devdata *dd = qd->ppd->dd; @@ -480,8 +486,7 @@ void qib_qsfp_init(struct qib_qsfp_data *qd, mask <<= QSFP_GPIO_PORT2_SHIFT; /* Do not try to wait here. 
Better to let event handle it */ - pins = dd->f_gpio_mod(dd, 0, 0, 0); - if (pins & mask) + if (!qib_qsfp_mod_present(qd->ppd)) goto bail; /* We see a module, but it may be unwise to look yet. Just schedule */ qd->t_insert = get_jiffies_64(); diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h index c109bbdc90a..46002a9417c 100644 --- a/drivers/infiniband/hw/qib/qib_qsfp.h +++ b/drivers/infiniband/hw/qib/qib_qsfp.h @@ -34,6 +34,7 @@ #define QSFP_DEV 0xA0 #define QSFP_PWR_LAG_MSEC 2000 +#define QSFP_MODPRS_LAG_MSEC 20 /* * Below are masks for various QSFP signals, for Port 1. @@ -177,10 +178,12 @@ struct qib_qsfp_data { struct work_struct work; struct qib_qsfp_cache cache; u64 t_insert; + u8 modpresent; }; extern int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp); +extern int qib_qsfp_mod_present(struct qib_pportdata *ppd); extern void qib_qsfp_init(struct qib_qsfp_data *qd, void (*fevent)(struct work_struct *)); extern void qib_qsfp_deinit(struct qib_qsfp_data *qd); diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index eca0c41f122..afaf4ac79f4 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -59,8 +59,7 @@ static void start_timer(struct qib_qp *qp) qp->s_flags |= QIB_S_TIMER; qp->s_timer.function = rc_timeout; /* 4.096 usec. * (1 << qp->timeout) */ - qp->s_timer.expires = jiffies + - usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL); + qp->s_timer.expires = jiffies + qp->timeout_jiffies; add_timer(&qp->s_timer); } @@ -239,7 +238,7 @@ int qib_make_rc_req(struct qib_qp *qp) u32 len; u32 bth0; u32 bth2; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + u32 pmtu = qp->pmtu; char newreq; unsigned long flags; int ret = 0; @@ -1519,9 +1518,7 @@ read_middle: * 4.096 usec. 
* (1 << qp->timeout) */ qp->s_flags |= QIB_S_TIMER; - mod_timer(&qp->s_timer, jiffies + - usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / - 1000UL)); + mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies); if (qp->s_flags & QIB_S_WAIT_ACK) { qp->s_flags &= ~QIB_S_WAIT_ACK; qib_schedule_send(qp); @@ -1732,7 +1729,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, * same request. */ offset = ((psn - e->psn) & QIB_PSN_MASK) * - ib_mtu_enum_to_int(qp->path_mtu); + qp->pmtu; len = be32_to_cpu(reth->length); if (unlikely(offset + len != e->rdma_sge.sge_length)) goto unlock_done; @@ -1876,7 +1873,7 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, u32 psn; u32 pad; struct ib_wc wc; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + u32 pmtu = qp->pmtu; int diff; struct ib_reth *reth; unsigned long flags; @@ -1892,10 +1889,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, } opcode = be32_to_cpu(ohdr->bth[0]); - spin_lock_irqsave(&qp->s_lock, flags); if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode)) - goto sunlock; - spin_unlock_irqrestore(&qp->s_lock, flags); + return; psn = be32_to_cpu(ohdr->bth[2]); opcode >>= 24; @@ -1955,8 +1950,6 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, break; } - memset(&wc, 0, sizeof wc); - if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) { qp->r_flags |= QIB_R_COMM_EST; if (qp->ibqp.event_handler) { @@ -2009,16 +2002,19 @@ send_middle: goto rnr_nak; qp->r_rcv_len = 0; if (opcode == OP(SEND_ONLY)) - goto send_last; - /* FALLTHROUGH */ + goto no_immediate_data; + /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */ case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; - /* FALLTHROUGH */ + goto send_last; case OP(SEND_LAST): case OP(RDMA_WRITE_LAST): +no_immediate_data: + wc.wc_flags = 0; + wc.ex.imm_data = 0; send_last: /* Get the number of bytes the message was padded 
by. */ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; @@ -2051,6 +2047,12 @@ send_last: wc.src_qp = qp->remote_qpn; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; + /* zero fields that are N/A */ + wc.vendor_err = 0; + wc.pkey_index = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + wc.csum_ok = 0; /* Signal completion event if the solicited bit is set. */ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -2089,7 +2091,7 @@ send_last: if (opcode == OP(RDMA_WRITE_FIRST)) goto send_middle; else if (opcode == OP(RDMA_WRITE_ONLY)) - goto send_last; + goto no_immediate_data; ret = qib_get_rwqe(qp, 1); if (ret < 0) goto nack_op_err; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index eb78d9367f0..b4b37e47321 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -260,12 +260,15 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) /* * - * This should be called with the QP s_lock held. + * This should be called with the QP r_lock held. + * + * The s_lock will be acquired around the qib_migrate_qp() call. 
*/ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, int has_grh, struct qib_qp *qp, u32 bth0) { __be64 guid; + unsigned long flags; if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) { if (!has_grh) { @@ -295,7 +298,9 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid || ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num) goto err; + spin_lock_irqsave(&qp->s_lock, flags); qib_migrate_qp(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); } else { if (!has_grh) { if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c index c3ec8efc2ed..d6235931a1b 100644 --- a/drivers/infiniband/hw/qib/qib_srq.c +++ b/drivers/infiniband/hw/qib/qib_srq.c @@ -107,6 +107,11 @@ struct ib_srq *qib_create_srq(struct ib_pd *ibpd, u32 sz; struct ib_srq *ret; + if (srq_init_attr->srq_type != IB_SRQT_BASIC) { + ret = ERR_PTR(-ENOSYS); + goto done; + } + if (srq_init_attr->attr.max_sge == 0 || srq_init_attr->attr.max_sge > ib_qib_max_srq_sges || srq_init_attr->attr.max_wr == 0 || diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 14d129de432..78fbd56879d 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -515,8 +515,7 @@ static ssize_t show_nfreectxts(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. 
*/ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->cfgctxts - - dd->first_user_ctxt - (u32)qib_stats.sps_ctxts); + return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); } static ssize_t show_serial(struct device *device, diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 32ccf3c824c..847e7afdfd9 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -51,7 +51,7 @@ int qib_make_uc_req(struct qib_qp *qp) u32 hwords; u32 bth0; u32 len; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + u32 pmtu = qp->pmtu; int ret = 0; spin_lock_irqsave(&qp->s_lock, flags); @@ -243,13 +243,12 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, int has_grh, void *data, u32 tlen, struct qib_qp *qp) { struct qib_other_headers *ohdr; - unsigned long flags; u32 opcode; u32 hdrsize; u32 psn; u32 pad; struct ib_wc wc; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + u32 pmtu = qp->pmtu; struct ib_reth *reth; int ret; @@ -263,14 +262,11 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, } opcode = be32_to_cpu(ohdr->bth[0]); - spin_lock_irqsave(&qp->s_lock, flags); if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode)) - goto sunlock; - spin_unlock_irqrestore(&qp->s_lock, flags); + return; psn = be32_to_cpu(ohdr->bth[2]); opcode >>= 24; - memset(&wc, 0, sizeof wc); /* Compare the PSN verses the expected PSN. */ if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) { @@ -370,7 +366,7 @@ send_first: } qp->r_rcv_len = 0; if (opcode == OP(SEND_ONLY)) - goto send_last; + goto no_immediate_data; else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) goto send_last_imm; /* FALLTHROUGH */ @@ -389,8 +385,11 @@ send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; - /* FALLTHROUGH */ + goto send_last; case OP(SEND_LAST): +no_immediate_data: + wc.ex.imm_data = 0; + wc.wc_flags = 0; send_last: /* Get the number of bytes the message was padded by. 
*/ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; @@ -418,6 +417,12 @@ last_imm: wc.src_qp = qp->remote_qpn; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; + /* zero fields that are N/A */ + wc.vendor_err = 0; + wc.pkey_index = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + wc.csum_ok = 0; /* Signal completion event if the solicited bit is set. */ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -546,6 +551,4 @@ op_err: qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return; -sunlock: - spin_unlock_irqrestore(&qp->s_lock, flags); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 9fab4048885..9627cb73712 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -38,11 +38,12 @@ #include <linux/utsname.h> #include <linux/rculist.h> #include <linux/mm.h> +#include <linux/random.h> #include "qib.h" #include "qib_common.h" -static unsigned int ib_qib_qp_table_size = 251; +static unsigned int ib_qib_qp_table_size = 256; module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO); MODULE_PARM_DESC(qp_table_size, "QP table size"); @@ -659,17 +660,25 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) if (atomic_dec_return(&mcast->refcount) <= 1) wake_up(&mcast->wait); } else { - qp = qib_lookup_qpn(ibp, qp_num); - if (!qp) - goto drop; + if (rcd->lookaside_qp) { + if (rcd->lookaside_qpn != qp_num) { + if (atomic_dec_and_test( + &rcd->lookaside_qp->refcount)) + wake_up( + &rcd->lookaside_qp->wait); + rcd->lookaside_qp = NULL; + } + } + if (!rcd->lookaside_qp) { + qp = qib_lookup_qpn(ibp, qp_num); + if (!qp) + goto drop; + rcd->lookaside_qp = qp; + rcd->lookaside_qpn = qp_num; + } else + qp = rcd->lookaside_qp; ibp->n_unicast_rcv++; qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp); - /* - * Notify qib_destroy_qp() if it is waiting - * for us to finish. 
- */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); } return; @@ -1974,6 +1983,8 @@ static void init_ibport(struct qib_pportdata *ppd) ibp->z_excessive_buffer_overrun_errors = cntrs.excessive_buffer_overrun_errors; ibp->z_vl15_dropped = cntrs.vl15_dropped; + RCU_INIT_POINTER(ibp->qp0, NULL); + RCU_INIT_POINTER(ibp->qp1, NULL); } /** @@ -1990,12 +2001,15 @@ int qib_register_ib_device(struct qib_devdata *dd) int ret; dev->qp_table_size = ib_qib_qp_table_size; - dev->qp_table = kzalloc(dev->qp_table_size * sizeof *dev->qp_table, + get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd)); + dev->qp_table = kmalloc(dev->qp_table_size * sizeof *dev->qp_table, GFP_KERNEL); if (!dev->qp_table) { ret = -ENOMEM; goto err_qpt; } + for (i = 0; i < dev->qp_table_size; i++) + RCU_INIT_POINTER(dev->qp_table[i], NULL); for (i = 0; i < dd->num_pports; i++) init_ibport(ppd + i); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 95e5b47223b..0c19ef0c412 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -485,6 +485,7 @@ struct qib_qp { u8 alt_timeout; /* Alternate path timeout for this QP */ u8 port_num; enum ib_mtu path_mtu; + u32 pmtu; /* decoded from path_mtu */ u32 remote_qpn; u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ @@ -495,6 +496,7 @@ struct qib_qp { u32 s_last; /* last completed entry */ u32 s_ssn; /* SSN of tail entry */ u32 s_lsn; /* limit sequence number (credit) */ + unsigned long timeout_jiffies; /* computed from timeout */ struct qib_swqe *s_wq; /* send work queue */ struct qib_swqe *s_wqe; struct qib_rq r_rq; /* receive work queue */ @@ -723,7 +725,8 @@ struct qib_ibdev { dma_addr_t pio_hdrs_phys; /* list of QPs waiting for RNR timer */ spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. 
*/ - unsigned qp_table_size; /* size of the hash table */ + u32 qp_table_size; /* size of the hash table */ + u32 qp_rnd; /* random bytes for hash */ spinlock_t qpt_lock; u32 n_piowait; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 39913a065f9..fe48677fd74 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -84,7 +84,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); for (i = 0; i < frags; ++i) - ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_page(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); } static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) @@ -183,7 +183,7 @@ partial_error: ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); for (; i > 0; --i) - ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); return NULL; @@ -1496,6 +1496,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_srq_init_attr srq_init_attr = { + .srq_type = IB_SRQT_BASIC, .attr = { .max_wr = ipoib_recvq_size, .max_sge = max_sge diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 86eae229dc4..0e2fe4631ba 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -212,16 +212,15 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr) gid_buf, path.pathrec.dlid ? 
"yes" : "no"); if (path.pathrec.dlid) { - rate = ib_rate_to_mult(path.pathrec.rate) * 25; + rate = ib_rate_to_mbps(path.pathrec.rate); seq_printf(file, " DLID: 0x%04x\n" " SL: %12d\n" - " rate: %*d%s Gb/sec\n", + " rate: %8d.%d Gb/sec\n", be16_to_cpu(path.pathrec.dlid), path.pathrec.sl, - 10 - ((rate % 10) ? 2 : 0), - rate / 10, rate % 10 ? ".5" : ""); + rate / 1000, rate % 1000); } seq_putc(file, '\n'); diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index 1ad1f6029af..869a2c220a7 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -484,7 +484,7 @@ static void mlx4_free_eq(struct mlx4_dev *dev, mlx4_mtt_cleanup(dev, &eq->mtt); for (i = 0; i < npages; ++i) - pci_free_consistent(dev->pdev, PAGE_SIZE, + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, eq->page_list[i].buf, eq->page_list[i].map); diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 7eb8ba822e9..875838b8799 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -204,6 +204,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63 #define QUERY_DEV_CAP_RSVD_PD_OFFSET 0x64 #define QUERY_DEV_CAP_MAX_PD_OFFSET 0x65 +#define QUERY_DEV_CAP_RSVD_XRC_OFFSET 0x66 +#define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67 #define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET 0x68 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82 @@ -318,6 +320,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->reserved_pds = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET); dev_cap->max_pds = 1 << (field & 0x3f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET); + dev_cap->reserved_xrcds = field >> 4; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET); + dev_cap->max_xrcds = 1 << (field & 0x1f); MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET); dev_cap->rdmarc_entry_sz = size; diff --git a/drivers/net/mlx4/fw.h 
b/drivers/net/mlx4/fw.h index 1e8ecc3708e..bf5ec228652 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -93,6 +93,8 @@ struct mlx4_dev_cap { int max_mcgs; int reserved_pds; int max_pds; + int reserved_xrcds; + int max_xrcds; int qpc_entry_sz; int rdmarc_entry_sz; int altc_entry_sz; diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index f0ee35df4dd..94bbc85a532 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -96,6 +96,8 @@ MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)"); static int log_num_vlan; module_param_named(log_num_vlan, log_num_vlan, int, 0444); MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)"); +/* Log2 max number of VLANs per ETH port (0-7) */ +#define MLX4_LOG_NUM_VLANS 7 static int use_prio; module_param_named(use_prio, use_prio, bool, 0444); @@ -220,6 +222,10 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_mrws = dev_cap->reserved_mrws; dev->caps.reserved_uars = dev_cap->reserved_uars; dev->caps.reserved_pds = dev_cap->reserved_pds; + dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? + dev_cap->reserved_xrcds : 0; + dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? + dev_cap->max_xrcds : 0; dev->caps.mtt_entry_sz = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz; dev->caps.max_msg_sz = dev_cap->max_msg_sz; dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); @@ -230,7 +236,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.log_num_macs = log_num_mac; - dev->caps.log_num_vlans = log_num_vlan; + dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; dev->caps.log_num_prios = use_prio ? 
3 : 0; for (i = 1; i <= dev->caps.num_ports; ++i) { @@ -912,11 +918,18 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) goto err_kar_unmap; } + err = mlx4_init_xrcd_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize " + "reliable connection domain table, aborting.\n"); + goto err_pd_table_free; + } + err = mlx4_init_mr_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "memory region table, aborting.\n"); - goto err_pd_table_free; + goto err_xrcd_table_free; } err = mlx4_init_eq_table(dev); @@ -998,6 +1011,13 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) "ib capabilities (%d). Continuing with " "caps = 0\n", port, err); dev->caps.ib_port_def_cap[port] = ib_port_default_caps; + + err = mlx4_check_ext_port_caps(dev, port); + if (err) + mlx4_warn(dev, "failed to get port %d extended " + "port capabilities support info (%d)." + " Assuming not supported\n", port, err); + err = mlx4_SET_PORT(dev, port); if (err) { mlx4_err(dev, "Failed to set port %d, aborting\n", @@ -1033,6 +1053,9 @@ err_eq_table_free: err_mr_table_free: mlx4_cleanup_mr_table(dev); +err_xrcd_table_free: + mlx4_cleanup_xrcd_table(dev); + err_pd_table_free: mlx4_cleanup_pd_table(dev); @@ -1355,6 +1378,7 @@ err_port: mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); mlx4_cleanup_mr_table(dev); + mlx4_cleanup_xrcd_table(dev); mlx4_cleanup_pd_table(dev); mlx4_cleanup_uar_table(dev); @@ -1416,6 +1440,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); mlx4_cleanup_mr_table(dev); + mlx4_cleanup_xrcd_table(dev); mlx4_cleanup_pd_table(dev); iounmap(priv->kar); @@ -1489,10 +1514,9 @@ static int __init mlx4_verify_params(void) return -1; } - if ((log_num_vlan < 0) || (log_num_vlan > 7)) { - pr_warning("mlx4_core: bad num_vlan: %d\n", log_num_vlan); - return -1; - } + if (log_num_vlan != 0) + pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n", + MLX4_LOG_NUM_VLANS); if ((log_mtts_per_seg < 1) || 
(log_mtts_per_seg > 7)) { pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg); diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index a2fcd8402d3..5dfa68ffc11 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -335,6 +335,7 @@ struct mlx4_priv { struct mlx4_cmd cmd; struct mlx4_bitmap pd_bitmap; + struct mlx4_bitmap xrcd_bitmap; struct mlx4_uar_table uar_table; struct mlx4_mr_table mr_table; struct mlx4_cq_table cq_table; @@ -384,6 +385,7 @@ int mlx4_alloc_eq_table(struct mlx4_dev *dev); void mlx4_free_eq_table(struct mlx4_dev *dev); int mlx4_init_pd_table(struct mlx4_dev *dev); +int mlx4_init_xrcd_table(struct mlx4_dev *dev); int mlx4_init_uar_table(struct mlx4_dev *dev); int mlx4_init_mr_table(struct mlx4_dev *dev); int mlx4_init_eq_table(struct mlx4_dev *dev); @@ -393,6 +395,7 @@ int mlx4_init_srq_table(struct mlx4_dev *dev); int mlx4_init_mcg_table(struct mlx4_dev *dev); void mlx4_cleanup_pd_table(struct mlx4_dev *dev); +void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev); void mlx4_cleanup_uar_table(struct mlx4_dev *dev); void mlx4_cleanup_mr_table(struct mlx4_dev *dev); void mlx4_cleanup_eq_table(struct mlx4_dev *dev); @@ -450,6 +453,7 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table); int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port); int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps); +int mlx4_check_ext_port_caps(struct mlx4_dev *dev, u8 port); int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot, enum mlx4_steer_type steer); diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c index 9c188bdd7f4..ab639cfef78 100644 --- a/drivers/net/mlx4/mr.c +++ b/drivers/net/mlx4/mr.c @@ -139,7 +139,7 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), GFP_KERNEL); - buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *), + 
buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free, GFP_KERNEL); if (!buddy->bits || !buddy->num_free) goto err_out; diff --git a/drivers/net/mlx4/pd.c b/drivers/net/mlx4/pd.c index 1286b886dce..3736163e30e 100644 --- a/drivers/net/mlx4/pd.c +++ b/drivers/net/mlx4/pd.c @@ -61,6 +61,24 @@ void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn) } EXPORT_SYMBOL_GPL(mlx4_pd_free); +int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + *xrcdn = mlx4_bitmap_alloc(&priv->xrcd_bitmap); + if (*xrcdn == -1) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc); + +void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn) +{ + mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn); +} +EXPORT_SYMBOL_GPL(mlx4_xrcd_free); + int mlx4_init_pd_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -74,6 +92,18 @@ void mlx4_cleanup_pd_table(struct mlx4_dev *dev) mlx4_bitmap_cleanup(&mlx4_priv(dev)->pd_bitmap); } +int mlx4_init_xrcd_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return mlx4_bitmap_init(&priv->xrcd_bitmap, (1 << 16), + (1 << 16) - 1, dev->caps.reserved_xrcds + 1, 0); +} + +void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev) +{ + mlx4_bitmap_cleanup(&mlx4_priv(dev)->xrcd_bitmap); +} int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar) { diff --git a/drivers/net/mlx4/port.c b/drivers/net/mlx4/port.c index 609e0ec14ce..881592eec61 100644 --- a/drivers/net/mlx4/port.c +++ b/drivers/net/mlx4/port.c @@ -148,22 +148,26 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn, u8 wrap) if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) { err = mlx4_uc_steer_add(dev, port, mac, qpn, 1); - if (!err) { - entry = kmalloc(sizeof *entry, GFP_KERNEL); - if (!entry) { - mlx4_uc_steer_release(dev, port, mac, *qpn, 1); - return -ENOMEM; - } - entry->mac = mac; - err = radix_tree_insert(&info->mac_tree, *qpn, entry); - if (err) { - 
mlx4_uc_steer_release(dev, port, mac, *qpn, 1); - return err; - } - } else + if (err) return err; + + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) { + mlx4_uc_steer_release(dev, port, mac, *qpn, 1); + return -ENOMEM; + } + + entry->mac = mac; + err = radix_tree_insert(&info->mac_tree, *qpn, entry); + if (err) { + kfree(entry); + mlx4_uc_steer_release(dev, port, mac, *qpn, 1); + return err; + } } + mlx4_dbg(dev, "Registering MAC: 0x%llx\n", (unsigned long long) mac); + mutex_lock(&table->mutex); for (i = 0; i < MLX4_MAX_MAC_NUM - 1; i++) { if (free < 0 && !table->refs[i]) { @@ -464,6 +468,48 @@ int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps) return err; } +int mlx4_check_ext_port_caps(struct mlx4_dev *dev, u8 port) +{ + struct mlx4_cmd_mailbox *inmailbox, *outmailbox; + u8 *inbuf, *outbuf; + int err, packet_error; + + inmailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(inmailbox)) + return PTR_ERR(inmailbox); + + outmailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(outmailbox)) { + mlx4_free_cmd_mailbox(dev, inmailbox); + return PTR_ERR(outmailbox); + } + + inbuf = inmailbox->buf; + outbuf = outmailbox->buf; + memset(inbuf, 0, 256); + memset(outbuf, 0, 256); + inbuf[0] = 1; + inbuf[1] = 1; + inbuf[2] = 1; + inbuf[3] = 1; + + *(__be16 *) (&inbuf[16]) = MLX4_ATTR_EXTENDED_PORT_INFO; + *(__be32 *) (&inbuf[20]) = cpu_to_be32(port); + + err = mlx4_cmd_box(dev, inmailbox->dma, outmailbox->dma, port, 3, + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C); + + packet_error = be16_to_cpu(*(__be16 *) (outbuf + 4)); + + dev->caps.ext_port_cap[port] = (!err && !packet_error) ? 
+ MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO + : 0; + + mlx4_free_cmd_mailbox(dev, inmailbox); + mlx4_free_cmd_mailbox(dev, outmailbox); + return err; +} + int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port) { struct mlx4_cmd_mailbox *mailbox; diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c index ec9350e5f21..51c53898c35 100644 --- a/drivers/net/mlx4/qp.c +++ b/drivers/net/mlx4/qp.c @@ -280,6 +280,9 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) * We reserve 2 extra QPs per port for the special QPs. The * block of special QPs must be aligned to a multiple of 8, so * round up. + * + * We also reserve the MSB of the 24-bit QP number to indicate + * that a QP is an XRC QP. */ dev->caps.sqp_start = ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8); diff --git a/drivers/net/mlx4/srq.c b/drivers/net/mlx4/srq.c index 3b07b80a045..a20b141dbb5 100644 --- a/drivers/net/mlx4/srq.c +++ b/drivers/net/mlx4/srq.c @@ -40,20 +40,20 @@ struct mlx4_srq_context { __be32 state_logsize_srqn; u8 logstride; - u8 reserved1[3]; - u8 pg_offset; - u8 reserved2[3]; - u32 reserved3; + u8 reserved1; + __be16 xrcd; + __be32 pg_offset_cqn; + u32 reserved2; u8 log_page_size; - u8 reserved4[2]; + u8 reserved3[2]; u8 mtt_base_addr_h; __be32 mtt_base_addr_l; __be32 pd; __be16 limit_watermark; __be16 wqe_cnt; - u16 reserved5; + u16 reserved4; __be16 wqe_counter; - u32 reserved6; + u32 reserved5; __be64 db_rec_addr; }; @@ -109,8 +109,8 @@ static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox MLX4_CMD_TIME_CLASS_A); } -int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, - u64 db_rec, struct mlx4_srq *srq) +int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd, + struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_cmd_mailbox *mailbox; @@ -148,6 +148,8 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, 
srq_context->state_logsize_srqn = cpu_to_be32((ilog2(srq->max) << 24) | srq->srqn); srq_context->logstride = srq->wqe_shift - 4; + srq_context->xrcd = cpu_to_be16(xrcd); + srq_context->pg_offset_cqn = cpu_to_be32(cqn & 0xffffff); srq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; mtt_addr = mlx4_mtt_addr(dev, mtt); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 53ef894bfa0..ff3ccd5c44d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -61,6 +61,7 @@ enum { MLX4_DEV_CAP_FLAG_RC = 1LL << 0, MLX4_DEV_CAP_FLAG_UC = 1LL << 1, MLX4_DEV_CAP_FLAG_UD = 1LL << 2, + MLX4_DEV_CAP_FLAG_XRC = 1LL << 3, MLX4_DEV_CAP_FLAG_SRQ = 1LL << 6, MLX4_DEV_CAP_FLAG_IPOIB_CSUM = 1LL << 7, MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8, @@ -82,6 +83,12 @@ enum { MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48 }; +#define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) + +enum { + MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO = 1 << 0 +}; + enum { MLX4_BMME_FLAG_LOCAL_INV = 1 << 6, MLX4_BMME_FLAG_REMOTE_INV = 1 << 7, @@ -256,6 +263,8 @@ struct mlx4_caps { int num_qp_per_mgm; int num_pds; int reserved_pds; + int max_xrcds; + int reserved_xrcds; int mtt_entry_sz; u32 max_msg_sz; u32 page_size_cap; @@ -276,6 +285,7 @@ struct mlx4_caps { u32 port_mask; enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1]; u32 max_counters; + u8 ext_port_cap[MLX4_MAX_PORTS + 1]; }; struct mlx4_buf_list { @@ -499,6 +509,8 @@ static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn); void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn); +int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn); +void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar); void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar); @@ -538,8 +550,8 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); int mlx4_qp_alloc(struct mlx4_dev *dev, 
int qpn, struct mlx4_qp *qp); void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp); -int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, - u64 db_rec, struct mlx4_srq *srq); +int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn, + struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq); void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq); int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark); int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark); diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 4001c8249db..48cc4cb9785 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -75,6 +75,7 @@ enum { MLX4_QP_ST_UC = 0x1, MLX4_QP_ST_RD = 0x2, MLX4_QP_ST_UD = 0x3, + MLX4_QP_ST_XRC = 0x6, MLX4_QP_ST_MLX = 0x7 }; @@ -137,7 +138,7 @@ struct mlx4_qp_context { __be32 ssn; __be32 params2; __be32 rnr_nextrecvpsn; - __be32 srcd; + __be32 xrcd; __be32 cqn_recv; __be64 db_rec_addr; __be32 qkey; diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index fe5b05177a2..81aba3a73aa 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -81,7 +81,11 @@ enum { IB_USER_VERBS_CMD_MODIFY_SRQ, IB_USER_VERBS_CMD_QUERY_SRQ, IB_USER_VERBS_CMD_DESTROY_SRQ, - IB_USER_VERBS_CMD_POST_SRQ_RECV + IB_USER_VERBS_CMD_POST_SRQ_RECV, + IB_USER_VERBS_CMD_OPEN_XRCD, + IB_USER_VERBS_CMD_CLOSE_XRCD, + IB_USER_VERBS_CMD_CREATE_XSRQ, + IB_USER_VERBS_CMD_OPEN_QP }; /* @@ -222,6 +226,21 @@ struct ib_uverbs_dealloc_pd { __u32 pd_handle; }; +struct ib_uverbs_open_xrcd { + __u64 response; + __u32 fd; + __u32 oflags; + __u64 driver_data[0]; +}; + +struct ib_uverbs_open_xrcd_resp { + __u32 xrcd_handle; +}; + +struct ib_uverbs_close_xrcd { + __u32 xrcd_handle; +}; + struct ib_uverbs_reg_mr { __u64 response; __u64 start; @@ -404,6 +423,17 @@ struct ib_uverbs_create_qp { __u64 driver_data[0]; }; +struct ib_uverbs_open_qp { + __u64 
response; + __u64 user_handle; + __u32 pd_handle; + __u32 qpn; + __u8 qp_type; + __u8 reserved[7]; + __u64 driver_data[0]; +}; + +/* also used for open response */ struct ib_uverbs_create_qp_resp { __u32 qp_handle; __u32 qpn; @@ -648,11 +678,25 @@ struct ib_uverbs_create_srq { __u64 driver_data[0]; }; +struct ib_uverbs_create_xsrq { + __u64 response; + __u64 user_handle; + __u32 srq_type; + __u32 pd_handle; + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u32 reserved; + __u32 xrcd_handle; + __u32 cq_handle; + __u64 driver_data[0]; +}; + struct ib_uverbs_create_srq_resp { __u32 srq_handle; __u32 max_wr; __u32 max_sge; - __u32 reserved; + __u32 srqn; }; struct ib_uverbs_modify_srq { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 228be3e220d..bf5daafe8ec 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -112,6 +112,7 @@ enum ib_device_cap_flags { */ IB_DEVICE_UD_IP_CSUM = (1<<18), IB_DEVICE_UD_TSO = (1<<19), + IB_DEVICE_XRC = (1<<20), IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21), IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), }; @@ -207,6 +208,7 @@ enum ib_port_cap_flags { IB_PORT_SM_DISABLED = 1 << 10, IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, IB_PORT_CM_SUP = 1 << 16, IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, IB_PORT_REINIT_SUP = 1 << 18, @@ -415,7 +417,15 @@ enum ib_rate { IB_RATE_40_GBPS = 7, IB_RATE_60_GBPS = 8, IB_RATE_80_GBPS = 9, - IB_RATE_120_GBPS = 10 + IB_RATE_120_GBPS = 10, + IB_RATE_14_GBPS = 11, + IB_RATE_56_GBPS = 12, + IB_RATE_112_GBPS = 13, + IB_RATE_168_GBPS = 14, + IB_RATE_25_GBPS = 15, + IB_RATE_100_GBPS = 16, + IB_RATE_200_GBPS = 17, + IB_RATE_300_GBPS = 18 }; /** @@ -427,6 +437,13 @@ enum ib_rate { int ib_rate_to_mult(enum ib_rate rate) __attribute_const__; /** + * ib_rate_to_mbps - Convert the IB rate enum to Mbps. + * For example, IB_RATE_2_5_GBPS will be converted to 2500. + * @rate: rate to convert. 
+ */ +int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__; + +/** * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate * enum. * @mult: multiple to convert. @@ -522,6 +539,11 @@ enum ib_cq_notify_flags { IB_CQ_REPORT_MISSED_EVENTS = 1 << 2, }; +enum ib_srq_type { + IB_SRQT_BASIC, + IB_SRQT_XRC +}; + enum ib_srq_attr_mask { IB_SRQ_MAX_WR = 1 << 0, IB_SRQ_LIMIT = 1 << 1, @@ -537,6 +559,14 @@ struct ib_srq_init_attr { void (*event_handler)(struct ib_event *, void *); void *srq_context; struct ib_srq_attr attr; + enum ib_srq_type srq_type; + + union { + struct { + struct ib_xrcd *xrcd; + struct ib_cq *cq; + } xrc; + } ext; }; struct ib_qp_cap { @@ -565,7 +595,11 @@ enum ib_qp_type { IB_QPT_UC, IB_QPT_UD, IB_QPT_RAW_IPV6, - IB_QPT_RAW_ETHERTYPE + IB_QPT_RAW_ETHERTYPE, + /* Save 8 for RAW_PACKET */ + IB_QPT_XRC_INI = 9, + IB_QPT_XRC_TGT, + IB_QPT_MAX }; enum ib_qp_create_flags { @@ -579,6 +613,7 @@ struct ib_qp_init_attr { struct ib_cq *send_cq; struct ib_cq *recv_cq; struct ib_srq *srq; + struct ib_xrcd *xrcd; /* XRC TGT QPs only */ struct ib_qp_cap cap; enum ib_sig_type sq_sig_type; enum ib_qp_type qp_type; @@ -586,6 +621,13 @@ struct ib_qp_init_attr { u8 port_num; /* special QP types only */ }; +struct ib_qp_open_attr { + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + u32 qp_num; + enum ib_qp_type qp_type; +}; + enum ib_rnr_timeout { IB_RNR_TIMER_655_36 = 0, IB_RNR_TIMER_000_01 = 1, @@ -770,6 +812,7 @@ struct ib_send_wr { u32 rkey; } fast_reg; } wr; + u32 xrc_remote_srq_num; /* XRC TGT QPs only */ }; struct ib_recv_wr { @@ -831,6 +874,7 @@ struct ib_ucontext { struct list_head qp_list; struct list_head srq_list; struct list_head ah_list; + struct list_head xrcd_list; int closing; }; @@ -858,6 +902,15 @@ struct ib_pd { atomic_t usecnt; /* count all resources */ }; +struct ib_xrcd { + struct ib_device *device; + atomic_t usecnt; /* count all exposed resources */ + struct inode *inode; + + struct mutex tgt_qp_mutex; + 
struct list_head tgt_qp_list; +}; + struct ib_ah { struct ib_device *device; struct ib_pd *pd; @@ -882,7 +935,16 @@ struct ib_srq { struct ib_uobject *uobject; void (*event_handler)(struct ib_event *, void *); void *srq_context; + enum ib_srq_type srq_type; atomic_t usecnt; + + union { + struct { + struct ib_xrcd *xrcd; + struct ib_cq *cq; + u32 srq_num; + } xrc; + } ext; }; struct ib_qp { @@ -891,6 +953,11 @@ struct ib_qp { struct ib_cq *send_cq; struct ib_cq *recv_cq; struct ib_srq *srq; + struct ib_xrcd *xrcd; /* XRC TGT QPs only */ + struct list_head xrcd_list; + atomic_t usecnt; /* count times opened */ + struct list_head open_list; + struct ib_qp *real_qp; struct ib_uobject *uobject; void (*event_handler)(struct ib_event *, void *); void *qp_context; @@ -1149,6 +1216,10 @@ struct ib_device { struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad); + struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device, + struct ib_ucontext *ucontext, + struct ib_udata *udata); + int (*dealloc_xrcd)(struct ib_xrcd *xrcd); struct ib_dma_mapping_ops *dma_ops; @@ -1443,6 +1514,25 @@ int ib_query_qp(struct ib_qp *qp, int ib_destroy_qp(struct ib_qp *qp); /** + * ib_open_qp - Obtain a reference to an existing sharable QP. + * @xrcd - XRC domain + * @qp_open_attr: Attributes identifying the QP to open. + * + * Returns a reference to a sharable QP. + */ +struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, + struct ib_qp_open_attr *qp_open_attr); + +/** + * ib_close_qp - Release an external reference to a QP. + * @qp: The QP handle to release + * + * The opened QP handle is released by the caller. The underlying + * shared QP is not destroyed until all internal references are released. + */ +int ib_close_qp(struct ib_qp *qp); + +/** * ib_post_send - Posts a list of work requests to the send queue of * the specified QP. * @qp: The QP to post the work request on. 
@@ -2060,4 +2150,16 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); */ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); +/** + * ib_alloc_xrcd - Allocates an XRC domain. + * @device: The device on which to allocate the XRC domain. + */ +struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device); + +/** + * ib_dealloc_xrcd - Deallocates an XRC domain. + * @xrcd: The XRC domain to deallocate. + */ +int ib_dealloc_xrcd(struct ib_xrcd *xrcd); + #endif /* IB_VERBS_H */ diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 26977c149c4..51988f80818 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -65,6 +65,7 @@ enum rdma_cm_event_type { enum rdma_port_space { RDMA_PS_SDP = 0x0001, RDMA_PS_IPOIB = 0x0002, + RDMA_PS_IB = 0x013F, RDMA_PS_TCP = 0x0106, RDMA_PS_UDP = 0x0111, }; diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h index fc82c1896f7..5348a000c8f 100644 --- a/include/rdma/rdma_user_cm.h +++ b/include/rdma/rdma_user_cm.h @@ -77,7 +77,8 @@ struct rdma_ucm_create_id { __u64 uid; __u64 response; __u16 ps; - __u8 reserved[6]; + __u8 qp_type; + __u8 reserved[5]; }; struct rdma_ucm_create_id_resp { |