Diffstat (limited to 'drivers/infiniband/hw/bnxt_re/main.c')
-rw-r--r-- | drivers/infiniband/hw/bnxt_re/main.c | 251
1 file changed, 209 insertions(+), 42 deletions(-)
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index aafc19aa5de1..508d00a5a106 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -80,6 +80,79 @@ static DEFINE_MUTEX(bnxt_re_dev_lock);
 static struct workqueue_struct *bnxt_re_wq;
 static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait);
 
+/* SR-IOV helper functions */
+
+static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
+{
+	struct bnxt *bp;
+
+	bp = netdev_priv(rdev->en_dev->net);
+	if (BNXT_VF(bp))
+		rdev->is_virtfn = 1;
+}
+
+/* Set the maximum number of each resource that the driver actually wants
+ * to allocate. This may be up to the maximum number the firmware has
+ * reserved for the function. The driver may choose to allocate fewer
+ * resources than the firmware maximum.
+ */
+static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
+{
+	u32 vf_qps = 0, vf_srqs = 0, vf_cqs = 0, vf_mrws = 0, vf_gids = 0;
+	u32 i;
+	u32 vf_pct;
+	u32 num_vfs;
+	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+
+	rdev->qplib_ctx.qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
+					  dev_attr->max_qp);
+
+	rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
+	/* Use max_mr from fw since max_mrw does not get set */
+	rdev->qplib_ctx.mrw_count = min_t(u32, rdev->qplib_ctx.mrw_count,
+					  dev_attr->max_mr);
+	rdev->qplib_ctx.srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
+					   dev_attr->max_srq);
+	rdev->qplib_ctx.cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT,
+					 dev_attr->max_cq);
+
+	for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+		rdev->qplib_ctx.tqm_count[i] =
+			rdev->dev_attr.tqm_alloc_reqs[i];
+
+	if (rdev->num_vfs) {
+		/*
+		 * Reserve a set of resources for the PF. Divide the remaining
+		 * resources among the VFs
+		 */
+		vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
+		num_vfs = 100 * rdev->num_vfs;
+		vf_qps = (rdev->qplib_ctx.qpc_count * vf_pct) / num_vfs;
+		vf_srqs = (rdev->qplib_ctx.srqc_count * vf_pct) / num_vfs;
+		vf_cqs = (rdev->qplib_ctx.cq_count * vf_pct) / num_vfs;
+		/*
+		 * The driver allows many more MRs than other resources. If the
+		 * firmware does also, then reserve a fixed amount for the PF
+		 * and divide the rest among VFs. VFs may use many MRs for NFS
+		 * mounts, ISER, NVME applications, etc. If the firmware
+		 * severely restricts the number of MRs, then let PF have
+		 * half and divide the rest among VFs, as for the other
+		 * resource types.
+		 */
+		if (rdev->qplib_ctx.mrw_count < BNXT_RE_MAX_MRW_COUNT_64K)
+			vf_mrws = rdev->qplib_ctx.mrw_count * vf_pct / num_vfs;
+		else
+			vf_mrws = (rdev->qplib_ctx.mrw_count -
+				   BNXT_RE_RESVD_MR_FOR_PF) / rdev->num_vfs;
+		vf_gids = BNXT_RE_MAX_GID_PER_VF;
+	}
+	rdev->qplib_ctx.vf_res.max_mrw_per_vf = vf_mrws;
+	rdev->qplib_ctx.vf_res.max_gid_per_vf = vf_gids;
+	rdev->qplib_ctx.vf_res.max_qp_per_vf = vf_qps;
+	rdev->qplib_ctx.vf_res.max_srq_per_vf = vf_srqs;
+	rdev->qplib_ctx.vf_res.max_cq_per_vf = vf_cqs;
+}
+
 /* for handling bnxt_en callbacks later */
 static void bnxt_re_stop(void *p)
 {
@@ -91,6 +164,15 @@ static void bnxt_re_start(void *p)
 
 static void bnxt_re_sriov_config(void *p, int num_vfs)
 {
+	struct bnxt_re_dev *rdev = p;
+
+	if (!rdev)
+		return;
+
+	rdev->num_vfs = num_vfs;
+	bnxt_re_set_resource_limits(rdev);
+	bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
+				      &rdev->qplib_ctx);
 }
 
 static void bnxt_re_shutdown(void *p)
@@ -417,7 +499,7 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
 		return ERR_PTR(-EINVAL);
 
 	if (!(en_dev->flags & BNXT_EN_FLAG_ROCE_CAP)) {
-		dev_dbg(&pdev->dev,
+		dev_info(&pdev->dev,
 			"%s: probe error: RoCE is not supported on this device",
 			ROCE_DRV_MODULE_NAME);
 		return ERR_PTR(-ENODEV);
@@ -490,6 +572,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 
 	ibdev->query_port = bnxt_re_query_port;
 	ibdev->get_port_immutable = bnxt_re_get_port_immutable;
+	ibdev->get_dev_fw_str = bnxt_re_query_fw_str;
 	ibdev->query_pkey = bnxt_re_query_pkey;
 	ibdev->query_gid = bnxt_re_query_gid;
 	ibdev->get_netdev = bnxt_re_get_netdev;
@@ -505,6 +588,12 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 	ibdev->query_ah = bnxt_re_query_ah;
 	ibdev->destroy_ah = bnxt_re_destroy_ah;
 
+	ibdev->create_srq = bnxt_re_create_srq;
+	ibdev->modify_srq = bnxt_re_modify_srq;
+	ibdev->query_srq = bnxt_re_query_srq;
+	ibdev->destroy_srq = bnxt_re_destroy_srq;
+	ibdev->post_srq_recv = bnxt_re_post_srq_recv;
+
 	ibdev->create_qp = bnxt_re_create_qp;
 	ibdev->modify_qp = bnxt_re_modify_qp;
 	ibdev->query_qp = bnxt_re_query_qp;
@@ -541,14 +630,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
 	return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
 }
 
-static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
-			   char *buf)
-{
-	struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->dev_attr.fw_ver);
-}
-
 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
 			char *buf)
 {
@@ -558,12 +639,10 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
 }
 
 static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
-static DEVICE_ATTR(fw_rev, 0444, show_fw_ver, NULL);
 static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
 
 static struct device_attribute *bnxt_re_attributes[] = {
 	&dev_attr_hw_rev,
-	&dev_attr_fw_rev,
 	&dev_attr_hca_type
 };
 
@@ -616,10 +695,10 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
 	return rdev;
 }
 
-static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
-			       struct creq_func_event *aeqe)
+static int bnxt_re_handle_unaffi_async_event(struct creq_func_event
+					     *unaffi_async)
 {
-	switch (aeqe->event) {
+	switch (unaffi_async->event) {
 	case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
 		break;
 	case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
@@ -648,6 +727,93 @@ static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
 	return 0;
 }
 
+static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
+					 struct bnxt_re_qp *qp)
+{
+	struct ib_event event;
+
+	memset(&event, 0, sizeof(event));
+	if (qp->qplib_qp.srq) {
+		event.device = &qp->rdev->ibdev;
+		event.element.qp = &qp->ib_qp;
+		event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+	}
+
+	if (event.device && qp->ib_qp.event_handler)
+		qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+
+	return 0;
+}
+
+static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
+					   void *obj)
+{
+	int rc = 0;
+	u8 event;
+
+	if (!obj)
+		return rc; /* QP was already dead, still return success */
+
+	event = affi_async->event;
+	if (event == CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION) {
+		struct bnxt_qplib_qp *lib_qp = obj;
+		struct bnxt_re_qp *qp = container_of(lib_qp, struct bnxt_re_qp,
+						     qplib_qp);
+		rc = bnxt_re_handle_qp_async_event(affi_async, qp);
+	}
+	return rc;
+}
+
+static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
+			       void *aeqe, void *obj)
+{
+	struct creq_qp_event *affi_async;
+	struct creq_func_event *unaffi_async;
+	u8 type;
+	int rc;
+
+	type = ((struct creq_base *)aeqe)->type;
+	if (type == CREQ_BASE_TYPE_FUNC_EVENT) {
+		unaffi_async = aeqe;
+		rc = bnxt_re_handle_unaffi_async_event(unaffi_async);
+	} else {
+		affi_async = aeqe;
+		rc = bnxt_re_handle_affi_async_event(affi_async, obj);
+	}
+
+	return rc;
+}
+
+static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
+				struct bnxt_qplib_srq *handle, u8 event)
+{
+	struct bnxt_re_srq *srq = container_of(handle, struct bnxt_re_srq,
+					       qplib_srq);
+	struct ib_event ib_event;
+	int rc = 0;
+
+	if (!srq) {
+		dev_err(NULL, "%s: SRQ is NULL, SRQN not handled",
+			ROCE_DRV_MODULE_NAME);
+		rc = -EINVAL;
+		goto done;
+	}
+	ib_event.device = &srq->rdev->ibdev;
+	ib_event.element.srq = &srq->ib_srq;
+	if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
+		ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+	else
+		ib_event.event = IB_EVENT_SRQ_ERR;
+
+	if (srq->ib_srq.event_handler) {
+		/* Lock event_handler? */
+		(*srq->ib_srq.event_handler)(&ib_event,
+					     srq->ib_srq.srq_context);
+	}
+done:
+	return rc;
+}
+
 static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
 			       struct bnxt_qplib_cq *handle)
 {
@@ -690,7 +856,8 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
 		rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1],
 					  i - 1,
 					  rdev->msix_entries[i].vector,
 					  rdev->msix_entries[i].db_offset,
-					  &bnxt_re_cqn_handler, NULL);
+					  &bnxt_re_cqn_handler,
+					  &bnxt_re_srqn_handler);
 		if (rc) {
 			dev_err(rdev_to_dev(rdev),
@@ -734,7 +901,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
 
 	/* Configure and allocate resources for qplib */
 	rdev->qplib_res.rcfw = &rdev->rcfw;
-	rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
+	rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
+				     rdev->is_virtfn);
 	if (rc)
 		goto fail;
 
@@ -1035,19 +1203,6 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
 	}
 }
 
-static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
-{
-	u32 i;
-
-	rdev->qplib_ctx.qpc_count = BNXT_RE_MAX_QPC_COUNT;
-	rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT;
-	rdev->qplib_ctx.srqc_count = BNXT_RE_MAX_SRQC_COUNT;
-	rdev->qplib_ctx.cq_count = BNXT_RE_MAX_CQ_COUNT;
-	for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
-		rdev->qplib_ctx.tqm_count[i] =
-			rdev->dev_attr.tqm_alloc_reqs[i];
-}
-
 /* worker thread for polling periodic events. Now used for QoS programming*/
 static void bnxt_re_worker(struct work_struct *work)
 {
@@ -1070,6 +1225,9 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 	}
 	set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
 
+	/* Check whether VF or PF */
+	bnxt_re_get_sriov_func_type(rdev);
+
 	rc = bnxt_re_request_msix(rdev);
 	if (rc) {
 		pr_err("Failed to get MSI-X vectors: %#x\n", rc);
@@ -1101,16 +1259,18 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 		(rdev->en_dev->pdev, &rdev->rcfw,
 		 rdev->msix_entries[BNXT_RE_AEQ_IDX].vector,
 		 rdev->msix_entries[BNXT_RE_AEQ_IDX].db_offset,
-		 0, &bnxt_re_aeq_handler);
+		 rdev->is_virtfn, &bnxt_re_aeq_handler);
 	if (rc) {
 		pr_err("Failed to enable RCFW channel: %#x\n", rc);
 		goto free_ring;
 	}
 
-	rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
+	rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
+				     rdev->is_virtfn);
 	if (rc)
 		goto disable_rcfw;
 
-	bnxt_re_set_resource_limits(rdev);
+	if (!rdev->is_virtfn)
+		bnxt_re_set_resource_limits(rdev);
 
 	rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0);
 	if (rc) {
@@ -1125,7 +1285,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 		goto free_ctx;
 	}
 
-	rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx, 0);
+	rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx,
+				  rdev->is_virtfn);
 	if (rc) {
 		pr_err("Failed to initialize RCFW: %#x\n", rc);
 		goto free_sctx;
@@ -1144,13 +1305,15 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 		goto fail;
 	}
 
-	rc = bnxt_re_setup_qos(rdev);
-	if (rc)
-		pr_info("RoCE priority not yet configured\n");
+	if (!rdev->is_virtfn) {
+		rc = bnxt_re_setup_qos(rdev);
+		if (rc)
+			pr_info("RoCE priority not yet configured\n");
 
-	INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
-	set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
-	schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+		INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
+		set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
+		schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+	}
 
 	/* Register ib dev */
 	rc = bnxt_re_register_ib(rdev);
@@ -1176,6 +1339,7 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 	set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
 	ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
 			 &rdev->active_width);
+	set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
 	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
 	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
 
@@ -1400,7 +1564,7 @@ err_netdev:
 
 static void __exit bnxt_re_mod_exit(void)
 {
-	struct bnxt_re_dev *rdev;
+	struct bnxt_re_dev *rdev, *next;
 	LIST_HEAD(to_be_deleted);
 
 	mutex_lock(&bnxt_re_dev_lock);
@@ -1408,8 +1572,11 @@ static void __exit bnxt_re_mod_exit(void)
 	if (!list_empty(&bnxt_re_dev_list))
 		list_splice_init(&bnxt_re_dev_list, &to_be_deleted);
 	mutex_unlock(&bnxt_re_dev_lock);
-
-	list_for_each_entry(rdev, &to_be_deleted, list) {
+	/*
+	 * Cleanup the devices in reverse order so that the VF device
+	 * cleanup is done before PF cleanup
+	 */
+	list_for_each_entry_safe_reverse(rdev, next, &to_be_deleted, list) {
		dev_info(rdev_to_dev(rdev), "Unregistering Device");
 		bnxt_re_dev_stop(rdev);
 		bnxt_re_ib_unreg(rdev, true);
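
The core arithmetic in the new bnxt_re_set_resource_limits() is a percentage split: a share of each resource pool is reserved for the PF and the remainder is divided evenly among the VFs, except that MRs get a fixed PF reservation when the firmware exposes a large MR count. The standalone sketch below models that split so the per-VF numbers can be checked in isolation; the macro values, struct, and function names here are illustrative stand-ins, not the driver's actual definitions or constants.

/* Standalone model of the PF/VF split performed by bnxt_re_set_resource_limits().
 * All constants below are assumed example values, not the driver's.
 */
#include <stdint.h>
#include <stdio.h>

#define PCT_RSVD_FOR_PF    35U            /* assumed PF reservation, percent */
#define RESVD_MR_FOR_PF    (32U * 1024U)  /* assumed fixed MR reservation    */
#define MRW_LOW_THRESHOLD  (64U * 1024U)  /* "firmware is MR-constrained"    */

struct vf_limits {
	uint32_t qps, cqs, srqs, mrws;
};

static struct vf_limits split_for_vfs(uint32_t qpc, uint32_t cqc, uint32_t srqc,
				      uint32_t mrwc, uint32_t num_vfs)
{
	struct vf_limits l = {0};
	uint32_t vf_pct = 100U - PCT_RSVD_FOR_PF;
	uint32_t denom = 100U * num_vfs;

	if (!num_vfs)
		return l;

	/* Reserve PCT_RSVD_FOR_PF percent for the PF, split the rest evenly. */
	l.qps  = qpc  * vf_pct / denom;
	l.cqs  = cqc  * vf_pct / denom;
	l.srqs = srqc * vf_pct / denom;

	/* MRs: percentage split only when the firmware count is small;
	 * otherwise reserve a fixed amount for the PF and share the rest.
	 */
	if (mrwc < MRW_LOW_THRESHOLD)
		l.mrws = mrwc * vf_pct / denom;
	else
		l.mrws = (mrwc - RESVD_MR_FOR_PF) / num_vfs;

	return l;
}

int main(void)
{
	/* e.g. 64K QPs shared by 8 VFs: 65536 * 65 / 800 -> 5324 per VF
	 * (integer division), while 256K MRs give (262144 - 32768) / 8 = 28672.
	 */
	struct vf_limits l = split_for_vfs(64U * 1024U, 64U * 1024U, 8U * 1024U,
					   256U * 1024U, 8U);

	printf("per-VF: qp=%u cq=%u srq=%u mr=%u\n", l.qps, l.cqs, l.srqs, l.mrws);
	return 0;
}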