summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-02-24 15:11:03 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2023-02-24 15:11:03 -0800
commit8cbd92339db08b19b93d1637e5799ff2a8dddfd2 (patch)
tree7e62d961f32e8a2a96271029b376f1e8bbd70a7c /drivers/infiniband/hw
parent143c7bc6496c886ce5db2a2f9cec580494690170 (diff)
parent66fb1d5df6ace316a4a6e2c31e13fc123ea2b644 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "Quite a small cycle this time, even with the rc8. I suppose everyone went to sleep over xmas. - Minor driver updates for hfi1, cxgb4, erdma, hns, irdma, mlx5, siw, mana - inline CQE support for hns - Have mlx5 display device error codes - Pinned DMABUF support for irdma - Continued rxe cleanups, particularly converting the MRs to use xarray - Improvements to what can be cached in the mlx5 mkey cache" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (61 commits) IB/mlx5: Extend debug control for CC parameters IB/hfi1: Fix sdma.h tx->num_descs off-by-one errors IB/hfi1: Fix math bugs in hfi1_can_pin_pages() RDMA/irdma: Add support for dmabuf pin memory regions RDMA/mlx5: Use query_special_contexts for mkeys net/mlx5e: Use query_special_contexts for mkeys net/mlx5: Change define name for 0x100 lkey value net/mlx5: Expose bits for querying special mkeys RDMA/rxe: Fix missing memory barriers in rxe_queue.h RDMA/mana_ib: Fix a bug when the PF indicates more entries for registering memory on first packet RDMA/rxe: Remove rxe_alloc() RDMA/cma: Distinguish between sockaddr_in and sockaddr_in6 by size Subject: RDMA/rxe: Handle zero length rdma iw_cxgb4: Fix potential NULL dereference in c4iw_fill_res_cm_id_entry() RDMA/mlx5: Use rdma_umem_for_each_dma_block() RDMA/umem: Remove unused 'work' member from struct ib_umem RDMA/irdma: Cap MSIX used to online CPUs + 1 RDMA/mlx5: Check reg_create() create for errors RDMA/restrack: Correct spelling RDMA/cxgb4: Fix potential null-ptr-deref in pass_establish() ...
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c7
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/restrack.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/t4fw_ri_api.h26
-rw-r--r--drivers/infiniband/hw/erdma/erdma_hw.h4
-rw-r--r--drivers/infiniband/hw/erdma/erdma_verbs.c4
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c59
-rw-r--r--drivers/infiniband/hw/hfi1/exp_rcv.h5
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c81
-rw-r--r--drivers/infiniband/hw/hfi1/init.c2
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c4
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h15
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c46
-rw-r--r--drivers/infiniband/hw/hfi1/user_pages.c61
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c81
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h19
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c109
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c17
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c66
-rw-r--r--drivers/infiniband/hw/irdma/hw.c2
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c313
-rw-r--r--drivers/infiniband/hw/mana/main.c22
-rw-r--r--drivers/infiniband/hw/mlx4/main.c8
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h3
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c121
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c45
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h3
-rw-r--r--drivers/infiniband/hw/mlx5/cong.c28
-rw-r--r--drivers/infiniband/hw/mlx5/main.c24
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h51
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c490
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c67
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c164
-rw-r--r--drivers/infiniband/hw/mlx5/qp.h4
-rw-r--r--drivers/infiniband/hw/mlx5/qpc.c7
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c2
-rw-r--r--drivers/infiniband/hw/mlx5/umr.c4
-rw-r--r--drivers/infiniband/hw/mlx5/wr.c2
39 files changed, 1236 insertions, 737 deletions
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 499a425a3379..ced615b5ea09 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -2676,6 +2676,9 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
u16 tcp_opt = ntohs(req->tcp_opt);
ep = get_ep_from_tid(dev, tid);
+ if (!ep)
+ return 0;
+
pr_debug("ep %p tid %u\n", ep, ep->hwtid);
ep->snd_seq = be32_to_cpu(req->snd_isn);
ep->rcv_seq = be32_to_cpu(req->rcv_isn);
@@ -4144,6 +4147,10 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
if (neigh->dev->flags & IFF_LOOPBACK) {
pdev = ip_dev_find(&init_net, iph->daddr);
+ if (!pdev) {
+ pr_err("%s - failed to find device!\n", __func__);
+ goto free_dst;
+ }
e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
pdev, 0);
pi = (struct port_info *)netdev_priv(pdev);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index c7e8d7b3baa1..7e2835dcbc1c 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -767,7 +767,7 @@ static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
goto out;
wc->wr_id = cookie;
- wc->qp = qhp ? &qhp->ibqp : NULL;
+ wc->qp = &qhp->ibqp;
wc->vendor_err = CQE_STATUS(&cqe);
wc->wc_flags = 0;
diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c
index ff645b955a08..fd22c85d35f4 100644
--- a/drivers/infiniband/hw/cxgb4/restrack.c
+++ b/drivers/infiniband/hw/cxgb4/restrack.c
@@ -238,7 +238,7 @@ int c4iw_fill_res_cm_id_entry(struct sk_buff *msg,
if (rdma_nl_put_driver_u64_hex(msg, "history", epcp->history))
goto err_cancel_table;
- if (epcp->state == LISTEN) {
+ if (listen_ep) {
if (rdma_nl_put_driver_u32(msg, "stid", listen_ep->stid))
goto err_cancel_table;
if (rdma_nl_put_driver_u32(msg, "backlog", listen_ep->backlog))
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index a2f5e29ef226..1f79537fc8d1 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -122,9 +122,7 @@ struct fw_ri_dsgl {
__be16 nsge;
__be32 len0;
__be64 addr0;
-#ifndef C99_NOT_SUPPORTED
struct fw_ri_dsge_pair sge[];
-#endif
};
struct fw_ri_sge {
@@ -138,9 +136,7 @@ struct fw_ri_isgl {
__u8 r1;
__be16 nsge;
__be32 r2;
-#ifndef C99_NOT_SUPPORTED
struct fw_ri_sge sge[];
-#endif
};
struct fw_ri_immd {
@@ -148,9 +144,7 @@ struct fw_ri_immd {
__u8 r1;
__be16 r2;
__be32 immdlen;
-#ifndef C99_NOT_SUPPORTED
__u8 data[];
-#endif
};
struct fw_ri_tpte {
@@ -320,9 +314,7 @@ struct fw_ri_res_wr {
__be32 op_nres;
__be32 len16_pkd;
__u64 cookie;
-#ifndef C99_NOT_SUPPORTED
struct fw_ri_res res[];
-#endif
};
#define FW_RI_RES_WR_NRES_S 0
@@ -562,12 +554,10 @@ struct fw_ri_rdma_write_wr {
__be32 plen;
__be32 stag_sink;
__be64 to_sink;
-#ifndef C99_NOT_SUPPORTED
union {
- struct fw_ri_immd immd_src[0];
- struct fw_ri_isgl isgl_src[0];
+ DECLARE_FLEX_ARRAY(struct fw_ri_immd, immd_src);
+ DECLARE_FLEX_ARRAY(struct fw_ri_isgl, isgl_src);
} u;
-#endif
};
struct fw_ri_send_wr {
@@ -581,12 +571,10 @@ struct fw_ri_send_wr {
__be32 plen;
__be32 r3;
__be64 r4;
-#ifndef C99_NOT_SUPPORTED
union {
- struct fw_ri_immd immd_src[0];
- struct fw_ri_isgl isgl_src[0];
+ DECLARE_FLEX_ARRAY(struct fw_ri_immd, immd_src);
+ DECLARE_FLEX_ARRAY(struct fw_ri_isgl, isgl_src);
} u;
-#endif
};
#define FW_RI_SEND_WR_SENDOP_S 0
@@ -618,12 +606,10 @@ struct fw_ri_rdma_write_cmpl_wr {
struct fw_ri_isgl isgl_src;
} u_cmpl;
__be64 r3;
-#ifndef C99_NOT_SUPPORTED
union fw_ri_write {
- struct fw_ri_immd immd_src[0];
- struct fw_ri_isgl isgl_src[0];
+ DECLARE_FLEX_ARRAY(struct fw_ri_immd, immd_src);
+ DECLARE_FLEX_ARRAY(struct fw_ri_isgl, isgl_src);
} u;
-#endif
};
struct fw_ri_rdma_read_wr {
diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
index ab371fec610c..4c38d99c73f1 100644
--- a/drivers/infiniband/hw/erdma/erdma_hw.h
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -397,7 +397,7 @@ struct erdma_write_sqe {
__le32 rsvd;
- struct erdma_sge sgl[0];
+ struct erdma_sge sgl[];
};
struct erdma_send_sqe {
@@ -408,7 +408,7 @@ struct erdma_send_sqe {
};
__le32 length;
- struct erdma_sge sgl[0];
+ struct erdma_sge sgl[];
};
struct erdma_readreq_sqe {
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index 5dab1e87975b..9c30d78730aa 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -1110,12 +1110,14 @@ int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
prot = pgprot_device(vma->vm_page_prot);
break;
default:
- return -EINVAL;
+ err = -EINVAL;
+ goto put_entry;
}
err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE,
prot, rdma_entry);
+put_entry:
rdma_user_mmap_entry_put(rdma_entry);
return err;
}
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index ebe970f76232..90b672feed83 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1056,7 +1056,7 @@ static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
static void handle_temp_err(struct hfi1_devdata *dd);
static void dc_shutdown(struct hfi1_devdata *dd);
static void dc_start(struct hfi1_devdata *dd);
-static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
+static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp,
unsigned int *np);
static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms);
@@ -13362,7 +13362,6 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
int ret;
unsigned ngroups;
int rmt_count;
- int user_rmt_reduced;
u32 n_usr_ctxts;
u32 send_contexts = chip_send_contexts(dd);
u32 rcv_contexts = chip_rcv_contexts(dd);
@@ -13421,28 +13420,34 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
(num_kernel_contexts + n_usr_ctxts),
&node_affinity.real_cpu_mask);
/*
- * The RMT entries are currently allocated as shown below:
- * 1. QOS (0 to 128 entries);
- * 2. FECN (num_kernel_context - 1 + num_user_contexts +
- * num_netdev_contexts);
- * 3. netdev (num_netdev_contexts).
- * It should be noted that FECN oversubscribe num_netdev_contexts
- * entries of RMT because both netdev and PSM could allocate any receive
- * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts,
- * and PSM FECN must reserve an RMT entry for each possible PSM receive
- * context.
+ * RMT entries are allocated as follows:
+ * 1. QOS (0 to 128 entries)
+ * 2. FECN (num_kernel_context - 1 [a] + num_user_contexts +
+ * num_netdev_contexts [b])
+ * 3. netdev (NUM_NETDEV_MAP_ENTRIES)
+ *
+ * Notes:
+ * [a] Kernel contexts (except control) are included in FECN if kernel
+ * TID_RDMA is active.
+ * [b] Netdev and user contexts are randomly allocated from the same
+ * context pool, so FECN must cover all contexts in the pool.
*/
- rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_netdev_contexts * 2);
- if (HFI1_CAP_IS_KSET(TID_RDMA))
- rmt_count += num_kernel_contexts - 1;
- if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
- user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count;
- dd_dev_err(dd,
- "RMT size is reducing the number of user receive contexts from %u to %d\n",
- n_usr_ctxts,
- user_rmt_reduced);
- /* recalculate */
- n_usr_ctxts = user_rmt_reduced;
+ rmt_count = qos_rmt_entries(num_kernel_contexts - 1, NULL, NULL)
+ + (HFI1_CAP_IS_KSET(TID_RDMA) ? num_kernel_contexts - 1
+ : 0)
+ + n_usr_ctxts
+ + num_netdev_contexts
+ + NUM_NETDEV_MAP_ENTRIES;
+ if (rmt_count > NUM_MAP_ENTRIES) {
+ int over = rmt_count - NUM_MAP_ENTRIES;
+ /* try to squish user contexts, minimum of 1 */
+ if (over >= n_usr_ctxts) {
+ dd_dev_err(dd, "RMT overflow: reduce the requested number of contexts\n");
+ return -EINVAL;
+ }
+ dd_dev_err(dd, "RMT overflow: reducing # user contexts from %u to %u\n",
+ n_usr_ctxts, n_usr_ctxts - over);
+ n_usr_ctxts -= over;
}
/* the first N are kernel contexts, the rest are user/netdev contexts */
@@ -14299,15 +14304,15 @@ static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
}
/* return the number of RSM map table entries that will be used for QOS */
-static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
+static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp,
unsigned int *np)
{
int i;
unsigned int m, n;
- u8 max_by_vl = 0;
+ uint max_by_vl = 0;
/* is QOS active at all? */
- if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
+ if (n_krcv_queues < MIN_KERNEL_KCTXTS ||
num_vls == 1 ||
krcvqsset <= 1)
goto no_qos;
@@ -14365,7 +14370,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
if (!rmt)
goto bail;
- rmt_entries = qos_rmt_entries(dd, &m, &n);
+ rmt_entries = qos_rmt_entries(dd->n_krcv_queues - 1, &m, &n);
if (rmt_entries == 0)
goto bail;
qpns_per_vl = 1 << m;
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h
index c6291bbf723c..41f7fe5d1839 100644
--- a/drivers/infiniband/hw/hfi1/exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.h
@@ -133,12 +133,13 @@ static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
return grp;
}
-static inline u32 rcventry2tidinfo(u32 rcventry)
+static inline u32 create_tid(u32 rcventry, u32 npages)
{
u32 pair = rcventry & ~0x1;
return EXP_TID_SET(IDX, pair >> 1) |
- EXP_TID_SET(CTRL, 1 << (rcventry - pair));
+ EXP_TID_SET(CTRL, 1 << (rcventry - pair)) |
+ EXP_TID_SET(LEN, npages);
}
/**
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 47e4c8084207..b1d6ca7e9708 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -306,6 +306,17 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
return reqs;
}
+static inline void mmap_cdbg(u16 ctxt, u8 subctxt, u8 type, u8 mapio, u8 vmf,
+ u64 memaddr, void *memvirt, dma_addr_t memdma,
+ ssize_t memlen, struct vm_area_struct *vma)
+{
+ hfi1_cdbg(PROC,
+ "%u:%u type:%u io/vf/dma:%d/%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx",
+ ctxt, subctxt, type, mapio, vmf, !!memdma,
+ memaddr ?: (u64)memvirt, memlen,
+ vma->vm_end - vma->vm_start, vma->vm_flags);
+}
+
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
{
struct hfi1_filedata *fd = fp->private_data;
@@ -315,6 +326,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
u64 token = vma->vm_pgoff << PAGE_SHIFT,
memaddr = 0;
void *memvirt = NULL;
+ dma_addr_t memdma = 0;
u8 subctxt, mapio = 0, vmf = 0, type;
ssize_t memlen = 0;
int ret = 0;
@@ -334,6 +346,11 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
goto done;
}
+ /*
+ * vm_pgoff is used as a buffer selector cookie. Always mmap from
+ * the beginning.
+ */
+ vma->vm_pgoff = 0;
flags = vma->vm_flags;
switch (type) {
@@ -355,7 +372,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
mapio = 1;
break;
- case PIO_CRED:
+ case PIO_CRED: {
+ u64 cr_page_offset;
if (flags & VM_WRITE) {
ret = -EPERM;
goto done;
@@ -365,10 +383,11 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* second or third page allocated for credit returns (if number
* of enabled contexts > 64 and 128 respectively).
*/
- memvirt = dd->cr_base[uctxt->numa_id].va;
- memaddr = virt_to_phys(memvirt) +
- (((u64)uctxt->sc->hw_free -
- (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
+ cr_page_offset = ((u64)uctxt->sc->hw_free -
+ (u64)dd->cr_base[uctxt->numa_id].va) &
+ PAGE_MASK;
+ memvirt = dd->cr_base[uctxt->numa_id].va + cr_page_offset;
+ memdma = dd->cr_base[uctxt->numa_id].dma + cr_page_offset;
memlen = PAGE_SIZE;
flags &= ~VM_MAYWRITE;
flags |= VM_DONTCOPY | VM_DONTEXPAND;
@@ -378,14 +397,16 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* memory been flagged as non-cached?
*/
/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
- mapio = 1;
break;
+ }
case RCV_HDRQ:
memlen = rcvhdrq_size(uctxt);
memvirt = uctxt->rcvhdrq;
+ memdma = uctxt->rcvhdrq_dma;
break;
case RCV_EGRBUF: {
- unsigned long addr;
+ unsigned long vm_start_save;
+ unsigned long vm_end_save;
int i;
/*
* The RcvEgr buffer need to be handled differently
@@ -404,24 +425,34 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
goto done;
}
vm_flags_clear(vma, VM_MAYWRITE);
- addr = vma->vm_start;
+ /*
+ * Mmap multiple separate allocations into a single vma. From
+ * here, dma_mmap_coherent() calls dma_direct_mmap(), which
+ * requires the mmap to exactly fill the vma starting at
+ * vma_start. Adjust the vma start and end for each eager
+ * buffer segment mapped. Restore the originals when done.
+ */
+ vm_start_save = vma->vm_start;
+ vm_end_save = vma->vm_end;
+ vma->vm_end = vma->vm_start;
for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
memlen = uctxt->egrbufs.buffers[i].len;
memvirt = uctxt->egrbufs.buffers[i].addr;
- ret = remap_pfn_range(
- vma, addr,
- /*
- * virt_to_pfn() does the same, but
- * it's not available on x86_64
- * when CONFIG_MMU is enabled.
- */
- PFN_DOWN(__pa(memvirt)),
- memlen,
- vma->vm_page_prot);
- if (ret < 0)
+ memdma = uctxt->egrbufs.buffers[i].dma;
+ vma->vm_end += memlen;
+ mmap_cdbg(ctxt, subctxt, type, mapio, vmf, memaddr,
+ memvirt, memdma, memlen, vma);
+ ret = dma_mmap_coherent(&dd->pcidev->dev, vma,
+ memvirt, memdma, memlen);
+ if (ret < 0) {
+ vma->vm_start = vm_start_save;
+ vma->vm_end = vm_end_save;
goto done;
- addr += memlen;
+ }
+ vma->vm_start += memlen;
}
+ vma->vm_start = vm_start_save;
+ vma->vm_end = vm_end_save;
ret = 0;
goto done;
}
@@ -481,6 +512,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
}
memlen = PAGE_SIZE;
memvirt = (void *)hfi1_rcvhdrtail_kvaddr(uctxt);
+ memdma = uctxt->rcvhdrqtailaddr_dma;
flags &= ~VM_MAYWRITE;
break;
case SUBCTXT_UREGS:
@@ -529,14 +561,15 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
}
vm_flags_reset(vma, flags);
- hfi1_cdbg(PROC,
- "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
- ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
- vma->vm_end - vma->vm_start, vma->vm_flags);
+ mmap_cdbg(ctxt, subctxt, type, mapio, vmf, memaddr, memvirt, memdma,
+ memlen, vma);
if (vmf) {
vma->vm_pgoff = PFN_DOWN(memaddr);
vma->vm_ops = &vm_ops;
ret = 0;
+ } else if (memdma) {
+ ret = dma_mmap_coherent(&dd->pcidev->dev, vma,
+ memvirt, memdma, memlen);
} else if (mapio) {
ret = io_remap_pfn_range(vma, vma->vm_start,
PFN_DOWN(memaddr),
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 24c0f0d257fc..62b6c5020039 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -464,7 +464,7 @@ bail:
*
* This wrapper is the free function that matches hfi1_create_ctxtdata().
* When a context is done being used (kernel or user), this function is called
- * for the "final" put to match the kref init from hf1i_create_ctxtdata().
+ * for the "final" put to match the kref init from hfi1_create_ctxtdata().
* Other users of the context do a get/put sequence to make sure that the
* structure isn't removed while in use.
*/
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index a95b654f5254..8ed20392e9f0 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -3160,8 +3160,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
int rval = 0;
- tx->num_desc++;
- if ((unlikely(tx->num_desc == tx->desc_limit))) {
+ if ((unlikely(tx->num_desc + 1 == tx->desc_limit))) {
rval = _extend_sdma_tx_descs(dd, tx);
if (rval) {
__sdma_txclean(dd, tx);
@@ -3174,6 +3173,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
SDMA_MAP_NONE,
dd->sdma_pad_phys,
sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
+ tx->num_desc++;
_sdma_close_tx(dd, tx);
return rval;
}
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index d8170fcbfbdd..b023fc461bd5 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -631,14 +631,13 @@ static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx)
static inline void _sdma_close_tx(struct hfi1_devdata *dd,
struct sdma_txreq *tx)
{
- tx->descp[tx->num_desc].qw[0] |=
- SDMA_DESC0_LAST_DESC_FLAG;
- tx->descp[tx->num_desc].qw[1] |=
- dd->default_desc1;
+ u16 last_desc = tx->num_desc - 1;
+
+ tx->descp[last_desc].qw[0] |= SDMA_DESC0_LAST_DESC_FLAG;
+ tx->descp[last_desc].qw[1] |= dd->default_desc1;
if (tx->flags & SDMA_TXREQ_F_URGENT)
- tx->descp[tx->num_desc].qw[1] |=
- (SDMA_DESC1_HEAD_TO_HOST_FLAG |
- SDMA_DESC1_INT_REQ_FLAG);
+ tx->descp[last_desc].qw[1] |= (SDMA_DESC1_HEAD_TO_HOST_FLAG |
+ SDMA_DESC1_INT_REQ_FLAG);
}
static inline int _sdma_txadd_daddr(
@@ -655,6 +654,7 @@ static inline int _sdma_txadd_daddr(
type,
addr, len);
WARN_ON(len > tx->tlen);
+ tx->num_desc++;
tx->tlen -= len;
/* special cases for last */
if (!tx->tlen) {
@@ -666,7 +666,6 @@ static inline int _sdma_txadd_daddr(
_sdma_close_tx(dd, tx);
}
}
- tx->num_desc++;
return rval;
}
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 350884d5f089..96058baf36ed 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -27,8 +27,7 @@ static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq);
static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
- struct tid_group *grp,
- unsigned int start, u16 count,
+ struct tid_group *grp, u16 count,
u32 *tidlist, unsigned int *tididx,
unsigned int *pmapped);
static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo);
@@ -250,7 +249,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
int ret = 0, need_group = 0, pinned;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
- unsigned int ngroups, pageidx = 0, pageset_count,
+ unsigned int ngroups, pageset_count,
tididx = 0, mapped, mapped_pages = 0;
u32 *tidlist = NULL;
struct tid_user_buf *tidbuf;
@@ -332,7 +331,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
tid_group_pop(&uctxt->tid_group_list);
ret = program_rcvarray(fd, tidbuf, grp,
- pageidx, dd->rcv_entries.group_size,
+ dd->rcv_entries.group_size,
tidlist, &tididx, &mapped);
/*
* If there was a failure to program the RcvArray
@@ -348,11 +347,10 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
tid_group_add_tail(grp, &uctxt->tid_full_list);
ngroups--;
- pageidx += ret;
mapped_pages += mapped;
}
- while (pageidx < pageset_count) {
+ while (tididx < pageset_count) {
struct tid_group *grp, *ptr;
/*
* If we don't have any partially used tid groups, check
@@ -374,11 +372,11 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
*/
list_for_each_entry_safe(grp, ptr, &uctxt->tid_used_list.list,
list) {
- unsigned use = min_t(unsigned, pageset_count - pageidx,
+ unsigned use = min_t(unsigned, pageset_count - tididx,
grp->size - grp->used);
ret = program_rcvarray(fd, tidbuf, grp,
- pageidx, use, tidlist,
+ use, tidlist,
&tididx, &mapped);
if (ret < 0) {
hfi1_cdbg(TID,
@@ -390,11 +388,10 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
tid_group_move(grp,
&uctxt->tid_used_list,
&uctxt->tid_full_list);
- pageidx += ret;
mapped_pages += mapped;
need_group = 0;
/* Check if we are done so we break out early */
- if (pageidx >= pageset_count)
+ if (tididx >= pageset_count)
break;
} else if (WARN_ON(ret == 0)) {
/*
@@ -638,7 +635,6 @@ static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages)
* struct tid_pageset holding information on physically contiguous
* chunks from the user buffer), and other fields.
* @grp: RcvArray group
- * @start: starting index into sets array
* @count: number of struct tid_pageset's to program
* @tidlist: the array of u32 elements when the information about the
* programmed RcvArray entries is to be encoded.
@@ -658,14 +654,14 @@ static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages)
* number of RcvArray entries programmed.
*/
static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *tbuf,
- struct tid_group *grp,
- unsigned int start, u16 count,
+ struct tid_group *grp, u16 count,
u32 *tidlist, unsigned int *tididx,
unsigned int *pmapped)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
u16 idx;
+ unsigned int start = *tididx;
u32 tidinfo = 0, rcventry, useidx = 0;
int mapped = 0;
@@ -710,8 +706,7 @@ static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *tbuf,
return ret;
mapped += npages;
- tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base) |
- EXP_TID_SET(LEN, npages);
+ tidinfo = create_tid(rcventry - uctxt->expected_base, npages);
tidlist[(*tididx)++] = tidinfo;
grp->used++;
grp->map |= 1 << useidx++;
@@ -795,20 +790,20 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo)
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
struct tid_rb_node *node;
- u8 tidctrl = EXP_TID_GET(tidinfo, CTRL);
+ u32 tidctrl = EXP_TID_GET(tidinfo, CTRL);
u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry;
- if (tididx >= uctxt->expected_count) {
- dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n",
- tididx, uctxt->ctxt);
- return -EINVAL;
- }
-
- if (tidctrl == 0x3)
+ if (tidctrl == 0x3 || tidctrl == 0x0)
return -EINVAL;
rcventry = tididx + (tidctrl - 1);
+ if (rcventry >= uctxt->expected_count) {
+ dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n",
+ rcventry, uctxt->ctxt);
+ return -EINVAL;
+ }
+
node = fd->entry_to_rb[rcventry];
if (!node || node->rcventry != (uctxt->expected_base + rcventry))
return -EBADF;
@@ -920,9 +915,8 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
spin_lock(&fdata->invalid_lock);
if (fdata->invalid_tid_idx < uctxt->expected_count) {
fdata->invalid_tids[fdata->invalid_tid_idx] =
- rcventry2tidinfo(node->rcventry - uctxt->expected_base);
- fdata->invalid_tids[fdata->invalid_tid_idx] |=
- EXP_TID_SET(LEN, node->npages);
+ create_tid(node->rcventry - uctxt->expected_base,
+ node->npages);
if (!fdata->invalid_tid_idx) {
unsigned long *ev;
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 7bce963e2ae6..36aaedc65145 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -29,33 +29,52 @@ MODULE_PARM_DESC(cache_size, "Send and receive side cache size limit (in MB)");
bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
u32 nlocked, u32 npages)
{
- unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
- size = (cache_size * (1UL << 20)); /* convert to bytes */
- unsigned int usr_ctxts =
- dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
- bool can_lock = capable(CAP_IPC_LOCK);
+ unsigned long ulimit_pages;
+ unsigned long cache_limit_pages;
+ unsigned int usr_ctxts;
/*
- * Calculate per-cache size. The calculation below uses only a quarter
- * of the available per-context limit. This leaves space for other
- * pinning. Should we worry about shared ctxts?
+ * Perform RLIMIT_MEMLOCK based checks unless CAP_IPC_LOCK is present.
*/
- cache_limit = (ulimit / usr_ctxts) / 4;
-
- /* If ulimit isn't set to "unlimited" and is smaller than cache_size. */
- if (ulimit != (-1UL) && size > cache_limit)
- size = cache_limit;
-
- /* Convert to number of pages */
- size = DIV_ROUND_UP(size, PAGE_SIZE);
-
- pinned = atomic64_read(&mm->pinned_vm);
+ if (!capable(CAP_IPC_LOCK)) {
+ ulimit_pages =
+ DIV_ROUND_DOWN_ULL(rlimit(RLIMIT_MEMLOCK), PAGE_SIZE);
+
+ /*
+ * Pinning these pages would exceed this process's locked memory
+ * limit.
+ */
+ if (atomic64_read(&mm->pinned_vm) + npages > ulimit_pages)
+ return false;
+
+ /*
+ * Only allow 1/4 of the user's RLIMIT_MEMLOCK to be used for HFI
+ * caches. This fraction is then equally distributed among all
+ * existing user contexts. Note that if RLIMIT_MEMLOCK is
+ * 'unlimited' (-1), the value of this limit will be > 2^42 pages
+ * (2^64 / 2^12 / 2^8 / 2^2).
+ *
+ * The effectiveness of this check may be reduced if I/O occurs on
+ * some user contexts before all user contexts are created. This
+ * check assumes that this process is the only one using this
+ * context (e.g., the corresponding fd was not passed to another
+ * process for concurrent access) as there is no per-context,
+ * per-process tracking of pinned pages. It also assumes that each
+ * user context has only one cache to limit.
+ */
+ usr_ctxts = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
+ if (nlocked + npages > (ulimit_pages / usr_ctxts / 4))
+ return false;
+ }
- /* First, check the absolute limit against all pinned pages. */
- if (pinned + npages >= ulimit && !can_lock)
+ /*
+ * Pinning these pages would exceed the size limit for this cache.
+ */
+ cache_limit_pages = cache_size * (1024 * 1024) / PAGE_SIZE;
+ if (nlocked + npages > cache_limit_pages)
return false;
- return ((nlocked + npages) <= size) || can_lock;
+ return true;
}
int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t npages,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index e6e17984553c..7f6d7fc7951d 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1598,13 +1598,11 @@ static const char * const driver_cntr_names[] = {
"DRIVER_EgrHdrFull"
};
-static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */
static struct rdma_stat_desc *dev_cntr_descs;
static struct rdma_stat_desc *port_cntr_descs;
int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
static int num_dev_cntrs;
static int num_port_cntrs;
-static int cntr_names_initialized;
/*
* Convert a list of names separated by '\n' into an array of NULL terminated
@@ -1615,8 +1613,8 @@ static int init_cntr_names(const char *names_in, const size_t names_len,
int num_extra_names, int *num_cntrs,
struct rdma_stat_desc **cntr_descs)
{
- struct rdma_stat_desc *q;
- char *names_out, *p;
+ struct rdma_stat_desc *names_out;
+ char *p;
int i, n;
n = 0;
@@ -1624,65 +1622,45 @@ static int init_cntr_names(const char *names_in, const size_t names_len,
if (names_in[i] == '\n')
n++;
- names_out =
- kzalloc((n + num_extra_names) * sizeof(*q) + names_len,
- GFP_KERNEL);
+ names_out = kzalloc((n + num_extra_names) * sizeof(*names_out)
+ + names_len,
+ GFP_KERNEL);
if (!names_out) {
*num_cntrs = 0;
*cntr_descs = NULL;
return -ENOMEM;
}
- p = names_out + (n + num_extra_names) * sizeof(*q);
+ p = (char *)&names_out[n + num_extra_names];
memcpy(p, names_in, names_len);
- q = (struct rdma_stat_desc *)names_out;
for (i = 0; i < n; i++) {
- q[i].name = p;
+ names_out[i].name = p;
p = strchr(p, '\n');
*p++ = '\0';
}
*num_cntrs = n;
- *cntr_descs = (struct rdma_stat_desc *)names_out;
+ *cntr_descs = names_out;
return 0;
}
-static int init_counters(struct ib_device *ibdev)
-{
- struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- int i, err = 0;
-
- mutex_lock(&cntr_names_lock);
- if (cntr_names_initialized)
- goto out_unlock;
-
- err = init_cntr_names(dd->cntrnames, dd->cntrnameslen, num_driver_cntrs,
- &num_dev_cntrs, &dev_cntr_descs);
- if (err)
- goto out_unlock;
-
- for (i = 0; i < num_driver_cntrs; i++)
- dev_cntr_descs[num_dev_cntrs + i].name = driver_cntr_names[i];
-
- err = init_cntr_names(dd->portcntrnames, dd->portcntrnameslen, 0,
- &num_port_cntrs, &port_cntr_descs);
- if (err) {
- kfree(dev_cntr_descs);
- dev_cntr_descs = NULL;
- goto out_unlock;
- }
- cntr_names_initialized = 1;
-
-out_unlock:
- mutex_unlock(&cntr_names_lock);
- return err;
-}
-
static struct rdma_hw_stats *hfi1_alloc_hw_device_stats(struct ib_device *ibdev)
{
- if (init_counters(ibdev))
- return NULL;
+ if (!dev_cntr_descs) {
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ int i, err;
+
+ err = init_cntr_names(dd->cntrnames, dd->cntrnameslen,
+ num_driver_cntrs,
+ &num_dev_cntrs, &dev_cntr_descs);
+ if (err)
+ return NULL;
+
+ for (i = 0; i < num_driver_cntrs; i++)
+ dev_cntr_descs[num_dev_cntrs + i].name =
+ driver_cntr_names[i];
+ }
return rdma_alloc_hw_stats_struct(dev_cntr_descs,
num_dev_cntrs + num_driver_cntrs,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
@@ -1691,8 +1669,16 @@ static struct rdma_hw_stats *hfi1_alloc_hw_device_stats(struct ib_device *ibdev)
static struct rdma_hw_stats *hfi_alloc_hw_port_stats(struct ib_device *ibdev,
u32 port_num)
{
- if (init_counters(ibdev))
- return NULL;
+ if (!port_cntr_descs) {
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ int err;
+
+ err = init_cntr_names(dd->portcntrnames, dd->portcntrnameslen,
+ 0,
+ &num_port_cntrs, &port_cntr_descs);
+ if (err)
+ return NULL;
+ }
return rdma_alloc_hw_stats_struct(port_cntr_descs, num_port_cntrs,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
@@ -1917,13 +1903,10 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
del_timer_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
- mutex_lock(&cntr_names_lock);
kfree(dev_cntr_descs);
kfree(port_cntr_descs);
dev_cntr_descs = NULL;
port_cntr_descs = NULL;
- cntr_names_initialized = 0;
- mutex_unlock(&cntr_names_lock);
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index f701cc86896b..84239b907de2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -144,6 +144,7 @@ enum {
HNS_ROCE_CAP_FLAG_DIRECT_WQE = BIT(12),
HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14),
HNS_ROCE_CAP_FLAG_STASH = BIT(17),
+ HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19),
};
#define HNS_ROCE_DB_TYPE_COUNT 2
@@ -567,21 +568,6 @@ struct hns_roce_mbox_msg {
struct hns_roce_dev;
-struct hns_roce_rinl_sge {
- void *addr;
- u32 len;
-};
-
-struct hns_roce_rinl_wqe {
- struct hns_roce_rinl_sge *sg_list;
- u32 sge_cnt;
-};
-
-struct hns_roce_rinl_buf {
- struct hns_roce_rinl_wqe *wqe_list;
- u32 wqe_cnt;
-};
-
enum {
HNS_ROCE_FLUSH_FLAG = 0,
};
@@ -632,7 +618,6 @@ struct hns_roce_qp {
/* 0: flush needed, 1: unneeded */
unsigned long flush_flag;
struct hns_roce_work flush_work;
- struct hns_roce_rinl_buf rq_inl_buf;
struct list_head node; /* all qps are on a list */
struct list_head rq_node; /* all recv qps are on a list */
struct list_head sq_node; /* all send qps are on a list */
@@ -887,7 +872,7 @@ struct hns_roce_hw {
u32 step_idx);
int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state);
+ enum ib_qp_state new_state, struct ib_udata *udata);
int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp);
void (*dereg_mr)(struct hns_roce_dev *hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index b2421883993b..dbf97fe5948f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -821,22 +821,10 @@ static void fill_recv_sge_to_wqe(const struct ib_recv_wr *wr, void *wqe,
static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
u32 wqe_idx, u32 max_sge)
{
- struct hns_roce_rinl_sge *sge_list;
void *wqe = NULL;
- u32 i;
wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge);
-
- /* rq support inline data */
- if (hr_qp->rq_inl_buf.wqe_cnt) {
- sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list;
- hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = (u32)wr->num_sge;
- for (i = 0; i < wr->num_sge; i++) {
- sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr;
- sge_list[i].len = wr->sg_list[i].length;
- }
- }
}
static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
@@ -2849,7 +2837,7 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
attr->port_num = 1;
attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
- IB_QPS_INIT);
+ IB_QPS_INIT, NULL);
if (ret) {
ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
ret);
@@ -2871,7 +2859,7 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num);
ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
- IB_QPS_RTR);
+ IB_QPS_RTR, NULL);
hr_dev->loop_idc = loopback;
if (ret) {
ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n",
@@ -2886,7 +2874,7 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
attr->retry_cnt = HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT;
attr->timeout = HNS_ROCE_FREE_MR_USED_QP_TIMEOUT;
ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_RTR,
- IB_QPS_RTS);
+ IB_QPS_RTS, NULL);
if (ret)
ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n",
ret);
@@ -3730,39 +3718,6 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
return 0;
}
-static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
- struct hns_roce_qp *qp,
- struct ib_wc *wc)
-{
- struct hns_roce_rinl_sge *sge_list;
- u32 wr_num, wr_cnt, sge_num;
- u32 sge_cnt, data_len, size;
- void *wqe_buf;
-
- wr_num = hr_reg_read(cqe, CQE_WQE_IDX);
- wr_cnt = wr_num & (qp->rq.wqe_cnt - 1);
-
- sge_list = qp->rq_inl_buf.wqe_list[wr_cnt].sg_list;
- sge_num = qp->rq_inl_buf.wqe_list[wr_cnt].sge_cnt;
- wqe_buf = hns_roce_get_recv_wqe(qp, wr_cnt);
- data_len = wc->byte_len;
-
- for (sge_cnt = 0; (sge_cnt < sge_num) && (data_len); sge_cnt++) {
- size = min(sge_list[sge_cnt].len, data_len);
- memcpy((void *)sge_list[sge_cnt].addr, wqe_buf, size);
-
- data_len -= size;
- wqe_buf += size;
- }
-
- if (unlikely(data_len)) {
- wc->status = IB_WC_LOC_LEN_ERR;
- return -EAGAIN;
- }
-
- return 0;
-}
-
static int sw_comp(struct hns_roce_qp *hr_qp, struct hns_roce_wq *wq,
int num_entries, struct ib_wc *wc)
{
@@ -3974,22 +3929,10 @@ static void fill_send_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe)
wc->opcode = ib_opcode;
}
-static inline bool is_rq_inl_enabled(struct ib_wc *wc, u32 hr_opcode,
- struct hns_roce_v2_cqe *cqe)
-{
- return wc->qp->qp_type != IB_QPT_UD && wc->qp->qp_type != IB_QPT_GSI &&
- (hr_opcode == HNS_ROCE_V2_OPCODE_SEND ||
- hr_opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_IMM ||
- hr_opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_INV) &&
- hr_reg_read(cqe, CQE_RQ_INLINE);
-}
-
static int fill_recv_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe)
{
- struct hns_roce_qp *qp = to_hr_qp(wc->qp);
u32 hr_opcode;
int ib_opcode;
- int ret;
wc->byte_len = le32_to_cpu(cqe->byte_cnt);
@@ -4014,12 +3957,6 @@ static int fill_recv_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe)
else
wc->opcode = ib_opcode;
- if (is_rq_inl_enabled(wc, hr_opcode, cqe)) {
- ret = hns_roce_handle_recv_inl_wqe(cqe, qp, wc);
- if (unlikely(ret))
- return ret;
- }
-
wc->sl = hr_reg_read(cqe, CQE_SL);
wc->src_qp = hr_reg_read(cqe, CQE_RMT_QPN);
wc->slid = 0;
@@ -4445,10 +4382,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
hr_reg_write(context, QPC_RQ_DB_RECORD_ADDR_H,
upper_32_bits(hr_qp->rdb.dma));
- if (ibqp->qp_type != IB_QPT_UD && ibqp->qp_type != IB_QPT_GSI)
- hr_reg_write_bool(context, QPC_RQIE,
- hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE);
-
hr_reg_write(context, QPC_RX_CQN, get_cqn(ibqp->recv_cq));
if (ibqp->srq) {
@@ -4639,8 +4572,11 @@ static inline enum ib_mtu get_mtu(struct ib_qp *ibqp,
static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
struct hns_roce_v2_qp_context *context,
- struct hns_roce_v2_qp_context *qpc_mask)
+ struct hns_roce_v2_qp_context *qpc_mask,
+ struct ib_udata *udata)
{
+ struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct ib_device *ibdev = &hr_dev->ib_dev;
@@ -4760,6 +4696,26 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
hr_reg_write(context, QPC_LP_SGEN_INI, 3);
hr_reg_clear(qpc_mask, QPC_LP_SGEN_INI);
+ if (udata && ibqp->qp_type == IB_QPT_RC &&
+ (uctx->config & HNS_ROCE_RQ_INLINE_FLAGS)) {
+ hr_reg_write_bool(context, QPC_RQIE,
+ hr_dev->caps.flags &
+ HNS_ROCE_CAP_FLAG_RQ_INLINE);
+ hr_reg_clear(qpc_mask, QPC_RQIE);
+ }
+
+ if (udata &&
+ (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_XRC_TGT) &&
+ (uctx->config & HNS_ROCE_CQE_INLINE_FLAGS)) {
+ hr_reg_write_bool(context, QPC_CQEIE,
+ hr_dev->caps.flags &
+ HNS_ROCE_CAP_FLAG_CQE_INLINE);
+ hr_reg_clear(qpc_mask, QPC_CQEIE);
+
+ hr_reg_write(context, QPC_CQEIS, 0);
+ hr_reg_clear(qpc_mask, QPC_CQEIS);
+ }
+
return 0;
}
@@ -5107,7 +5063,8 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
enum ib_qp_state cur_state,
enum ib_qp_state new_state,
struct hns_roce_v2_qp_context *context,
- struct hns_roce_v2_qp_context *qpc_mask)
+ struct hns_roce_v2_qp_context *qpc_mask,
+ struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
int ret = 0;
@@ -5124,7 +5081,7 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
modify_qp_init_to_init(ibqp, attr, context, qpc_mask);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context,
- qpc_mask);
+ qpc_mask, udata);
} else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
ret = modify_qp_rtr_to_rts(ibqp, attr, attr_mask, context,
qpc_mask);
@@ -5329,7 +5286,7 @@ static void v2_set_flushed_fields(struct ib_qp *ibqp,
static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
+ enum ib_qp_state new_state, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
@@ -5352,7 +5309,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
memset(qpc_mask, 0xff, hr_dev->caps.qpc_sz);
ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state,
- new_state, context, qpc_mask);
+ new_state, context, qpc_mask, udata);
if (ret)
goto out;
@@ -5555,7 +5512,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
if (modify_qp_is_ok(hr_qp)) {
/* Modify qp to reset before destroying qp */
ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
- hr_qp->state, IB_QPS_RESET);
+ hr_qp->state, IB_QPS_RESET, udata);
if (ret)
ibdev_err(ibdev,
"failed to modify QP to RST, ret = %d.\n",
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index b1b3e1e0b84e..af9d00225cdf 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -531,7 +531,8 @@ struct hns_roce_v2_qp_context {
#define QPC_RQ_RTY_TX_ERR QPC_FIELD_LOC(607, 607)
#define QPC_RX_CQN QPC_FIELD_LOC(631, 608)
#define QPC_XRC_QP_TYPE QPC_FIELD_LOC(632, 632)
-#define QPC_RSV3 QPC_FIELD_LOC(634, 633)
+#define QPC_CQEIE QPC_FIELD_LOC(633, 633)
+#define QPC_CQEIS QPC_FIELD_LOC(634, 634)
#define QPC_MIN_RNR_TIME QPC_FIELD_LOC(639, 635)
#define QPC_RQ_PRODUCER_IDX QPC_FIELD_LOC(655, 640)
#define QPC_RQ_CONSUMER_IDX QPC_FIELD_LOC(671, 656)
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 8ba68ac12388..485e110ca433 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -379,6 +379,18 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
resp.max_inline_data = hr_dev->caps.max_sq_inline;
}
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+ context->config |= ucmd.config & HNS_ROCE_RQ_INLINE_FLAGS;
+ if (context->config & HNS_ROCE_RQ_INLINE_FLAGS)
+ resp.config |= HNS_ROCE_RSP_RQ_INLINE_FLAGS;
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQE_INLINE) {
+ context->config |= ucmd.config & HNS_ROCE_CQE_INLINE_FLAGS;
+ if (context->config & HNS_ROCE_CQE_INLINE_FLAGS)
+ resp.config |= HNS_ROCE_RSP_CQE_INLINE_FLAGS;
+ }
+
ret = hns_roce_uar_alloc(hr_dev, &context->uar);
if (ret)
goto error_fail_uar_alloc;
@@ -443,14 +455,15 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
prot = pgprot_device(vma->vm_page_prot);
break;
default:
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE,
prot, rdma_entry);
+out:
rdma_user_mmap_entry_put(rdma_entry);
-
return ret;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 0ae335fb205c..d855a917f4cf 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -433,7 +433,6 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
if (!has_rq) {
hr_qp->rq.wqe_cnt = 0;
hr_qp->rq.max_gs = 0;
- hr_qp->rq_inl_buf.wqe_cnt = 0;
cap->max_recv_wr = 0;
cap->max_recv_sge = 0;
@@ -463,12 +462,6 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
hr_qp->rq.max_gs);
hr_qp->rq.wqe_cnt = cnt;
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE &&
- hr_qp->ibqp.qp_type != IB_QPT_UD &&
- hr_qp->ibqp.qp_type != IB_QPT_GSI)
- hr_qp->rq_inl_buf.wqe_cnt = cnt;
- else
- hr_qp->rq_inl_buf.wqe_cnt = 0;
cap->max_recv_wr = cnt;
cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
@@ -732,49 +725,6 @@ static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
return 1;
}
-static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp,
- struct ib_qp_init_attr *init_attr)
-{
- u32 max_recv_sge = init_attr->cap.max_recv_sge;
- u32 wqe_cnt = hr_qp->rq_inl_buf.wqe_cnt;
- struct hns_roce_rinl_wqe *wqe_list;
- int i;
-
- /* allocate recv inline buf */
- wqe_list = kcalloc(wqe_cnt, sizeof(struct hns_roce_rinl_wqe),
- GFP_KERNEL);
- if (!wqe_list)
- goto err;
-
- /* Allocate a continuous buffer for all inline sge we need */
- wqe_list[0].sg_list = kcalloc(wqe_cnt, (max_recv_sge *
- sizeof(struct hns_roce_rinl_sge)),
- GFP_KERNEL);
- if (!wqe_list[0].sg_list)
- goto err_wqe_list;
-
- /* Assign buffers of sg_list to each inline wqe */
- for (i = 1; i < wqe_cnt; i++)
- wqe_list[i].sg_list = &wqe_list[0].sg_list[i * max_recv_sge];
-
- hr_qp->rq_inl_buf.wqe_list = wqe_list;
-
- return 0;
-
-err_wqe_list:
- kfree(wqe_list);
-
-err:
- return -ENOMEM;
-}
-
-static void free_rq_inline_buf(struct hns_roce_qp *hr_qp)
-{
- if (hr_qp->rq_inl_buf.wqe_list)
- kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
- kfree(hr_qp->rq_inl_buf.wqe_list);
-}
-
static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, unsigned long addr)
@@ -783,18 +733,6 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
struct hns_roce_buf_attr buf_attr = {};
int ret;
- if (!udata && hr_qp->rq_inl_buf.wqe_cnt) {
- ret = alloc_rq_inline_buf(hr_qp, init_attr);
- if (ret) {
- ibdev_err(ibdev,
- "failed to alloc inline buf, ret = %d.\n",
- ret);
- return ret;
- }
- } else {
- hr_qp->rq_inl_buf.wqe_list = NULL;
- }
-
ret = set_wqe_buf_attr(hr_dev, hr_qp, &buf_attr);
if (ret) {
ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret);
@@ -814,7 +752,6 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
return 0;
err_inline:
- free_rq_inline_buf(hr_qp);
return ret;
}
@@ -822,7 +759,6 @@ err_inline:
static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);
- free_rq_inline_buf(hr_qp);
}
static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev,
@@ -1410,7 +1346,7 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state,
- new_state);
+ new_state, udata);
out:
mutex_unlock(&hr_qp->mutex);
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index ab246447520b..2e1e2bad0401 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -483,6 +483,8 @@ static int irdma_save_msix_info(struct irdma_pci_f *rf)
iw_qvlist->num_vectors = rf->msix_count;
if (rf->msix_count <= num_online_cpus())
rf->msix_shared = true;
+ else if (rf->msix_count > num_online_cpus() + 1)
+ rf->msix_count = num_online_cpus() + 1;
pmsix = rf->msix_entries;
for (i = 0, ceq_idx = 0; i < rf->msix_count; i++, iw_qvinfo++) {
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index f6973ea55eda..1b2e3e800c9a 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -2745,6 +2745,162 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr,
return ret;
}
+static int irdma_reg_user_mr_type_mem(struct irdma_mr *iwmr, int access)
+{
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ bool use_pbles;
+ u32 stag;
+ int err;
+
+ use_pbles = iwmr->page_cnt != 1;
+
+ err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false);
+ if (err)
+ return err;
+
+ if (use_pbles) {
+ err = irdma_check_mr_contiguous(&iwpbl->pble_alloc,
+ iwmr->page_size);
+ if (err) {
+ irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc);
+ iwpbl->pbl_allocated = false;
+ }
+ }
+
+ stag = irdma_create_stag(iwdev);
+ if (!stag) {
+ err = -ENOMEM;
+ goto free_pble;
+ }
+
+ iwmr->stag = stag;
+ iwmr->ibmr.rkey = stag;
+ iwmr->ibmr.lkey = stag;
+ err = irdma_hwreg_mr(iwdev, iwmr, access);
+ if (err)
+ goto err_hwreg;
+
+ return 0;
+
+err_hwreg:
+ irdma_free_stag(iwdev, stag);
+
+free_pble:
+ if (iwpbl->pble_alloc.level != PBLE_LEVEL_0 && iwpbl->pbl_allocated)
+ irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc);
+
+ return err;
+}
+
+static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region,
+ struct ib_pd *pd, u64 virt,
+ enum irdma_memreg_type reg_type)
+{
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct irdma_pbl *iwpbl = NULL;
+ struct irdma_mr *iwmr = NULL;
+ unsigned long pgsz_bitmap;
+
+ iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+ if (!iwmr)
+ return ERR_PTR(-ENOMEM);
+
+ iwpbl = &iwmr->iwpbl;
+ iwpbl->iwmr = iwmr;
+ iwmr->region = region;
+ iwmr->ibmr.pd = pd;
+ iwmr->ibmr.device = pd->device;
+ iwmr->ibmr.iova = virt;
+ iwmr->type = reg_type;
+
+ pgsz_bitmap = (reg_type == IRDMA_MEMREG_TYPE_MEM) ?
+ iwdev->rf->sc_dev.hw_attrs.page_size_cap : PAGE_SIZE;
+
+ iwmr->page_size = ib_umem_find_best_pgsz(region, pgsz_bitmap, virt);
+ if (unlikely(!iwmr->page_size)) {
+ kfree(iwmr);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ iwmr->len = region->length;
+ iwpbl->user_base = virt;
+ iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size);
+
+ return iwmr;
+}
+
+static void irdma_free_iwmr(struct irdma_mr *iwmr)
+{
+ kfree(iwmr);
+}
+
+static int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req,
+ struct ib_udata *udata,
+ struct irdma_mr *iwmr)
+{
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_ucontext *ucontext = NULL;
+ unsigned long flags;
+ bool use_pbles;
+ u32 total;
+ int err;
+
+ total = req.sq_pages + req.rq_pages + 1;
+ if (total > iwmr->page_cnt)
+ return -EINVAL;
+
+ total = req.sq_pages + req.rq_pages;
+ use_pbles = total > 2;
+ err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+ if (err)
+ return err;
+
+ ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+ spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+ list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
+ iwpbl->on_list = true;
+ spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+
+ return 0;
+}
+
+static int irdma_reg_user_mr_type_cq(struct irdma_mem_reg_req req,
+ struct ib_udata *udata,
+ struct irdma_mr *iwmr)
+{
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_ucontext *ucontext = NULL;
+ u8 shadow_pgcnt = 1;
+ unsigned long flags;
+ bool use_pbles;
+ u32 total;
+ int err;
+
+ if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE)
+ shadow_pgcnt = 0;
+ total = req.cq_pages + shadow_pgcnt;
+ if (total > iwmr->page_cnt)
+ return -EINVAL;
+
+ use_pbles = req.cq_pages > 1;
+ err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+ if (err)
+ return err;
+
+ ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
+ iwpbl->on_list = true;
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+
+ return 0;
+}
+
/**
* irdma_reg_user_mr - Register a user memory region
* @pd: ptr of pd
@@ -2760,18 +2916,10 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
{
#define IRDMA_MEM_REG_MIN_REQ_LEN offsetofend(struct irdma_mem_reg_req, sq_pages)
struct irdma_device *iwdev = to_iwdev(pd->device);
- struct irdma_ucontext *ucontext;
- struct irdma_pble_alloc *palloc;
- struct irdma_pbl *iwpbl;
- struct irdma_mr *iwmr;
- struct ib_umem *region;
- struct irdma_mem_reg_req req;
- u32 total, stag = 0;
- u8 shadow_pgcnt = 1;
- bool use_pbles = false;
- unsigned long flags;
- int err = -EINVAL;
- int ret;
+ struct irdma_mem_reg_req req = {};
+ struct ib_umem *region = NULL;
+ struct irdma_mr *iwmr = NULL;
+ int err;
if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
return ERR_PTR(-EINVAL);
@@ -2792,122 +2940,80 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
return ERR_PTR(-EFAULT);
}
- iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
- if (!iwmr) {
+ iwmr = irdma_alloc_iwmr(region, pd, virt, req.reg_type);
+ if (IS_ERR(iwmr)) {
ib_umem_release(region);
- return ERR_PTR(-ENOMEM);
- }
-
- iwpbl = &iwmr->iwpbl;
- iwpbl->iwmr = iwmr;
- iwmr->region = region;
- iwmr->ibmr.pd = pd;
- iwmr->ibmr.device = pd->device;
- iwmr->ibmr.iova = virt;
- iwmr->page_size = PAGE_SIZE;
-
- if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
- iwmr->page_size = ib_umem_find_best_pgsz(region,
- iwdev->rf->sc_dev.hw_attrs.page_size_cap,
- virt);
- if (unlikely(!iwmr->page_size)) {
- kfree(iwmr);
- ib_umem_release(region);
- return ERR_PTR(-EOPNOTSUPP);
- }
+ return (struct ib_mr *)iwmr;
}
- iwmr->len = region->length;
- iwpbl->user_base = virt;
- palloc = &iwpbl->pble_alloc;
- iwmr->type = req.reg_type;
- iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size);
switch (req.reg_type) {
case IRDMA_MEMREG_TYPE_QP:
- total = req.sq_pages + req.rq_pages + shadow_pgcnt;
- if (total > iwmr->page_cnt) {
- err = -EINVAL;
- goto error;
- }
- total = req.sq_pages + req.rq_pages;
- use_pbles = (total > 2);
- err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+ err = irdma_reg_user_mr_type_qp(req, udata, iwmr);
if (err)
goto error;
- ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
- ibucontext);
- spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
- list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
- iwpbl->on_list = true;
- spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
break;
case IRDMA_MEMREG_TYPE_CQ:
- if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE)
- shadow_pgcnt = 0;
- total = req.cq_pages + shadow_pgcnt;
- if (total > iwmr->page_cnt) {
- err = -EINVAL;
- goto error;
- }
-
- use_pbles = (req.cq_pages > 1);
- err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+ err = irdma_reg_user_mr_type_cq(req, udata, iwmr);
if (err)
goto error;
-
- ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
- ibucontext);
- spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
- list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
- iwpbl->on_list = true;
- spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
break;
case IRDMA_MEMREG_TYPE_MEM:
- use_pbles = (iwmr->page_cnt != 1);
-
- err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false);
+ err = irdma_reg_user_mr_type_mem(iwmr, access);
if (err)
goto error;
- if (use_pbles) {
- ret = irdma_check_mr_contiguous(palloc,
- iwmr->page_size);
- if (ret) {
- irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
- iwpbl->pbl_allocated = false;
- }
- }
-
- stag = irdma_create_stag(iwdev);
- if (!stag) {
- err = -ENOMEM;
- goto error;
- }
-
- iwmr->stag = stag;
- iwmr->ibmr.rkey = stag;
- iwmr->ibmr.lkey = stag;
- err = irdma_hwreg_mr(iwdev, iwmr, access);
- if (err) {
- irdma_free_stag(iwdev, stag);
- goto error;
- }
-
break;
default:
+ err = -EINVAL;
goto error;
}
- iwmr->type = req.reg_type;
-
return &iwmr->ibmr;
-
error:
- if (palloc->level != PBLE_LEVEL_0 && iwpbl->pbl_allocated)
- irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
ib_umem_release(region);
- kfree(iwmr);
+ irdma_free_iwmr(iwmr);
+
+ return ERR_PTR(err);
+}
+
+static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
+ u64 len, u64 virt,
+ int fd, int access,
+ struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct irdma_mr *iwmr;
+ int err;
+
+ if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
+ return ERR_PTR(-EINVAL);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd, access);
+ if (IS_ERR(umem_dmabuf)) {
+ err = PTR_ERR(umem_dmabuf);
+ ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
+ return ERR_PTR(err);
+ }
+
+ iwmr = irdma_alloc_iwmr(&umem_dmabuf->umem, pd, virt, IRDMA_MEMREG_TYPE_MEM);
+ if (IS_ERR(iwmr)) {
+ err = PTR_ERR(iwmr);
+ goto err_release;
+ }
+
+ err = irdma_reg_user_mr_type_mem(iwmr, access);
+ if (err)
+ goto err_iwmr;
+
+ return &iwmr->ibmr;
+
+err_iwmr:
+ irdma_free_iwmr(iwmr);
+
+err_release:
+ ib_umem_release(&umem_dmabuf->umem);
return ERR_PTR(err);
}
@@ -4418,6 +4524,7 @@ static const struct ib_device_ops irdma_dev_ops = {
.query_port = irdma_query_port,
.query_qp = irdma_query_qp,
.reg_user_mr = irdma_reg_user_mr,
+ .reg_user_mr_dmabuf = irdma_reg_user_mr_dmabuf,
.req_notify_cq = irdma_req_notify_cq,
.resize_cq = irdma_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd),
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 8b3bc302d6f3..7be4c3adb4e2 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -249,7 +249,8 @@ static int
mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
struct gdma_context *gc,
struct gdma_create_dma_region_req *create_req,
- size_t num_pages, mana_handle_t *gdma_region)
+ size_t num_pages, mana_handle_t *gdma_region,
+ u32 expected_status)
{
struct gdma_create_dma_region_resp create_resp = {};
unsigned int create_req_msg_size;
@@ -261,7 +262,7 @@ mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
err = mana_gd_send_request(gc, create_req_msg_size, create_req,
sizeof(create_resp), &create_resp);
- if (err || create_resp.hdr.status) {
+ if (err || create_resp.hdr.status != expected_status) {
ibdev_dbg(&dev->ib_dev,
"Failed to create DMA region: %d, 0x%x\n",
err, create_resp.hdr.status);
@@ -372,14 +373,21 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
page_addr_list = create_req->page_addr_list;
rdma_umem_for_each_dma_block(umem, &biter, page_sz) {
+ u32 expected_status = 0;
+
page_addr_list[tail++] = rdma_block_iter_dma_address(&biter);
if (tail < num_pages_to_handle)
continue;
+ if (num_pages_processed + num_pages_to_handle <
+ num_pages_total)
+ expected_status = GDMA_STATUS_MORE_ENTRIES;
+
if (!num_pages_processed) {
/* First create message */
err = mana_ib_gd_first_dma_region(dev, gc, create_req,
- tail, gdma_region);
+ tail, gdma_region,
+ expected_status);
if (err)
goto out;
@@ -392,14 +400,8 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
page_addr_list = add_req->page_addr_list;
} else {
/* Subsequent create messages */
- u32 expected_s = 0;
-
- if (num_pages_processed + num_pages_to_handle <
- num_pages_total)
- expected_s = GDMA_STATUS_MORE_ENTRIES;
-
err = mana_ib_gd_add_dma_region(dev, gc, add_req, tail,
- expected_s);
+ expected_status);
if (err)
break;
}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index dceebcd885bb..b18e9f2adc82 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -3303,6 +3303,10 @@ static int __init mlx4_ib_init(void)
if (!wq)
return -ENOMEM;
+ err = mlx4_ib_qp_event_init();
+ if (err)
+ goto clean_qp_event;
+
err = mlx4_ib_cm_init();
if (err)
goto clean_wq;
@@ -3324,6 +3328,9 @@ clean_cm:
mlx4_ib_cm_destroy();
clean_wq:
+ mlx4_ib_qp_event_cleanup();
+
+clean_qp_event:
destroy_workqueue(wq);
return err;
}
@@ -3333,6 +3340,7 @@ static void __exit mlx4_ib_cleanup(void)
mlx4_unregister_interface(&mlx4_ib_interface);
mlx4_ib_mcg_destroy();
mlx4_ib_cm_destroy();
+ mlx4_ib_qp_event_cleanup();
destroy_workqueue(wq);
}
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 6a3b0f121045..17fee1e73a45 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -940,4 +940,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
int mlx4_ib_cm_init(void);
void mlx4_ib_cm_destroy(void);
+int mlx4_ib_qp_event_init(void);
+void mlx4_ib_qp_event_cleanup(void);
+
#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index b17d6ebc5b70..884825b2e5f7 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -102,6 +102,14 @@ enum mlx4_ib_source_type {
MLX4_IB_RWQ_SRC = 1,
};
+struct mlx4_ib_qp_event_work {
+ struct work_struct work;
+ struct mlx4_qp *qp;
+ enum mlx4_event type;
+};
+
+static struct workqueue_struct *mlx4_ib_qp_event_wq;
+
static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
if (!mlx4_is_master(dev->dev))
@@ -200,50 +208,77 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
}
}
+static void mlx4_ib_handle_qp_event(struct work_struct *_work)
+{
+ struct mlx4_ib_qp_event_work *qpe_work =
+ container_of(_work, struct mlx4_ib_qp_event_work, work);
+ struct ib_qp *ibqp = &to_mibqp(qpe_work->qp)->ibqp;
+ struct ib_event event = {};
+
+ event.device = ibqp->device;
+ event.element.qp = ibqp;
+
+ switch (qpe_work->type) {
+ case MLX4_EVENT_TYPE_PATH_MIG:
+ event.event = IB_EVENT_PATH_MIG;
+ break;
+ case MLX4_EVENT_TYPE_COMM_EST:
+ event.event = IB_EVENT_COMM_EST;
+ break;
+ case MLX4_EVENT_TYPE_SQ_DRAINED:
+ event.event = IB_EVENT_SQ_DRAINED;
+ break;
+ case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ break;
+ case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+ case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
+ event.event = IB_EVENT_PATH_MIG_ERR;
+ break;
+ case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ default:
+ pr_warn("Unexpected event type %d on QP %06x\n",
+ qpe_work->type, qpe_work->qp->qpn);
+ goto out;
+ }
+
+ ibqp->event_handler(&event, ibqp->qp_context);
+
+out:
+ mlx4_put_qp(qpe_work->qp);
+ kfree(qpe_work);
+}
+
static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
- struct ib_event event;
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
+ struct mlx4_ib_qp_event_work *qpe_work;
if (type == MLX4_EVENT_TYPE_PATH_MIG)
to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
- if (ibqp->event_handler) {
- event.device = ibqp->device;
- event.element.qp = ibqp;
- switch (type) {
- case MLX4_EVENT_TYPE_PATH_MIG:
- event.event = IB_EVENT_PATH_MIG;
- break;
- case MLX4_EVENT_TYPE_COMM_EST:
- event.event = IB_EVENT_COMM_EST;
- break;
- case MLX4_EVENT_TYPE_SQ_DRAINED:
- event.event = IB_EVENT_SQ_DRAINED;
- break;
- case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
- event.event = IB_EVENT_QP_LAST_WQE_REACHED;
- break;
- case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
- event.event = IB_EVENT_QP_FATAL;
- break;
- case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
- event.event = IB_EVENT_PATH_MIG_ERR;
- break;
- case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- event.event = IB_EVENT_QP_REQ_ERR;
- break;
- case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
- event.event = IB_EVENT_QP_ACCESS_ERR;
- break;
- default:
- pr_warn("Unexpected event type %d "
- "on QP %06x\n", type, qp->qpn);
- return;
- }
+ if (!ibqp->event_handler)
+ goto out_no_handler;
- ibqp->event_handler(&event, ibqp->qp_context);
- }
+ qpe_work = kzalloc(sizeof(*qpe_work), GFP_ATOMIC);
+ if (!qpe_work)
+ goto out_no_handler;
+
+ qpe_work->qp = qp;
+ qpe_work->type = type;
+ INIT_WORK(&qpe_work->work, mlx4_ib_handle_qp_event);
+ queue_work(mlx4_ib_qp_event_wq, &qpe_work->work);
+ return;
+
+out_no_handler:
+ mlx4_put_qp(qp);
}
static void mlx4_ib_wq_event(struct mlx4_qp *qp, enum mlx4_event type)
@@ -4468,3 +4503,17 @@ void mlx4_ib_drain_rq(struct ib_qp *qp)
handle_drain_completion(cq, &rdrain, dev);
}
+
+int mlx4_ib_qp_event_init(void)
+{
+ mlx4_ib_qp_event_wq = alloc_ordered_workqueue("mlx4_ib_qp_event_wq", 0);
+ if (!mlx4_ib_qp_event_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx4_ib_qp_event_cleanup(void)
+{
+ destroy_workqueue(mlx4_ib_qp_event_wq);
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index ff3742b0460a..1d0c8d5e745b 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -5,34 +5,41 @@
#include "cmd.h"
-int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey)
+int mlx5r_cmd_query_special_mkeys(struct mlx5_ib_dev *dev)
{
u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
+ bool is_terminate, is_dump, is_null;
int err;
- MLX5_SET(query_special_contexts_in, in, opcode,
- MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
- err = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out);
- if (!err)
- *mkey = MLX5_GET(query_special_contexts_out, out,
- dump_fill_mkey);
- return err;
-}
+ is_terminate = MLX5_CAP_GEN(dev->mdev, terminate_scatter_list_mkey);
+ is_dump = MLX5_CAP_GEN(dev->mdev, dump_fill_mkey);
+ is_null = MLX5_CAP_GEN(dev->mdev, null_mkey);
-int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
-{
- u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
- u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
- int err;
+ dev->mkeys.terminate_scatter_list_mkey = MLX5_TERMINATE_SCATTER_LIST_LKEY;
+ if (!is_terminate && !is_dump && !is_null)
+ return 0;
MLX5_SET(query_special_contexts_in, in, opcode,
MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
- err = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out);
- if (!err)
- *null_mkey = MLX5_GET(query_special_contexts_out, out,
- null_mkey);
- return err;
+ err = mlx5_cmd_exec_inout(dev->mdev, query_special_contexts, in, out);
+ if (err)
+ return err;
+
+ if (is_dump)
+ dev->mkeys.dump_fill_mkey = MLX5_GET(query_special_contexts_out,
+ out, dump_fill_mkey);
+
+ if (is_null)
+ dev->mkeys.null_mkey = cpu_to_be32(
+ MLX5_GET(query_special_contexts_out, out, null_mkey));
+
+ if (is_terminate)
+ dev->mkeys.terminate_scatter_list_mkey =
+ cpu_to_be32(MLX5_GET(query_special_contexts_out, out,
+ terminate_scatter_list_mkey));
+
+ return 0;
}
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index ee46638db5de..93a971a40d11 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -37,8 +37,7 @@
#include <linux/kernel.h>
#include <linux/mlx5/driver.h>
-int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey);
-int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
+int mlx5r_cmd_query_special_mkeys(struct mlx5_ib_dev *dev);
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
void *out);
int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
index 290ea8ac3838..f87531318feb 100644
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -38,6 +38,7 @@
enum mlx5_ib_cong_node_type {
MLX5_IB_RROCE_ECN_RP = 1,
MLX5_IB_RROCE_ECN_NP = 2,
+ MLX5_IB_RROCE_GENERAL = 3,
};
static const char * const mlx5_ib_dbg_cc_name[] = {
@@ -61,6 +62,8 @@ static const char * const mlx5_ib_dbg_cc_name[] = {
"np_cnp_dscp",
"np_cnp_prio_mode",
"np_cnp_prio",
+ "rtt_resp_dscp_valid",
+ "rtt_resp_dscp",
};
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1)
@@ -84,14 +87,18 @@ static const char * const mlx5_ib_dbg_cc_name[] = {
#define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3)
#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4)
+#define MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR BIT(0)
+
static enum mlx5_ib_cong_node_type
mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset)
{
- if (param_offset >= MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE &&
- param_offset <= MLX5_IB_DBG_CC_RP_GD)
+ if (param_offset <= MLX5_IB_DBG_CC_RP_GD)
return MLX5_IB_RROCE_ECN_RP;
- else
+
+ if (param_offset <= MLX5_IB_DBG_CC_NP_CNP_PRIO)
return MLX5_IB_RROCE_ECN_NP;
+
+ return MLX5_IB_RROCE_GENERAL;
}
static u32 mlx5_get_cc_param_val(void *field, int offset)
@@ -157,6 +164,12 @@ static u32 mlx5_get_cc_param_val(void *field, int offset)
case MLX5_IB_DBG_CC_NP_CNP_PRIO:
return MLX5_GET(cong_control_r_roce_ecn_np, field,
cnp_802p_prio);
+ case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID:
+ return MLX5_GET(cong_control_r_roce_general, field,
+ rtt_resp_dscp_valid);
+ case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP:
+ return MLX5_GET(cong_control_r_roce_general, field,
+ rtt_resp_dscp);
default:
return 0;
}
@@ -264,6 +277,15 @@ static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
break;
+ case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID:
+ *attr_mask |= MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR;
+ MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp_valid, var);
+ break;
+ case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP:
+ *attr_mask |= MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR;
+ MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp_valid, 1);
+ MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp, var);
+ break;
}
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 471c3455dfeb..5b988db66b8f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1756,13 +1756,9 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_ucontext *context = to_mucontext(uctx);
struct mlx5_bfreg_info *bfregi = &context->bfregi;
- int err;
if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
- err = mlx5_cmd_dump_fill_mkey(dev->mdev,
- &resp->dump_fill_mkey);
- if (err)
- return err;
+ resp->dump_fill_mkey = dev->mkeys.dump_fill_mkey;
resp->comp_mask |=
MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
}
@@ -3666,6 +3662,10 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->port[i].roce.last_port_state = IB_PORT_DOWN;
}
+ err = mlx5r_cmd_query_special_mkeys(dev);
+ if (err)
+ return err;
+
err = mlx5_ib_init_multiport_master(dev);
if (err)
return err;
@@ -4030,12 +4030,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
- int err;
-
- err = mlx5_mkey_cache_cleanup(dev);
- if (err)
- mlx5_ib_warn(dev, "mr cache cleanup failed\n");
-
+ mlx5_mkey_cache_cleanup(dev);
mlx5r_umr_resource_cleanup(dev);
}
@@ -4433,6 +4428,10 @@ static int __init mlx5_ib_init(void)
return -ENOMEM;
}
+ ret = mlx5_ib_qp_event_init();
+ if (ret)
+ goto qp_event_err;
+
mlx5_ib_odp_init();
ret = mlx5r_rep_init();
if (ret)
@@ -4450,6 +4449,8 @@ drv_err:
mp_err:
mlx5r_rep_cleanup();
rep_err:
+ mlx5_ib_qp_event_cleanup();
+qp_event_err:
destroy_workqueue(mlx5_ib_event_wq);
free_page((unsigned long)xlt_emergency_page);
return ret;
@@ -4461,6 +4462,7 @@ static void __exit mlx5_ib_cleanup(void)
auxiliary_driver_unregister(&mlx5r_mp_driver);
mlx5r_rep_cleanup();
+ mlx5_ib_qp_event_cleanup();
destroy_workqueue(mlx5_ib_event_wq);
free_page((unsigned long)xlt_emergency_page);
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 7394e7f36ba7..efa4dc6e7dee 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -617,12 +617,21 @@ enum mlx5_mkey_type {
MLX5_MKEY_INDIRECT_DEVX,
};
+struct mlx5r_cache_rb_key {
+ u8 ats:1;
+ unsigned int access_mode;
+ unsigned int access_flags;
+ unsigned int ndescs;
+};
+
struct mlx5_ib_mkey {
u32 key;
enum mlx5_mkey_type type;
unsigned int ndescs;
struct wait_queue_head wait;
refcount_t usecount;
+ /* User Mkey must hold either a rb_key or a cache_ent. */
+ struct mlx5r_cache_rb_key rb_key;
struct mlx5_cache_ent *cache_ent;
};
@@ -737,11 +746,11 @@ struct mlx5_cache_ent {
unsigned long reserved;
char name[4];
- u32 order;
- u32 access_mode;
- u32 page;
- unsigned int ndescs;
+ struct rb_node node;
+ struct mlx5r_cache_rb_key rb_key;
+
+ u8 is_tmp:1;
u8 disabled:1;
u8 fill_to_high_water:1;
@@ -771,9 +780,11 @@ struct mlx5r_async_create_mkey {
struct mlx5_mkey_cache {
struct workqueue_struct *wq;
- struct mlx5_cache_ent ent[MAX_MKEY_CACHE_ENTRIES];
- struct dentry *root;
+ struct rb_root rb_root;
+ struct mutex rb_lock;
+ struct dentry *fs_root;
unsigned long last_add;
+ struct delayed_work remove_ent_dwork;
};
struct mlx5_ib_port_resources {
@@ -877,6 +888,8 @@ enum mlx5_ib_dbg_cc_types {
MLX5_IB_DBG_CC_NP_CNP_DSCP,
MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE,
MLX5_IB_DBG_CC_NP_CNP_PRIO,
+ MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID,
+ MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP,
MLX5_IB_DBG_CC_MAX,
};
@@ -1054,6 +1067,13 @@ struct mlx5_port_caps {
u8 ext_port_cap;
};
+
+struct mlx5_special_mkeys {
+ u32 dump_fill_mkey;
+ __be32 null_mkey;
+ __be32 terminate_scatter_list_mkey;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
@@ -1084,7 +1104,6 @@ struct mlx5_ib_dev {
struct xarray odp_mkeys;
- u32 null_mkey;
struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
@@ -1113,6 +1132,7 @@ struct mlx5_ib_dev {
struct mlx5_port_caps port_caps[MLX5_MAX_PORTS];
u16 pkey_table_len;
u8 lag_ports;
+ struct mlx5_special_mkeys mkeys;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1319,11 +1339,15 @@ void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
-int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
+void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
+struct mlx5_cache_ent *
+mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
+ struct mlx5r_cache_rb_key rb_key,
+ bool persistent_entry);
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
- struct mlx5_cache_ent *ent,
- int access_flags);
+ int access_flags, int access_mode,
+ int ndescs);
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
@@ -1347,7 +1371,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq);
void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
-void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent);
+int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev);
void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
struct mlx5_ib_mr *mr, int flags);
@@ -1366,7 +1390,10 @@ static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev,
static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {}
-static inline void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) {}
+static inline int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
+{
+ return 0;
+}
static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
struct mlx5_ib_mr *mr, int flags) {}
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 053fe946e45a..67356f515261 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -140,19 +140,16 @@ static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out)
mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out);
}
-
-static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
- void *to_store)
+static int push_mkey_locked(struct mlx5_cache_ent *ent, bool limit_pendings,
+ void *to_store)
{
XA_STATE(xas, &ent->mkeys, 0);
void *curr;
- xa_lock_irq(&ent->mkeys);
if (limit_pendings &&
- (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) {
- xa_unlock_irq(&ent->mkeys);
+ (ent->reserved - ent->stored) > MAX_PENDING_REG_MR)
return -EAGAIN;
- }
+
while (1) {
/*
* This is cmpxchg (NULL, XA_ZERO_ENTRY) however this version
@@ -191,6 +188,7 @@ static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
break;
xa_lock_irq(&ent->mkeys);
}
+ xa_lock_irq(&ent->mkeys);
if (xas_error(&xas))
return xas_error(&xas);
if (WARN_ON(curr))
@@ -198,6 +196,17 @@ static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
return 0;
}
+static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
+ void *to_store)
+{
+ int ret;
+
+ xa_lock_irq(&ent->mkeys);
+ ret = push_mkey_locked(ent, limit_pendings, to_store);
+ xa_unlock_irq(&ent->mkeys);
+ return ret;
+}
+
static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent)
{
void *old;
@@ -292,12 +301,14 @@ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
- MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
+ MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2,
+ (ent->rb_key.access_mode >> 2) & 0x7);
MLX5_SET(mkc, mkc, translations_octword_size,
- get_mkc_octo_size(ent->access_mode, ent->ndescs));
- MLX5_SET(mkc, mkc, log_page_size, ent->page);
+ get_mkc_octo_size(ent->rb_key.access_mode,
+ ent->rb_key.ndescs));
+ MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
}
/* Asynchronously schedule new MRs to be populated in the cache. */
@@ -515,18 +526,22 @@ static const struct file_operations limit_fops = {
static bool someone_adding(struct mlx5_mkey_cache *cache)
{
- unsigned int i;
-
- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
- struct mlx5_cache_ent *ent = &cache->ent[i];
- bool ret;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *node;
+ bool ret;
+ mutex_lock(&cache->rb_lock);
+ for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
xa_lock_irq(&ent->mkeys);
ret = ent->stored < ent->limit;
xa_unlock_irq(&ent->mkeys);
- if (ret)
+ if (ret) {
+ mutex_unlock(&cache->rb_lock);
return true;
+ }
}
+ mutex_unlock(&cache->rb_lock);
return false;
}
@@ -539,7 +554,7 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
{
lockdep_assert_held(&ent->mkeys.xa_lock);
- if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
+ if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp)
return;
if (ent->stored < ent->limit) {
ent->fill_to_high_water = true;
@@ -590,8 +605,8 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
if (err != -EAGAIN) {
mlx5_ib_warn(
dev,
- "command failed order %d, err %d\n",
- ent->order, err);
+ "add keys command failed, err %d\n",
+ err);
queue_delayed_work(cache->wq, &ent->dwork,
msecs_to_jiffies(1000));
}
@@ -637,16 +652,99 @@ static void delayed_cache_work_func(struct work_struct *work)
__cache_work_func(ent);
}
-struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
- struct mlx5_cache_ent *ent,
- int access_flags)
+static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1,
+ struct mlx5r_cache_rb_key key2)
+{
+ int res;
+
+ res = key1.ats - key2.ats;
+ if (res)
+ return res;
+
+ res = key1.access_mode - key2.access_mode;
+ if (res)
+ return res;
+
+ res = key1.access_flags - key2.access_flags;
+ if (res)
+ return res;
+
+ /*
+ * keep ndescs the last in the compare table since the find function
+ * searches for an exact match on all properties and only closest
+ * match in size.
+ */
+ return key1.ndescs - key2.ndescs;
+}
+
+static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
+ struct mlx5_cache_ent *ent)
+{
+ struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL;
+ struct mlx5_cache_ent *cur;
+ int cmp;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ cur = rb_entry(*new, struct mlx5_cache_ent, node);
+ parent = *new;
+ cmp = cache_ent_key_cmp(cur->rb_key, ent->rb_key);
+ if (cmp > 0)
+ new = &((*new)->rb_left);
+ if (cmp < 0)
+ new = &((*new)->rb_right);
+ if (cmp == 0) {
+ mutex_unlock(&cache->rb_lock);
+ return -EEXIST;
+ }
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&ent->node, parent, new);
+ rb_insert_color(&ent->node, &cache->rb_root);
+
+ return 0;
+}
+
+static struct mlx5_cache_ent *
+mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev,
+ struct mlx5r_cache_rb_key rb_key)
+{
+ struct rb_node *node = dev->cache.rb_root.rb_node;
+ struct mlx5_cache_ent *cur, *smallest = NULL;
+ int cmp;
+
+ /*
+ * Find the smallest ent with order >= requested_order.
+ */
+ while (node) {
+ cur = rb_entry(node, struct mlx5_cache_ent, node);
+ cmp = cache_ent_key_cmp(cur->rb_key, rb_key);
+ if (cmp > 0) {
+ smallest = cur;
+ node = node->rb_left;
+ }
+ if (cmp < 0)
+ node = node->rb_right;
+ if (cmp == 0)
+ return cur;
+ }
+
+ return (smallest &&
+ smallest->rb_key.access_mode == rb_key.access_mode &&
+ smallest->rb_key.access_flags == rb_key.access_flags &&
+ smallest->rb_key.ats == rb_key.ats) ?
+ smallest :
+ NULL;
+}
+
+static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+ struct mlx5_cache_ent *ent,
+ int access_flags)
{
struct mlx5_ib_mr *mr;
int err;
- if (!mlx5r_umr_can_reconfig(dev, 0, access_flags))
- return ERR_PTR(-EOPNOTSUPP);
-
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -677,10 +775,48 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
return mr;
}
-static void clean_keys(struct mlx5_ib_dev *dev, int c)
+static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev,
+ int access_flags)
+{
+ int ret = 0;
+
+ if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+ MLX5_CAP_GEN(dev->mdev, atomic) &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+ ret |= IB_ACCESS_REMOTE_ATOMIC;
+
+ if ((access_flags & IB_ACCESS_RELAXED_ORDERING) &&
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ ret |= IB_ACCESS_RELAXED_ORDERING;
+
+ if ((access_flags & IB_ACCESS_RELAXED_ORDERING) &&
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ ret |= IB_ACCESS_RELAXED_ORDERING;
+
+ return ret;
+}
+
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+ int access_flags, int access_mode,
+ int ndescs)
+{
+ struct mlx5r_cache_rb_key rb_key = {
+ .ndescs = ndescs,
+ .access_mode = access_mode,
+ .access_flags = get_unchangeable_access_flags(dev, access_flags)
+ };
+ struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key);
+
+ if (!ent)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ return _mlx5_mr_cache_alloc(dev, ent, access_flags);
+}
+
+static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
{
- struct mlx5_mkey_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent = &cache->ent[c];
u32 mkey;
cancel_delayed_work(&ent->dwork);
@@ -699,31 +835,39 @@ static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
if (!mlx5_debugfs_root || dev->is_rep)
return;
- debugfs_remove_recursive(dev->cache.root);
- dev->cache.root = NULL;
+ debugfs_remove_recursive(dev->cache.fs_root);
+ dev->cache.fs_root = NULL;
}
-static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
+static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev,
+ struct mlx5_cache_ent *ent)
{
- struct mlx5_mkey_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent;
+ int order = order_base_2(ent->rb_key.ndescs);
struct dentry *dir;
- int i;
if (!mlx5_debugfs_root || dev->is_rep)
return;
- cache->root = debugfs_create_dir("mr_cache", mlx5_debugfs_get_dev_root(dev->mdev));
+ if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+ order = MLX5_IMR_KSM_CACHE_ENTRY + 2;
- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
- ent = &cache->ent[i];
- sprintf(ent->name, "%d", ent->order);
- dir = debugfs_create_dir(ent->name, cache->root);
- debugfs_create_file("size", 0600, dir, ent, &size_fops);
- debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
- debugfs_create_ulong("cur", 0400, dir, &ent->stored);
- debugfs_create_u32("miss", 0600, dir, &ent->miss);
- }
+ sprintf(ent->name, "%d", order);
+ dir = debugfs_create_dir(ent->name, dev->cache.fs_root);
+ debugfs_create_file("size", 0600, dir, ent, &size_fops);
+ debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
+ debugfs_create_ulong("cur", 0400, dir, &ent->stored);
+ debugfs_create_u32("miss", 0600, dir, &ent->miss);
+}
+
+static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
+{
+ struct dentry *dbg_root = mlx5_debugfs_get_dev_root(dev->mdev);
+ struct mlx5_mkey_cache *cache = &dev->cache;
+
+ if (!mlx5_debugfs_root || dev->is_rep)
+ return;
+
+ cache->fs_root = debugfs_create_dir("mr_cache", dbg_root);
}
static void delay_time_func(struct timer_list *t)
@@ -733,13 +877,100 @@ static void delay_time_func(struct timer_list *t)
WRITE_ONCE(dev->fill_delay, 0);
}
+struct mlx5_cache_ent *
+mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
+ struct mlx5r_cache_rb_key rb_key,
+ bool persistent_entry)
+{
+ struct mlx5_cache_ent *ent;
+ int order;
+ int ret;
+
+ ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+ if (!ent)
+ return ERR_PTR(-ENOMEM);
+
+ xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
+ ent->rb_key = rb_key;
+ ent->dev = dev;
+ ent->is_tmp = !persistent_entry;
+
+ INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
+
+ ret = mlx5_cache_ent_insert(&dev->cache, ent);
+ if (ret) {
+ kfree(ent);
+ return ERR_PTR(ret);
+ }
+
+ if (persistent_entry) {
+ if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+ order = MLX5_IMR_KSM_CACHE_ENTRY;
+ else
+ order = order_base_2(rb_key.ndescs) - 2;
+
+ if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
+ !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
+ mlx5r_umr_can_load_pas(dev, 0))
+ ent->limit = dev->mdev->profile.mr_cache[order].limit;
+ else
+ ent->limit = 0;
+
+ mlx5_mkey_cache_debugfs_add_ent(dev, ent);
+ } else {
+ mod_delayed_work(ent->dev->cache.wq,
+ &ent->dev->cache.remove_ent_dwork,
+ msecs_to_jiffies(30 * 1000));
+ }
+
+ return ent;
+}
+
+static void remove_ent_work_func(struct work_struct *work)
+{
+ struct mlx5_mkey_cache *cache;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *cur;
+
+ cache = container_of(work, struct mlx5_mkey_cache,
+ remove_ent_dwork.work);
+ mutex_lock(&cache->rb_lock);
+ cur = rb_last(&cache->rb_root);
+ while (cur) {
+ ent = rb_entry(cur, struct mlx5_cache_ent, node);
+ cur = rb_prev(cur);
+ mutex_unlock(&cache->rb_lock);
+
+ xa_lock_irq(&ent->mkeys);
+ if (!ent->is_tmp) {
+ xa_unlock_irq(&ent->mkeys);
+ mutex_lock(&cache->rb_lock);
+ continue;
+ }
+ xa_unlock_irq(&ent->mkeys);
+
+ clean_keys(ent->dev, ent);
+ mutex_lock(&cache->rb_lock);
+ }
+ mutex_unlock(&cache->rb_lock);
+}
+
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mkey_cache *cache = &dev->cache;
+ struct rb_root *root = &dev->cache.rb_root;
+ struct mlx5r_cache_rb_key rb_key = {
+ .access_mode = MLX5_MKC_ACCESS_MODE_MTT,
+ };
struct mlx5_cache_ent *ent;
+ struct rb_node *node;
+ int ret;
int i;
mutex_init(&dev->slow_path_mutex);
+ mutex_init(&dev->cache.rb_lock);
+ dev->cache.rb_root = RB_ROOT;
+ INIT_DELAYED_WORK(&dev->cache.remove_ent_dwork, remove_ent_work_func);
cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
if (!cache->wq) {
mlx5_ib_warn(dev, "failed to create work queue\n");
@@ -748,52 +979,51 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
timer_setup(&dev->delay_timer, delay_time_func, 0);
- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
- ent = &cache->ent[i];
- xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
- ent->order = i + 2;
- ent->dev = dev;
- ent->limit = 0;
-
- INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
-
- if (i > MKEY_CACHE_LAST_STD_ENTRY) {
- mlx5_odp_init_mkey_cache_entry(ent);
- continue;
+ mlx5_mkey_cache_debugfs_init(dev);
+ mutex_lock(&cache->rb_lock);
+ for (i = 0; i <= mkey_cache_max_order(dev); i++) {
+ rb_key.ndescs = 1 << (i + 2);
+ ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
+ if (IS_ERR(ent)) {
+ ret = PTR_ERR(ent);
+ goto err;
}
+ }
- if (ent->order > mkey_cache_max_order(dev))
- continue;
+ ret = mlx5_odp_init_mkey_cache(dev);
+ if (ret)
+ goto err;
- ent->page = PAGE_SHIFT;
- ent->ndescs = 1 << ent->order;
- ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
- if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
- !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
- mlx5r_umr_can_load_pas(dev, 0))
- ent->limit = dev->mdev->profile.mr_cache[i].limit;
- else
- ent->limit = 0;
+ mutex_unlock(&cache->rb_lock);
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
xa_lock_irq(&ent->mkeys);
queue_adjust_cache_locked(ent);
xa_unlock_irq(&ent->mkeys);
}
- mlx5_mkey_cache_debugfs_init(dev);
-
return 0;
+
+err:
+ mutex_unlock(&cache->rb_lock);
+ mlx5_mkey_cache_debugfs_cleanup(dev);
+ mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
+ return ret;
}
-int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
{
- unsigned int i;
+ struct rb_root *root = &dev->cache.rb_root;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *node;
if (!dev->cache.wq)
- return 0;
-
- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
- struct mlx5_cache_ent *ent = &dev->cache.ent[i];
+ return;
+ cancel_delayed_work_sync(&dev->cache.remove_ent_dwork);
+ mutex_lock(&dev->cache.rb_lock);
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
xa_lock_irq(&ent->mkeys);
ent->disabled = true;
xa_unlock_irq(&ent->mkeys);
@@ -803,13 +1033,18 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
mlx5_mkey_cache_debugfs_cleanup(dev);
mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++)
- clean_keys(dev, i);
+ node = rb_first(root);
+ while (node) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ node = rb_next(node);
+ clean_keys(dev, ent);
+ rb_erase(&ent->node, root);
+ kfree(ent);
+ }
+ mutex_unlock(&dev->cache.rb_lock);
destroy_workqueue(dev->cache.wq);
del_timer_sync(&dev->delay_timer);
-
- return 0;
}
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
@@ -873,23 +1108,10 @@ static int get_octo_len(u64 addr, u64 len, int page_shift)
static int mkey_cache_max_order(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
- return MKEY_CACHE_LAST_STD_ENTRY + 2;
+ return MKEY_CACHE_LAST_STD_ENTRY;
return MLX5_MAX_UMR_SHIFT;
}
-static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
- unsigned int order)
-{
- struct mlx5_mkey_cache *cache = &dev->cache;
-
- if (order < cache->ent[0].order)
- return &cache->ent[0];
- order = order - cache->ent[0].order;
- if (order > MKEY_CACHE_LAST_STD_ENTRY)
- return NULL;
- return &cache->ent[order];
-}
-
static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
u64 length, int access_flags, u64 iova)
{
@@ -916,6 +1138,9 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
struct ib_umem *umem, u64 iova,
int access_flags)
{
+ struct mlx5r_cache_rb_key rb_key = {
+ .access_mode = MLX5_MKC_ACCESS_MODE_MTT,
+ };
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
@@ -928,22 +1153,26 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
0, iova);
if (WARN_ON(!page_size))
return ERR_PTR(-EINVAL);
- ent = mkey_cache_ent_from_order(
- dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
+
+ rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size);
+ rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags);
+ rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags);
+ ent = mkey_cache_ent_from_rb_key(dev, rb_key);
/*
- * Matches access in alloc_cache_mr(). If the MR can't come from the
- * cache then synchronously create an uncached one.
+ * If the MR can't come from the cache then synchronously create an uncached
+ * one.
*/
- if (!ent || ent->limit == 0 ||
- !mlx5r_umr_can_reconfig(dev, 0, access_flags) ||
- mlx5_umem_needs_ats(dev, umem, access_flags)) {
+ if (!ent) {
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(pd, umem, iova, access_flags, page_size, false);
mutex_unlock(&dev->slow_path_mutex);
+ if (IS_ERR(mr))
+ return mr;
+ mr->mmkey.rb_key = rb_key;
return mr;
}
- mr = mlx5_mr_cache_alloc(dev, ent, access_flags);
+ mr = _mlx5_mr_cache_alloc(dev, ent, access_flags);
if (IS_ERR(mr))
return mr;
@@ -1030,6 +1259,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
goto err_2;
}
mr->mmkey.type = MLX5_MKEY_MR;
+ mr->mmkey.ndescs = get_octo_len(iova, umem->length, mr->page_shift);
mr->umem = umem;
set_mr_fields(dev, mr, umem->length, access_flags, iova);
kvfree(in);
@@ -1372,7 +1602,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
if (WARN_ON(!*page_size))
return false;
- return (1ULL << mr->mmkey.cache_ent->order) >=
+ return (mr->mmkey.cache_ent->rb_key.ndescs) >=
ib_umem_num_dma_blocks(new_umem, *page_size);
}
@@ -1567,6 +1797,49 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
}
}
+static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_mr *mr)
+{
+ struct mlx5_mkey_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent;
+ int ret;
+
+ if (mr->mmkey.cache_ent) {
+ xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
+ mr->mmkey.cache_ent->in_use--;
+ goto end;
+ }
+
+ mutex_lock(&cache->rb_lock);
+ ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key);
+ if (ent) {
+ if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) {
+ if (ent->disabled) {
+ mutex_unlock(&cache->rb_lock);
+ return -EOPNOTSUPP;
+ }
+ mr->mmkey.cache_ent = ent;
+ xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
+ mutex_unlock(&cache->rb_lock);
+ goto end;
+ }
+ }
+
+ ent = mlx5r_cache_create_ent_locked(dev, mr->mmkey.rb_key, false);
+ mutex_unlock(&cache->rb_lock);
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
+
+ mr->mmkey.cache_ent = ent;
+ xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
+
+end:
+ ret = push_mkey_locked(mr->mmkey.cache_ent, false,
+ xa_mk_value(mr->mmkey.key));
+ xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
+ return ret;
+}
+
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct mlx5_ib_mr *mr = to_mmr(ibmr);
@@ -1612,16 +1885,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
}
/* Stop DMA */
- if (mr->mmkey.cache_ent) {
- xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
- mr->mmkey.cache_ent->in_use--;
- xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
-
+ if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length))
if (mlx5r_umr_revoke_mr(mr) ||
- push_mkey(mr->mmkey.cache_ent, false,
- xa_mk_value(mr->mmkey.key)))
+ cache_ent_find_and_store(dev, mr))
mr->mmkey.cache_ent = NULL;
- }
+
if (!mr->mmkey.cache_ent) {
rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
if (rc)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index e6e021af6aa9..4a04cbc5b78a 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -104,7 +104,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
if (flags & MLX5_IB_UPD_XLT_ZAP) {
for (; pklm != end; pklm++, idx++) {
pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
- pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey);
+ pklm->key = mr_to_mdev(imr)->mkeys.null_mkey;
pklm->va = 0;
}
return;
@@ -137,7 +137,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
pklm->key = cpu_to_be32(mtt->ibmr.lkey);
pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE);
} else {
- pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey);
+ pklm->key = mr_to_mdev(imr)->mkeys.null_mkey;
pklm->va = 0;
}
}
@@ -417,8 +417,9 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
if (IS_ERR(odp))
return ERR_CAST(odp);
- mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[MLX5_IMR_MTT_CACHE_ENTRY],
- imr->access_flags);
+ mr = mlx5_mr_cache_alloc(dev, imr->access_flags,
+ MLX5_MKC_ACCESS_MODE_MTT,
+ MLX5_IMR_MTT_ENTRIES);
if (IS_ERR(mr)) {
ib_umem_odp_release(odp);
return mr;
@@ -492,9 +493,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
if (IS_ERR(umem_odp))
return ERR_CAST(umem_odp);
- imr = mlx5_mr_cache_alloc(dev,
- &dev->cache.ent[MLX5_IMR_KSM_CACHE_ENTRY],
- access_flags);
+ imr = mlx5_mr_cache_alloc(dev, access_flags, MLX5_MKC_ACCESS_MODE_KSM,
+ mlx5_imr_ksm_entries);
if (IS_ERR(imr)) {
ib_umem_odp_release(umem_odp);
return imr;
@@ -986,7 +986,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
{
int ret = 0, npages = 0;
u64 io_virt;
- u32 key;
+ __be32 key;
u32 byte_count;
size_t bcnt;
int inline_segment;
@@ -1000,7 +1000,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
struct mlx5_wqe_data_seg *dseg = wqe;
io_virt = be64_to_cpu(dseg->addr);
- key = be32_to_cpu(dseg->lkey);
+ key = dseg->lkey;
byte_count = be32_to_cpu(dseg->byte_count);
inline_segment = !!(byte_count & MLX5_INLINE_SEG);
bcnt = byte_count & ~MLX5_INLINE_SEG;
@@ -1014,7 +1014,8 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
}
/* receive WQE end of sg list. */
- if (receive_queue && bcnt == 0 && key == MLX5_INVALID_LKEY &&
+ if (receive_queue && bcnt == 0 &&
+ key == dev->mkeys.terminate_scatter_list_mkey &&
io_virt == 0)
break;
@@ -1034,7 +1035,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
continue;
}
- ret = pagefault_single_data_segment(dev, NULL, key,
+ ret = pagefault_single_data_segment(dev, NULL, be32_to_cpu(key),
io_virt, bcnt,
&pfault->bytes_committed,
bytes_mapped);
@@ -1587,26 +1588,22 @@ mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
return err;
}
-void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent)
+int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
{
- if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
- return;
+ struct mlx5r_cache_rb_key rb_key = {
+ .access_mode = MLX5_MKC_ACCESS_MODE_KSM,
+ .ndescs = mlx5_imr_ksm_entries,
+ };
+ struct mlx5_cache_ent *ent;
- switch (ent->order - 2) {
- case MLX5_IMR_MTT_CACHE_ENTRY:
- ent->page = PAGE_SHIFT;
- ent->ndescs = MLX5_IMR_MTT_ENTRIES;
- ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
- ent->limit = 0;
- break;
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return 0;
- case MLX5_IMR_KSM_CACHE_ENTRY:
- ent->page = MLX5_KSM_PAGE_SHIFT;
- ent->ndescs = mlx5_imr_ksm_entries;
- ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
- ent->limit = 0;
- break;
- }
+ ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
+
+ return 0;
}
static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
@@ -1615,25 +1612,15 @@ static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
- int ret = 0;
-
internal_fill_odp_caps(dev);
if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
- return ret;
+ return 0;
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops);
- if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
- ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
- if (ret) {
- mlx5_ib_err(dev, "Error getting null_mkey %d\n", ret);
- return ret;
- }
- }
-
mutex_init(&dev->odp_eq_mutex);
- return ret;
+ return 0;
}
void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index cf953d23d18d..7cc3b973dec7 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -71,6 +71,14 @@ struct mlx5_modify_raw_qp_param {
u32 port;
};
+struct mlx5_ib_qp_event_work {
+ struct work_struct work;
+ struct mlx5_core_qp *qp;
+ int type;
+};
+
+static struct workqueue_struct *mlx5_ib_qp_event_wq;
+
static void get_cqs(enum ib_qp_type qp_type,
struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq);
@@ -302,51 +310,120 @@ int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
return mlx5_ib_read_user_wqe_srq(srq, wqe_index, buffer, buflen, bc);
}
+static void mlx5_ib_qp_err_syndrome(struct ib_qp *ibqp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ void *pas_ext_union, *err_syn;
+ u32 *outb;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev->mdev, qpc_extension) ||
+ !MLX5_CAP_GEN(dev->mdev, qp_error_syndrome))
+ return;
+
+ outb = kzalloc(outlen, GFP_KERNEL);
+ if (!outb)
+ return;
+
+ err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen,
+ true);
+ if (err)
+ goto out;
+
+ pas_ext_union =
+ MLX5_ADDR_OF(query_qp_out, outb, qp_pas_or_qpc_ext_and_pas);
+ err_syn = MLX5_ADDR_OF(qpc_extension_and_pas_list_in, pas_ext_union,
+ qpc_data_extension.error_syndrome);
+
+ pr_err("%s/%d: QP %d error: %s (0x%x 0x%x 0x%x)\n",
+ ibqp->device->name, ibqp->port, ibqp->qp_num,
+ ib_wc_status_msg(
+ MLX5_GET(cqe_error_syndrome, err_syn, syndrome)),
+ MLX5_GET(cqe_error_syndrome, err_syn, vendor_error_syndrome),
+ MLX5_GET(cqe_error_syndrome, err_syn, hw_syndrome_type),
+ MLX5_GET(cqe_error_syndrome, err_syn, hw_error_syndrome));
+out:
+ kfree(outb);
+}
+
+static void mlx5_ib_handle_qp_event(struct work_struct *_work)
+{
+ struct mlx5_ib_qp_event_work *qpe_work =
+ container_of(_work, struct mlx5_ib_qp_event_work, work);
+ struct ib_qp *ibqp = &to_mibqp(qpe_work->qp)->ibqp;
+ struct ib_event event = {};
+
+ event.device = ibqp->device;
+ event.element.qp = ibqp;
+ switch (qpe_work->type) {
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ event.event = IB_EVENT_PATH_MIG;
+ break;
+ case MLX5_EVENT_TYPE_COMM_EST:
+ event.event = IB_EVENT_COMM_EST;
+ break;
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ event.event = IB_EVENT_SQ_DRAINED;
+ break;
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ break;
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ event.event = IB_EVENT_PATH_MIG_ERR;
+ break;
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ default:
+ pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n",
+ qpe_work->type, qpe_work->qp->qpn);
+ goto out;
+ }
+
+ if ((event.event == IB_EVENT_QP_FATAL) ||
+ (event.event == IB_EVENT_QP_ACCESS_ERR))
+ mlx5_ib_qp_err_syndrome(ibqp);
+
+ ibqp->event_handler(&event, ibqp->qp_context);
+
+out:
+ mlx5_core_res_put(&qpe_work->qp->common);
+ kfree(qpe_work);
+}
+
static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
{
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
- struct ib_event event;
+ struct mlx5_ib_qp_event_work *qpe_work;
if (type == MLX5_EVENT_TYPE_PATH_MIG) {
/* This event is only valid for trans_qps */
to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
}
- if (ibqp->event_handler) {
- event.device = ibqp->device;
- event.element.qp = ibqp;
- switch (type) {
- case MLX5_EVENT_TYPE_PATH_MIG:
- event.event = IB_EVENT_PATH_MIG;
- break;
- case MLX5_EVENT_TYPE_COMM_EST:
- event.event = IB_EVENT_COMM_EST;
- break;
- case MLX5_EVENT_TYPE_SQ_DRAINED:
- event.event = IB_EVENT_SQ_DRAINED;
- break;
- case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
- event.event = IB_EVENT_QP_LAST_WQE_REACHED;
- break;
- case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
- event.event = IB_EVENT_QP_FATAL;
- break;
- case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
- event.event = IB_EVENT_PATH_MIG_ERR;
- break;
- case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- event.event = IB_EVENT_QP_REQ_ERR;
- break;
- case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
- event.event = IB_EVENT_QP_ACCESS_ERR;
- break;
- default:
- pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
- return;
- }
+ if (!ibqp->event_handler)
+ goto out_no_handler;
- ibqp->event_handler(&event, ibqp->qp_context);
- }
+ qpe_work = kzalloc(sizeof(*qpe_work), GFP_ATOMIC);
+ if (!qpe_work)
+ goto out_no_handler;
+
+ qpe_work->qp = qp;
+ qpe_work->type = type;
+ INIT_WORK(&qpe_work->work, mlx5_ib_handle_qp_event);
+ queue_work(mlx5_ib_qp_event_wq, &qpe_work->work);
+ return;
+
+out_no_handler:
+ mlx5_core_res_put(&qp->common);
}
static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
@@ -4827,7 +4904,8 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (!outb)
return -ENOMEM;
- err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen);
+ err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen,
+ false);
if (err)
goto out;
@@ -5720,3 +5798,17 @@ out:
mutex_unlock(&mqp->mutex);
return err;
}
+
+int mlx5_ib_qp_event_init(void)
+{
+ mlx5_ib_qp_event_wq = alloc_ordered_workqueue("mlx5_ib_qp_event_wq", 0);
+ if (!mlx5_ib_qp_event_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_ib_qp_event_cleanup(void)
+{
+ destroy_workqueue(mlx5_ib_qp_event_wq);
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
index 5d4e140db99c..77f9b4a54816 100644
--- a/drivers/infiniband/hw/mlx5/qp.h
+++ b/drivers/infiniband/hw/mlx5/qp.h
@@ -20,7 +20,7 @@ int mlx5_core_qp_modify(struct mlx5_ib_dev *dev, u16 opcode, u32 opt_param_mask,
int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp);
int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct);
int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
- u32 *out, int outlen);
+ u32 *out, int outlen, bool qpc_ext);
int mlx5_core_dct_query(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
u32 *out, int outlen);
@@ -44,4 +44,6 @@ void mlx5_core_res_put(struct mlx5_core_rsc_common *res);
int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn);
int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn);
int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
+int mlx5_ib_qp_event_init(void);
+void mlx5_ib_qp_event_cleanup(void);
#endif /* _MLX5_IB_QP_H */
diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
index 542e4c63a8de..bae0334d6e7f 100644
--- a/drivers/infiniband/hw/mlx5/qpc.c
+++ b/drivers/infiniband/hw/mlx5/qpc.c
@@ -135,7 +135,8 @@ static int rsc_event_notifier(struct notifier_block *nb,
case MLX5_RES_SQ:
qp = (struct mlx5_core_qp *)common;
qp->event(qp, event_type);
- break;
+ /* Need to put resource in event handler */
+ return NOTIFY_OK;
case MLX5_RES_DCT:
dct = (struct mlx5_core_dct *)common;
if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED)
@@ -504,12 +505,14 @@ void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev)
}
int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
- u32 *out, int outlen)
+ u32 *out, int outlen, bool qpc_ext)
{
u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {};
MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP);
MLX5_SET(query_qp_in, in, qpn, qp->qpn);
+ MLX5_SET(query_qp_in, in, qpc_ext, qpc_ext);
+
return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, outlen);
}
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 09b365a98bbf..a056ea835da5 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -447,7 +447,7 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
if (i < srq->msrq.max_avail_gather) {
scat[i].byte_count = 0;
- scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
+ scat[i].lkey = dev->mkeys.terminate_scatter_list_mkey;
scat[i].addr = 0;
}
}
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index 029e9536ec28..55f4e048d947 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -636,9 +636,7 @@ int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
cur_mtt = mtt;
- rdma_for_each_block(mr->umem->sgt_append.sgt.sgl, &biter,
- mr->umem->sgt_append.sgt.nents,
- BIT(mr->page_shift)) {
+ rdma_umem_for_each_dma_block(mr->umem, &biter, BIT(mr->page_shift)) {
if (cur_mtt == (void *)mtt + sg.length) {
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index 855f3f4fefad..df1d1b0a3ef7 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -1252,7 +1252,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
if (i < qp->rq.max_gs) {
scat[i].byte_count = 0;
- scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
+ scat[i].lkey = dev->mkeys.terminate_scatter_list_mkey;
scat[i].addr = 0;
}