From 26e990badde40b2fb824bfa3cb9d4288a79584bc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:06 -0300 Subject: RDMA: Check attr_mask during modify_qp Each driver should check that it can support the provided attr_mask during modify_qp. IB_USER_VERBS_EX_CMD_MODIFY_QP was being used to block modify_qp_ex because the driver didn't check RATE_LIMIT. Link: https://lore.kernel.org/r/6-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 2 ++ include/uapi/rdma/ib_user_verbs.h | 14 -------------- 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9bf6c319a670..0f9ce27bedcb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1234,6 +1234,8 @@ enum ib_qp_attr_mask { IB_QP_RESERVED3 = (1<<23), IB_QP_RESERVED4 = (1<<24), IB_QP_RATE_LIMIT = (1<<25), + + IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0), }; enum ib_qp_state { diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 456438c18c2c..7ee73a0652f1 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -596,20 +596,6 @@ enum { IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, }; -enum { - /* - * This value is equal to IB_QP_DEST_QPN. - */ - IB_USER_LEGACY_LAST_QP_ATTR_MASK = 1ULL << 20, -}; - -enum { - /* - * This value is equal to IB_QP_RATE_LIMIT. - */ - IB_USER_LAST_QP_ATTR_MASK = 1ULL << 25, -}; - struct ib_uverbs_ex_create_qp { __aligned_u64 user_handle; __u32 pd_handle; -- cgit v1.2.3-70-g09d2 From bd2a40cc2463766ed1a55d94a4ccbdcd621da323 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:09 -0300 Subject: RDMA/core: Remove uverbs_ex_cmd_mask No driver sets it, and the core code sets a maximum mask, so simply remove it. Disabled operations are now handled either by having a NULL ops pointer, or by having the common driver callbacks check for unsupported extended attributes.
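To illustrate the attr_mask check the core now expects, here is a minimal sketch of a driver-side modify_qp guard; the foo_ driver and its state-transition body are hypothetical, only IB_QP_ATTR_STANDARD_BITS and IB_QP_RATE_LIMIT come from this series:

static int foo_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			 int attr_mask, struct ib_udata *udata)
{
	/* Refuse any attr_mask bits beyond the standard set plus the
	 * extended bits (here IB_QP_RATE_LIMIT) this driver implements. */
	if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
		return -EOPNOTSUPP;

	/* ... validate and apply the QP state transition ... */
	return 0;
}

A driver that does not implement rate limiting would mask against IB_QP_ATTR_STANDARD_BITS alone, which is what makes modify_qp_ex safe to expose without the old per-command gate.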
Link: https://lore.kernel.org/r/9-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 15 --------------- drivers/infiniband/core/uverbs_uapi.c | 5 +---- include/rdma/ib_verbs.h | 1 - 3 files changed, 1 insertion(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index e5f5d656951d..939664765536 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -630,21 +630,6 @@ struct ib_device *_ib_alloc_device(size_t size) BIT_ULL(IB_USER_VERBS_CMD_REG_MR) | BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ); - - device->uverbs_ex_cmd_mask = - BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_CQ) | - BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_QP) | - BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_WQ) | - BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | - BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL) | - BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_QP) | - BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - BIT_ULL(IB_USER_VERBS_EX_CMD_QUERY_DEVICE); - return device; } EXPORT_SYMBOL(_ib_alloc_device); diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 5addc8fae3f3..62f5bcb712cf 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -79,10 +79,7 @@ static int uapi_create_write(struct uverbs_api *uapi, method_elm->is_ex = def->write.is_ex; method_elm->handler = def->func_write; - if (def->write.is_ex) - method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask & - BIT_ULL(def->write.command_num)); - else + if (!def->write.is_ex) method_elm->disabled = !(ibdev->uverbs_cmd_mask & BIT_ULL(def->write.command_num)); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0f9ce27bedcb..c7c6b06171e9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2667,7 +2667,6 @@ struct ib_device { const struct attribute_group *groups[3]; u64 uverbs_cmd_mask; - u64 uverbs_ex_cmd_mask; char node_desc[IB_DEVICE_NODE_DESC_MAX]; __be64 node_guid; -- cgit v1.2.3-70-g09d2 From 676a80adba0131e1603ef3de5f73a19a0d3d0e65 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:11 -0300 Subject: RDMA: Remove AH from uverbs_cmd_mask Drivers that need a uverbs AH should instead set the create_user_ah() op similar to reg_user_mr(). MODIFY_AH and QUERY_AH cmds were never implemented so are just deleted. 
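For a concrete picture of the new contract, a hedged sketch of a driver's ops table (the foo_ names are illustrative, not from this patch):

static const struct ib_device_ops foo_dev_ops = {
	/* Kernel consumers always go through create_ah. */
	.create_ah = foo_create_ah,
	/* Setting create_user_ah opts in to the uverbs CREATE_AH path;
	 * leaving it NULL disables the AH write commands for this
	 * device automatically. */
	.create_user_ah = foo_create_ah,
	.destroy_ah = foo_destroy_ah,
};

As the hunks below show, the converted drivers (bnxt_re, efa, mlx5, ocrdma, rdmavt, rxe) simply reuse their existing create_ah handler for create_user_ah.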
Link: https://lore.kernel.org/r/11-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 3 +++ drivers/infiniband/core/uverbs_cmd.c | 8 ++++---- drivers/infiniband/core/verbs.c | 5 ++++- drivers/infiniband/hw/bnxt_re/main.c | 10 +--------- drivers/infiniband/hw/efa/efa_main.c | 5 +---- drivers/infiniband/hw/mlx5/main.c | 5 +---- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 7 +------ drivers/infiniband/sw/rdmavt/ah.c | 1 - drivers/infiniband/sw/rdmavt/vt.c | 5 +---- drivers/infiniband/sw/rxe/rxe_verbs.c | 9 ++------- include/rdma/ib_verbs.h | 2 ++ 11 files changed, 20 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 939664765536..ce26564d4edf 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -606,6 +606,7 @@ struct ib_device *_ib_alloc_device(size_t size) BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) | BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) | @@ -614,6 +615,7 @@ struct ib_device *_ib_alloc_device(size_t size) BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) | @@ -2606,6 +2608,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, create_qp); SET_DEVICE_OP(dev_ops, create_rwq_ind_table); SET_DEVICE_OP(dev_ops, create_srq); + SET_DEVICE_OP(dev_ops, create_user_ah); SET_DEVICE_OP(dev_ops, create_wq); SET_DEVICE_OP(dev_ops, dealloc_dm); SET_DEVICE_OP(dev_ops, dealloc_driver); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 54c3eb463da8..72b209ca77c7 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3689,13 +3689,13 @@ const struct uapi_definition uverbs_def_write_intf[] = { ib_uverbs_create_ah, UAPI_DEF_WRITE_UDATA_IO( struct ib_uverbs_create_ah, - struct ib_uverbs_create_ah_resp), - UAPI_DEF_METHOD_NEEDS_FN(create_ah)), + struct ib_uverbs_create_ah_resp)), DECLARE_UVERBS_WRITE( IB_USER_VERBS_CMD_DESTROY_AH, ib_uverbs_destroy_ah, - UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah), - UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))), + UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah)), + UAPI_DEF_OBJ_NEEDS_FN(create_user_ah), + UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)), DECLARE_UVERBS_OBJECT( UVERBS_OBJECT_COMP_CHANNEL, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 740f8454b6b4..cfcfa3ae32cf 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -533,7 +533,10 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, init_attr.flags = flags; init_attr.xmit_slave = xmit_slave; - ret = device->ops.create_ah(ah, &init_attr, udata); + if (udata) + ret = device->ops.create_user_ah(ah, &init_attr, udata); + else + ret = device->ops.create_ah(ah, &init_attr, NULL); if (ret) { kfree(ah); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 47ebbba4a207..838744b6811b 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ 
b/drivers/infiniband/hw/bnxt_re/main.c @@ -646,6 +646,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .create_cq = bnxt_re_create_cq, .create_qp = bnxt_re_create_qp, .create_srq = bnxt_re_create_srq, + .create_user_ah = bnxt_re_create_ah, .dealloc_driver = bnxt_re_dealloc_driver, .dealloc_pd = bnxt_re_dealloc_pd, .dealloc_ucontext = bnxt_re_dealloc_ucontext, @@ -701,15 +702,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->dev.parent = &rdev->en_dev->pdev->dev; ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; - /* User space */ - ibdev->uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */ - - rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); ib_set_device_ops(ibdev, &bnxt_re_dev_ops); ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1); diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 843b44a4887b..2b3da85fb43c 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -248,6 +248,7 @@ static const struct ib_device_ops efa_dev_ops = { .create_ah = efa_create_ah, .create_cq = efa_create_cq, .create_qp = efa_create_qp, + .create_user_ah = efa_create_ah, .dealloc_pd = efa_dealloc_pd, .dealloc_ucontext = efa_dealloc_ucontext, .dereg_mr = efa_dereg_mr, @@ -308,10 +309,6 @@ static int efa_ib_device_add(struct efa_dev *dev) dev->ibdev.num_comp_vectors = 1; dev->ibdev.dev.parent = &pdev->dev; - dev->ibdev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - ib_set_device_ops(&dev->ibdev, &efa_dev_ops); err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index daa1de6497e7..e07ad30f0aca 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4022,6 +4022,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .create_cq = mlx5_ib_create_cq, .create_qp = mlx5_ib_create_qp, .create_srq = mlx5_ib_create_srq, + .create_user_ah = mlx5_ib_create_ah, .dealloc_pd = mlx5_ib_dealloc_pd, .dealloc_ucontext = mlx5_ib_dealloc_ucontext, .del_gid = mlx5_ib_del_gid, @@ -4139,10 +4140,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; int err; - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) ib_set_device_ops(&dev->ib_dev, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 5e67cd66359a..d12ec85ee226 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -154,6 +154,7 @@ static const struct ib_device_ops ocrdma_dev_ops = { .create_ah = ocrdma_create_ah, .create_cq = ocrdma_create_cq, .create_qp = ocrdma_create_qp, + .create_user_ah = ocrdma_create_ah, .dealloc_pd = ocrdma_dealloc_pd, .dealloc_ucontext = ocrdma_dealloc_ucontext, .dereg_mr = ocrdma_dereg_mr, @@ -205,12 +206,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); - dev->ibdev.uverbs_cmd_mask |= - OCRDMA_UVERBS(CREATE_AH) | - OCRDMA_UVERBS(MODIFY_AH) | - 
OCRDMA_UVERBS(QUERY_AH) | - OCRDMA_UVERBS(DESTROY_AH); - dev->ibdev.node_type = RDMA_NODE_IB_CA; dev->ibdev.phys_port_cnt = 1; dev->ibdev.num_comp_vectors = dev->eq_cnt; diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index b938c4ffa99a..f9754dcd250b 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -129,7 +129,6 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, * rvt_destory_ah - Destory an address handle * @ibah: address handle * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) - * * Return: 0 on success */ int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 77afd06166fc..43fbc4e54edf 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -384,6 +384,7 @@ static const struct ib_device_ops rvt_dev_ops = { .create_cq = rvt_create_cq, .create_qp = rvt_create_qp, .create_srq = rvt_create_srq, + .create_user_ah = rvt_create_ah, .dealloc_pd = rvt_dealloc_pd, .dealloc_ucontext = rvt_dealloc_ucontext, .dereg_mr = rvt_dereg_mr, @@ -594,10 +595,6 @@ int rvt_register_device(struct rvt_dev_info *rdi) * version, so we do all of this sort of stuff here. */ rdi->ibdev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index b3323c305a63..c282b9a92712 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1079,6 +1079,7 @@ static const struct ib_device_ops rxe_dev_ops = { .create_cq = rxe_create_cq, .create_qp = rxe_create_qp, .create_srq = rxe_create_srq, + .create_user_ah = rxe_create_ah, .dealloc_driver = rxe_dealloc, .dealloc_pd = rxe_dealloc_pd, .dealloc_ucontext = rxe_dealloc_ucontext, @@ -1141,13 +1142,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) dma_set_max_seg_size(&dev->dev, UINT_MAX); dma_set_coherent_mask(&dev->dev, dma_get_required_mask(&dev->dev)); - dev->uverbs_cmd_mask |= - BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) - ; + dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ); ib_set_device_ops(dev, &rxe_dev_ops); err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c7c6b06171e9..9420827d2421 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2403,6 +2403,8 @@ struct ib_device_ops { int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, struct ib_udata *udata); + int (*create_user_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, + struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*destroy_ah)(struct ib_ah *ah, u32 flags); -- cgit v1.2.3-70-g09d2 From bfb972c5e1cba88c93912f271ed5ecc114e31431 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:15:39 +0100 
Subject: IB/verbs: avoid nested container_of() Nested container_of() calls work correctly but cause a warning when building with W=2. Invoking it from an inline function like in drivers/infiniband/hw/mlx5/mlx5_ib.h means we get hundreds of warnings like: include/linux/kernel.h:852:8: warning: declaration of '__mptr' shadows a previous local [-Wshadow] 852 | void *__mptr = (void *)(ptr); \ | ^~~~~~ include/rdma/uverbs_ioctl.h:651:11: note: in expansion of macro 'container_of' 651 | (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ | ^~~~~~~~~~~~ include/rdma/uverbs_ioctl.h:651:24: note: in expansion of macro 'container_of' 651 | (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ | ^~~~~~~~~~~~ drivers/infiniband/hw/mthca/mthca_qp.c:564:35: note: in expansion of macro 'rdma_udata_to_drv_context' 564 | struct mthca_ucontext *context = rdma_udata_to_drv_context( | ^~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/kernel.h:852:8: note: shadowed declaration is here 852 | void *__mptr = (void *)(ptr); \ | ^~~~~~ include/rdma/uverbs_ioctl.h:651:11: note: in expansion of macro 'container_of' 651 | (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ | ^~~~~~~~~~~~ drivers/infiniband/hw/mthca/mthca_qp.c:564:35: note: in expansion of macro 'rdma_udata_to_drv_context' 564 | struct mthca_ucontext *context = rdma_udata_to_drv_context( | ^~~~~~~~~~~~~~~~~~~~~~~~~ In file included from : include/linux/kernel.h:852:8: warning: declaration of '__mptr' shadows a previous local [-Wshadow] 852 | void *__mptr = (void *)(ptr); \ | ^~~~~~ Rewrite the macro to use an inline function internally, which makes it more readable and reduces the amount of useless output from make W=2. Fixes: 730623f4a56f ("IB/verbs: Add helper function rdma_udata_to_drv_context") Link: https://lore.kernel.org/r/20201026161549.3709175-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- include/rdma/uverbs_ioctl.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index b00270c72740..bf167ef6c688 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -647,12 +647,15 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b * 'ucontext'. * */ -#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ - (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ - driver_udata) \ - ->context, \ - drv_dev_struct, member) : \ - (drv_dev_struct *)NULL) +static inline struct uverbs_attr_bundle * +rdma_udata_to_uverbs_attr_bundle(struct ib_udata *udata) +{ + return container_of(udata, struct uverbs_attr_bundle, driver_udata); +} + +#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ + (udata ? container_of(rdma_udata_to_uverbs_attr_bundle(udata)->context, \ + drv_dev_struct, member) : (drv_dev_struct *)NULL) #define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT) -- cgit v1.2.3-70-g09d2 From efa968ee20248ebf8da8542f21d5d2811e86392f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 4 Nov 2020 16:45:55 +0200 Subject: RDMA/core: Postpone uobject cleanup on failure till FD close Remove the ib_is_destroyable_retryable() concept. The idea here was to allow the drivers to forcibly clean the HW object even if they otherwise didn't want to (eg because of usecnt). This was an attempt to clean up in a world where drivers were not allowed to fail HW object destruction. 
Now that we are going back to allowing HW objects to fail destroy this doesn't make sense. Instead if a uobject's HW object can't be destroyed it is left on the uobject list and it is up to uverbs_destroy_ufile_hw() to clean it. Multiple passes over the uobject list allow hidden dependencies to be resolved. If that fails the HW driver is broken, throw a WARN_ON and leak the HW object memory. All the other tricky failure paths (eg on creation error unwind) have already been updated to this new model. Link: https://lore.kernel.org/r/20201104144556.3809085-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 51 ++++++++-------------- drivers/infiniband/core/uverbs_cmd.c | 5 +-- drivers/infiniband/core/uverbs_std_types.c | 15 +++---- .../infiniband/core/uverbs_std_types_counters.c | 5 +-- drivers/infiniband/core/uverbs_std_types_cq.c | 4 +- drivers/infiniband/core/uverbs_std_types_dm.c | 6 +-- .../infiniband/core/uverbs_std_types_flow_action.c | 6 +-- drivers/infiniband/core/uverbs_std_types_qp.c | 4 +- drivers/infiniband/core/uverbs_std_types_srq.c | 4 +- drivers/infiniband/core/uverbs_std_types_wq.c | 4 +- drivers/infiniband/hw/mlx5/devx.c | 4 +- drivers/infiniband/hw/mlx5/fs.c | 6 +-- include/rdma/ib_verbs.h | 44 +------------------ 13 files changed, 45 insertions(+), 113 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index ffe11b03724c..61fa0a3f8e5e 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -137,15 +137,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, } else if (uobj->object) { ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, attrs); - if (ret) { - if (ib_is_destroy_retryable(ret, reason, uobj)) - return ret; - - /* Nothing to be done, dangle the memory and move on */ - WARN(true, - "ib_uverbs: failed to remove uobject id %d, driver err=%d", - uobj->id, ret); - } + if (ret) + /* Nothing to be done, wait till ucontext will clean it */ + return ret; uobj->object = NULL; } @@ -543,12 +537,7 @@ static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj, struct uverbs_obj_idr_type, type); int ret = idr_type->destroy_object(uobj, why, attrs); - /* - * We can only fail gracefully if the user requested to destroy the - * object or when a retry may be called upon an error. - * In the rest of the cases, just remove whatever you can. - */ - if (ib_is_destroy_retryable(ret, why, uobj)) + if (ret) return ret; if (why == RDMA_REMOVE_ABORT) @@ -581,12 +570,8 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, { const struct uverbs_obj_fd_type *fd_type = container_of( uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); - int ret = fd_type->destroy_object(uobj, why); - if (ib_is_destroy_retryable(ret, why, uobj)) - return ret; - - return 0; + return fd_type->destroy_object(uobj, why); } static void remove_handle_fd_uobject(struct ib_uobject *uobj) @@ -863,11 +848,18 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, * racing with a lookup_get. 
*/ WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE)); + if (reason == RDMA_REMOVE_DRIVER_FAILURE) + obj->object = NULL; if (!uverbs_destroy_uobject(obj, reason, &attrs)) ret = 0; else atomic_set(&obj->usecnt, 0); } + + if (reason == RDMA_REMOVE_DRIVER_FAILURE) { + WARN_ON(!list_empty(&ufile->uobjects)); + return 0; + } return ret; } @@ -889,21 +881,12 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, if (!ufile->ucontext) goto done; - ufile->ucontext->cleanup_retryable = true; - while (!list_empty(&ufile->uobjects)) - if (__uverbs_cleanup_ufile(ufile, reason)) { - /* - * No entry was cleaned-up successfully during this - * iteration. It is a driver bug to fail destruction. - */ - WARN_ON(!list_empty(&ufile->uobjects)); - break; - } - - ufile->ucontext->cleanup_retryable = false; - if (!list_empty(&ufile->uobjects)) - __uverbs_cleanup_ufile(ufile, reason); + while (!list_empty(&ufile->uobjects) && + !__uverbs_cleanup_ufile(ufile, reason)) { + } + if (WARN_ON(!list_empty(&ufile->uobjects))) + __uverbs_cleanup_ufile(ufile, RDMA_REMOVE_DRIVER_FAILURE); ufile_destroy_ucontext(ufile, reason); done: diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 72b209ca77c7..92c9d2a548f3 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -681,8 +681,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, return 0; ret = ib_dealloc_xrcd_user(xrcd, &attrs->driver_udata); - - if (ib_is_destroy_retryable(ret, why, uobject)) { + if (ret) { atomic_inc(&xrcd->usecnt); return ret; } @@ -690,7 +689,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, if (inode) xrcd_table_delete(dev, inode); - return ret; + return 0; } static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 0658101fca00..585042ead939 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -88,7 +88,7 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, return -EBUSY; ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; for (i = 0; i < table_size; i++) @@ -96,7 +96,7 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, kfree(rwq_ind_tbl); kfree(ind_tbl); - return ret; + return 0; } static int uverbs_free_xrcd(struct ib_uobject *uobject, @@ -108,9 +108,8 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject, container_of(uobject, struct ib_uxrcd_object, uobject); int ret; - ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&uxrcd->refcnt)) + return -EBUSY; mutex_lock(&attrs->ufile->device->xrcd_tree_mutex); ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs); @@ -124,11 +123,9 @@ static int uverbs_free_pd(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_pd *pd = uobject->object; - int ret; - ret = ib_destroy_usecnt(&pd->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&pd->usecnt)) + return -EBUSY; return ib_dealloc_pd_user(pd, &attrs->driver_udata); } diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index b3c6c066b601..999da9c79866 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c 
@@ -42,9 +42,8 @@ static int uverbs_free_counters(struct ib_uobject *uobject, struct ib_counters *counters = uobject->object; int ret; - ret = ib_destroy_usecnt(&counters->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&counters->usecnt)) + return -EBUSY; ret = counters->device->ops.destroy_counters(counters); if (ret) diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 8dabd05988b2..370ad7c83f88 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -46,7 +46,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject, int ret; ret = ib_destroy_cq_user(cq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; ib_uverbs_release_ucq( @@ -55,7 +55,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject, ev_queue) : NULL, ucq); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index d5a1de33c2c9..98c522cf86d6 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -39,11 +39,9 @@ static int uverbs_free_dm(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_dm *dm = uobject->object; - int ret; - ret = ib_destroy_usecnt(&dm->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&dm->usecnt)) + return -EBUSY; return dm->device->ops.dealloc_dm(dm, attrs); } diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index 459cf165b231..d42ed7ff223e 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -39,11 +39,9 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_flow_action *action = uobject->object; - int ret; - ret = ib_destroy_usecnt(&action->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&action->usecnt)) + return -EBUSY; return action->device->ops.destroy_flow_action(action); } diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c index 3bf8dcdfe7eb..294cd29d5ced 100644 --- a/drivers/infiniband/core/uverbs_std_types_qp.c +++ b/drivers/infiniband/core/uverbs_std_types_qp.c @@ -32,14 +32,14 @@ static int uverbs_free_qp(struct ib_uobject *uobject, } ret = ib_destroy_qp_user(qp, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; if (uqp->uxrcd) atomic_dec(&uqp->uxrcd->refcnt); ib_uverbs_release_uevent(&uqp->uevent); - return ret; + return 0; } static int check_creation_flags(enum ib_qp_type qp_type, diff --git a/drivers/infiniband/core/uverbs_std_types_srq.c b/drivers/infiniband/core/uverbs_std_types_srq.c index c0ecbba26bf4..e5513f828bdc 100644 --- a/drivers/infiniband/core/uverbs_std_types_srq.c +++ b/drivers/infiniband/core/uverbs_std_types_srq.c @@ -18,7 +18,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, int ret; ret = ib_destroy_srq_user(srq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; if (srq_type == IB_SRQT_XRC) { @@ -30,7 +30,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, } ib_uverbs_release_uevent(uevent); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_CREATE)( diff 
--git a/drivers/infiniband/core/uverbs_std_types_wq.c b/drivers/infiniband/core/uverbs_std_types_wq.c index f2e6a625724a..7ded8339346f 100644 --- a/drivers/infiniband/core/uverbs_std_types_wq.c +++ b/drivers/infiniband/core/uverbs_std_types_wq.c @@ -17,11 +17,11 @@ static int uverbs_free_wq(struct ib_uobject *uobject, int ret; ret = ib_destroy_wq_user(wq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; ib_uverbs_release_uevent(&uwq->uevent); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_WQ_CREATE)( diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 611ce21157de..c315dbe0b6bd 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1310,7 +1310,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, else ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; devx_event_table = &dev->devx_event_table; @@ -2181,7 +2181,7 @@ static int devx_umem_cleanup(struct ib_uobject *uobject, int err; err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ib_is_destroy_retryable(err, why, uobject)) + if (err) return err; ib_umem_release(obj->umem); diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 492cfe063bca..25da0b05b4e2 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -2035,11 +2035,9 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_flow_matcher *obj = uobject->object; - int ret; - ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&obj->usecnt)) + return -EBUSY; kfree(obj); return 0; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9420827d2421..7e330f4a6d33 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1471,6 +1471,8 @@ enum rdma_remove_reason { RDMA_REMOVE_DRIVER_REMOVE, /* uobj is being cleaned-up before being committed */ RDMA_REMOVE_ABORT, + /* The driver failed to destroy the uobject and is being disconnected */ + RDMA_REMOVE_DRIVER_FAILURE, }; struct ib_rdmacg_object { @@ -1483,8 +1485,6 @@ struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; - bool cleanup_retryable; - struct ib_rdmacg_object cg_obj; /* * Implementation details of the RDMA core, don't use in drivers: @@ -2903,46 +2903,6 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata, return ib_is_buffer_cleared(udata->inbuf + offset, len); } -/** - * ib_is_destroy_retryable - Check whether the uobject destruction - * is retryable. - * @ret: The initial destruction return code - * @why: remove reason - * @uobj: The uobject that is destroyed - * - * This function is a helper function that IB layer and low-level drivers - * can use to consider whether the destruction of the given uobject is - * retry-able. - * It checks the original return code, if it wasn't success the destruction - * is retryable according to the ucontext state (i.e. cleanup_retryable) and - * the remove reason. (i.e. why). - * Must be called with the object locked for destroy. 
- */ -static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why, - struct ib_uobject *uobj) -{ - return ret && (why == RDMA_REMOVE_DESTROY || - uobj->context->cleanup_retryable); -} - -/** - * ib_destroy_usecnt - Called during destruction to check the usecnt - * @usecnt: The usecnt atomic - * @why: remove reason - * @uobj: The uobject that is destroyed - * - * Non-zero usecnts will block destruction unless destruction was triggered by - * a ucontext cleanup. - */ -static inline int ib_destroy_usecnt(atomic_t *usecnt, - enum rdma_remove_reason why, - struct ib_uobject *uobj) -{ - if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj)) - return -EBUSY; - return 0; -} - /** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for -- cgit v1.2.3-70-g09d2 From c5633a72a1b8a2740bdb1495eab010f1124fd5ee Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 4 Nov 2020 16:45:56 +0200 Subject: RDMA/core: Make FD destroy callback void All FD object destroy implementations return 0, so declare this callback void. Link: https://lore.kernel.org/r/20201104144556.3809085-3-leon@kernel.org Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 3 ++- drivers/infiniband/core/uverbs_std_types.c | 3 +-- drivers/infiniband/core/uverbs_std_types_async_fd.c | 5 ++--- drivers/infiniband/hw/mlx5/devx.c | 10 ++++------ include/rdma/uverbs_types.h | 4 ++-- 5 files changed, 11 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 61fa0a3f8e5e..c44079b9158e 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -571,7 +571,8 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, const struct uverbs_obj_fd_type *fd_type = container_of( uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); - return fd_type->destroy_object(uobj, why); + fd_type->destroy_object(uobj, why); + return 0; } static void remove_handle_fd_uobject(struct ib_uobject *uobj) diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 585042ead939..13776a66e2e4 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -154,7 +154,7 @@ void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue) spin_unlock_irq(&event_queue->lock); } -static int +static void uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, enum rdma_remove_reason why) { @@ -163,7 +163,6 @@ uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, uobj); ib_uverbs_free_event_queue(&file->ev_queue); - return 0; } int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c index 61899eaf1f91..cc24cfdf7aee 100644 --- a/drivers/infiniband/core/uverbs_std_types_async_fd.c +++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c @@ -19,8 +19,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)( return 0; } -static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct ib_uverbs_async_event_file *event_file = container_of(uobj, 
struct ib_uverbs_async_event_file, uobj); @@ -30,7 +30,6 @@ static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, if (why == RDMA_REMOVE_DRIVER_REMOVE) ib_uverbs_async_handler(event_file, 0, IB_EVENT_DEVICE_FATAL, NULL, NULL); - return 0; } int uverbs_async_event_release(struct inode *inode, struct file *filp) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index c315dbe0b6bd..f6839472b6fb 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2594,8 +2594,8 @@ static const struct file_operations devx_async_event_fops = { .llseek = no_llseek, }; -static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_cmd_event_file *comp_ev_file = container_of(uobj, struct devx_async_cmd_event_file, @@ -2617,11 +2617,10 @@ static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, kvfree(entry); } spin_unlock_irq(&comp_ev_file->ev_queue.lock); - return 0; }; -static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void devx_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_event_file *ev_file = container_of(uobj, struct devx_async_event_file, @@ -2665,7 +2664,6 @@ static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, mutex_unlock(&dev->devx_event_table.event_xa_lock); put_device(&dev->ib_dev.dev); - return 0; }; DECLARE_UVERBS_NAMED_METHOD( diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 06db27e35f40..a27e9fb4903f 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -138,8 +138,8 @@ struct uverbs_obj_fd_type { * because the driver is removed or the FD is closed. */ struct uverbs_obj_type type; - int (*destroy_object)(struct ib_uobject *uobj, - enum rdma_remove_reason why); + void (*destroy_object)(struct ib_uobject *uobj, + enum rdma_remove_reason why); const struct file_operations *fops; const char *name; int flags; -- cgit v1.2.3-70-g09d2 From 2af29468e3b3793b49f6c4385d2cabcea43fe076 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:37 +0100 Subject: RDMA/core: Remove ib_dma_{alloc,free}_coherent These two functions are entirely unused. 
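For context, the streaming ib_dma_* helpers remain the supported way for a kernel ULP to hand memory to an HCA; a minimal sketch, with buf/size as placeholders and error unwinding trimmed:

buf = kzalloc(size, GFP_KERNEL);
if (!buf)
	return -ENOMEM;
dma_addr = ib_dma_map_single(ibdev, buf, size, DMA_BIDIRECTIONAL);
if (ib_dma_mapping_error(ibdev, dma_addr)) {
	kfree(buf);
	return -ENOMEM;
}
/* ... post work requests that reference dma_addr ... */
ib_dma_unmap_single(ibdev, dma_addr, size, DMA_BIDIRECTIONAL);
kfree(buf);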
Link: https://lore.kernel.org/r/20201106181941.1878556-7-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7e330f4a6d33..a0f9b1c133e4 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4061,35 +4061,6 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, dma_sync_single_for_device(dev->dma_device, addr, size, dir); } -/** - * ib_dma_alloc_coherent - Allocate memory and map it for DMA - * @dev: The device for which the DMA address is requested - * @size: The size of the region to allocate in bytes - * @dma_handle: A pointer for returning the DMA address of the region - * @flag: memory allocator flags - */ -static inline void *ib_dma_alloc_coherent(struct ib_device *dev, - size_t size, - dma_addr_t *dma_handle, - gfp_t flag) -{ - return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag); -} - -/** - * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent() - * @dev: The device for which the DMA addresses were allocated - * @size: The size of the region - * @cpu_addr: the address returned by ib_dma_alloc_coherent() - * @dma_handle: the DMA address returned by ib_dma_alloc_coherent() - */ -static inline void ib_dma_free_coherent(struct ib_device *dev, - size_t size, void *cpu_addr, - dma_addr_t dma_handle) -{ - dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); -} - /* ib_reg_user_mr - register a memory region for virtual addresses from kernel * space. This function should be called when 'current' is the owning MM. */ -- cgit v1.2.3-70-g09d2 From 8ecfca68dc4cbee1272a0161e3f2fb9387dc6930 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:34 +0100 Subject: RDMA: Lift ibdev_to_node from rds to common code Lift the ibdev_to_node from rds to common code and document it. Link: https://lore.kernel.org/r/20201106181941.1878556-4-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 13 +++++++++++++ net/rds/ib.h | 7 ------- 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a0f9b1c133e4..3feb42ef82dc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4549,6 +4549,19 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) return coredev->owner; } +/** + * ibdev_to_node - return the NUMA node for a given ib_device + * @dev: device to get the NUMA node for. + */ +static inline int ibdev_to_node(struct ib_device *ibdev) +{ + struct device *parent = ibdev->dev.parent; + + if (!parent) + return NUMA_NO_NODE; + return dev_to_node(parent); +} + /** * rdma_device_to_drv_device - Helper macro to reach back to driver's * ib_device holder structure from device pointer. diff --git a/net/rds/ib.h b/net/rds/ib.h index 8dfff43cf07f..c23a11d9ad36 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -264,13 +264,6 @@ struct rds_ib_device { int *vector_load; }; -static inline int ibdev_to_node(struct ib_device *ibdev) -{ - struct device *parent; - - parent = ibdev->dev.parent; - return parent ? 
dev_to_node(parent) : NUMA_NO_NODE; -} #define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev) /* bits for i_ack_flags */ -- cgit v1.2.3-70-g09d2 From b045db62f6f61c2f0f993696abe620379db34163 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 15 Nov 2020 13:43:05 +0200 Subject: RDMA/mlx5: Use ib_umem_find_best_pgoff() for SRQ SRQ uses a quantized and scaled page_offset, which is another variation of ib_umem_find_best_pgsz(). Add mlx5_umem_find_best_quantized_pgoff() to perform this calculation for each mailbox. A macro shows how the calculation is directly connected to the mailbox format. This new routine replaces the limited mlx5_ib_cont_pages() and mlx5_ib_get_buf_offset() pairing which would reject valid configurations rather than adjust the page_size to make it work. In turn this is much more aggressive about choosing large page sizes for these objects and when THP is enabled it will now often find a single page solution. Link: https://lore.kernel.org/r/20201115114311.136250-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mem.c | 45 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 31 +++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/srq.c | 20 ++++++++-------- include/rdma/ib_umem.h | 42 +++++++++++++++++++++++++++++++++ 4 files changed, 128 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 92e7621ec858..fd9778113d26 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -107,6 +107,51 @@ void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, } } +/* + * Compute the page shift and page_offset for mailboxes that use a quantized + * page_offset. The granulatity of the page offset scales according to page + * size. + */ +unsigned long __mlx5_umem_find_best_quantized_pgoff( + struct ib_umem *umem, unsigned long pgsz_bitmap, + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, + unsigned int *page_offset_quantized) +{ + const u64 page_offset_mask = (1 << page_offset_bits) - 1; + unsigned long page_size; + u64 page_offset; + + page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask); + if (!page_size) + return 0; + + /* + * page size is the largest possible page size. + * + * Reduce the page_size, and thus the page_offset and quanta, until the + * page_offset fits into the mailbox field. Once page_size < scale this + * loop is guaranteed to terminate. + */ + page_offset = ib_umem_dma_offset(umem, page_size); + while (page_offset & ~(u64)(page_offset_mask * (page_size / scale))) { + page_size /= 2; + page_offset = ib_umem_dma_offset(umem, page_size); + } + + /* + * The address is not aligned, or otherwise cannot be represented by the + * page_offset. 
+ */ + if (!(pgsz_bitmap & page_size)) + return 0; + + *page_offset_quantized = + (unsigned long)page_offset / (page_size / scale); + if (WARN_ON(*page_offset_quantized > page_offset_mask)) + return 0; + return page_size; +} + int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) { u64 page_size; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index bb44080170be..2f08a5b4a438 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -69,6 +69,37 @@ __mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits, pgsz_shift), \ iova) +static __always_inline unsigned long +__mlx5_page_offset_to_bitmask(unsigned int page_offset_bits, + unsigned int offset_shift) +{ + unsigned int largest_offset_shift = + min_t(unsigned long, page_offset_bits - 1 + offset_shift, + BITS_PER_LONG - 1); + + return GENMASK(largest_offset_shift, offset_shift); +} + +/* + * QP/CQ/WQ/etc type commands take a page offset that satisifies: + * page_offset_quantized * (page_size/scale) = page_offset + * Which restricts allowed page sizes to ones that satisify the above. + */ +unsigned long __mlx5_umem_find_best_quantized_pgoff( + struct ib_umem *umem, unsigned long pgsz_bitmap, + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, + unsigned int *page_offset_quantized); +#define mlx5_umem_find_best_quantized_pgoff(umem, typ, log_pgsz_fld, \ + pgsz_shift, page_offset_fld, \ + scale, page_offset_quantized) \ + __mlx5_umem_find_best_quantized_pgoff( \ + umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ + __mlx5_bit_sz(typ, page_offset_fld), \ + GENMASK(31, order_base_2(scale)), scale, \ + page_offset_quantized) + enum { MLX5_IB_MMAP_OFFSET_START = 9, MLX5_IB_MMAP_OFFSET_END = 255, diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index cb5ad045da9a..7dfdc9e54866 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -51,8 +51,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, udata, struct mlx5_ib_ucontext, ibucontext); size_t ucmdlen; int err; - int page_shift; - u32 offset; + unsigned int page_offset_quantized; + unsigned int page_size; u32 uidx = MLX5_IB_DEFAULT_UIDX; ucmdlen = min(udata->inlen, sizeof(ucmd)); @@ -85,22 +85,22 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, return err; } - mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &page_shift); - err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, - &offset); - if (err) { + page_size = mlx5_umem_find_best_quantized_pgoff( + srq->umem, srqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { mlx5_ib_warn(dev, "bad offset\n"); goto err_umem; } - in->pas = kvcalloc(ib_umem_num_dma_blocks(srq->umem, 1UL << page_shift), + in->pas = kvcalloc(ib_umem_num_dma_blocks(srq->umem, page_size), sizeof(*in->pas), GFP_KERNEL); if (!in->pas) { err = -ENOMEM; goto err_umem; } - mlx5_ib_populate_pas(srq->umem, 1UL << page_shift, in->pas, 0); + mlx5_ib_populate_pas(srq->umem, page_size, in->pas, 0); err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db); if (err) { @@ -108,8 +108,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, goto err_in; } - in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - in->page_offset = offset; + in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT; + in->page_offset = 
page_offset_quantized; in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type != IB_SRQT_BASIC) diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 70597508c765..7752211c9638 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -34,6 +34,13 @@ static inline int ib_umem_offset(struct ib_umem *umem) return umem->address & ~PAGE_MASK; } +static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem, + unsigned long pgsz) +{ + return (sg_dma_address(umem->sg_head.sgl) + ib_umem_offset(umem)) & + (pgsz - 1); +} + static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem, unsigned long pgsz) { @@ -79,6 +86,35 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, unsigned long virt); +/** + * ib_umem_find_best_pgoff - Find best HW page size + * + * @umem: umem struct + * @pgsz_bitmap bitmap of HW supported page sizes + * @pgoff_bitmask: Mask of bits that can be represented with an offset + * + * This is very similar to ib_umem_find_best_pgsz() except instead of accepting + * an IOVA it accepts a bitmask specifying what address bits can be represented + * with a page offset. + * + * For instance if the HW has multiple page sizes, requires 64 byte alignemnt, + * and can support aligned offsets up to 4032 then pgoff_bitmask would be + * "111111000000". + * + * If the pgoff_bitmask requires either alignment in the low bit or an + * unavailable page size for the high bits, this function returns 0. + */ +static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, + unsigned long pgsz_bitmap, + u64 pgoff_bitmask) +{ + struct scatterlist *sg = umem->sg_head.sgl; + dma_addr_t dma_addr; + + dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK); + return ib_umem_find_best_pgsz(umem, pgsz_bitmap, + dma_addr & pgoff_bitmask); +} #else /* CONFIG_INFINIBAND_USER_MEM */ @@ -101,6 +137,12 @@ static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, { return 0; } +static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, + unsigned long pgsz_bitmap, + u64 pgoff_bitmask) +{ + return 0; +} #endif /* CONFIG_INFINIBAND_USER_MEM */ -- cgit v1.2.3-70-g09d2 From 5a7a9e038b032137ae9c45d5429f18a2ffdf7d42 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:38 +0100 Subject: RDMA/core: remove use of dma_virt_ops Use the ib_dma_* helpers to skip the DMA translation instead. This removes the last user of dma_virt_ops and keeps the weird layering violation inside the RDMA core instead of burdening the DMA mapping subsystems with it. This also means the software RDMA drivers now don't have to mess with DMA parameters that are not relevant to them at all, and that in the future we can use PCI P2P transfers even for software RDMA, as there is no first fake layer of DMA mapping that the P2P DMA support would have to undo.
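What this buys a software driver can be sketched at its registration path; a hedged illustration (the sdev/foo_ names are hypothetical, and the exact call sites for rdmavt, rxe and siw are not all in the hunks shown below): instead of installing dma_virt_ops and coercing a DMA mask, the driver registers with a NULL dma_device:

/* No dma_parms, no DMA mask: with a NULL dma_device the ib_dma_*
 * wrappers stash kernel virtual addresses directly into the dma
 * address fields. */
err = ib_register_device(&sdev->ib_dev, "foo_%d", NULL);
if (err)
	return err;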
Link: https://lore.kernel.org/r/20201106181941.1878556-8-hch@lst.de Signed-off-by: Christoph Hellwig Tested-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 43 +++++++++++---------- drivers/infiniband/core/rw.c | 5 ++- drivers/infiniband/sw/rdmavt/Kconfig | 1 - drivers/infiniband/sw/rdmavt/mr.c | 6 +-- drivers/infiniband/sw/rdmavt/vt.c | 8 ---- drivers/infiniband/sw/rxe/Kconfig | 1 - drivers/infiniband/sw/rxe/rxe_verbs.c | 7 ---- drivers/infiniband/sw/rxe/rxe_verbs.h | 1 - drivers/infiniband/sw/siw/Kconfig | 1 - drivers/infiniband/sw/siw/siw.h | 1 - drivers/infiniband/sw/siw/siw_main.c | 7 ---- drivers/nvme/target/rdma.c | 3 +- include/rdma/ib_verbs.h | 73 ++++++++++++++++++++++++----------- 13 files changed, 81 insertions(+), 76 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index ce26564d4edf..3ab1edea6acb 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1209,25 +1209,6 @@ out: return ret; } -static void setup_dma_device(struct ib_device *device, - struct device *dma_device) -{ - /* - * If the caller does not provide a DMA capable device then the IB - * device will be used. In this case the caller should fully setup the - * ibdev for DMA. This usually means using dma_virt_ops. - */ -#ifdef CONFIG_DMA_VIRT_OPS - if (!dma_device) { - device->dev.dma_ops = &dma_virt_ops; - dma_device = &device->dev; - } -#endif - WARN_ON(!dma_device); - device->dma_device = dma_device; - WARN_ON(!device->dma_device->dma_parms); -} - /* * setup_device() allocates memory and sets up data that requires calling the * device ops, this is the only reason these actions are not done during @@ -1373,7 +1354,14 @@ int ib_register_device(struct ib_device *device, const char *name, if (ret) return ret; - setup_dma_device(device, dma_device); + /* + * If the caller does not provide a DMA capable device then the IB core + * will set up ib_sge and scatterlist structures that stash the kernel + * virtual address into the address field. 
+ */ + WARN_ON(dma_device && !dma_device->dma_parms); + device->dma_device = dma_device; + ret = setup_device(device); if (ret) return ret; @@ -2708,6 +2696,21 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) } EXPORT_SYMBOL(ib_set_device_ops); +#ifdef CONFIG_INFINIBAND_VIRT_DMA +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + sg_dma_address(s) = (uintptr_t)sg_virt(s); + sg_dma_len(s) = s->length; + } + return nents; +} +EXPORT_SYMBOL(ib_dma_virt_map_sg); +#endif /* CONFIG_INFINIBAND_VIRT_DMA */ + static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { [RDMA_NL_LS_OP_RESOLVE] = { .doit = ib_nl_handle_resolve_resp, diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 13f43ab7220b..a96030b784eb 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -285,8 +285,11 @@ static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir) { - if (is_pci_p2pdma_page(sg_page(sg))) + if (is_pci_p2pdma_page(sg_page(sg))) { + if (WARN_ON_ONCE(ib_uses_virt_dma(dev))) + return 0; return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); + } return ib_dma_map_sg(dev, sg, sg_cnt, dir); } diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig index c8e268082952..0df48b3a6b56 100644 --- a/drivers/infiniband/sw/rdmavt/Kconfig +++ b/drivers/infiniband/sw/rdmavt/Kconfig @@ -4,6 +4,5 @@ config INFINIBAND_RDMAVT depends on INFINIBAND_VIRT_DMA depends on X86_64 depends on PCI - select DMA_VIRT_OPS help This is a common software verbs provider for RDMA networks. diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 8490fdb9c91e..90fc234f489a 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -324,8 +324,6 @@ static void __rvt_free_mr(struct rvt_mr *mr) * @acc: access flags * * Return: the memory region on success, otherwise returns an errno. - * Note that all DMA addresses should be created via the functions in - * struct dma_virt_ops. */ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) { @@ -766,7 +764,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, /* * We use LKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). */ if (sge->lkey == 0) { struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); @@ -877,7 +875,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, /* * We use RKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). */ rcu_read_lock(); if (rkey == 0) { diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 5bd817490b1f..49cec85a372a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -525,7 +525,6 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) int rvt_register_device(struct rvt_dev_info *rdi) { int ret = 0, i; - u64 dma_mask; if (!rdi) return -EINVAL; @@ -580,13 +579,6 @@ int rvt_register_device(struct rvt_dev_info *rdi) /* Completion queues */ spin_lock_init(&rdi->n_cqs_lock); - /* DMA Operations */ - rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms; - dma_mask = IS_ENABLED(CONFIG_64BIT) ? 
DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - ret = dma_coerce_mask_and_coherent(&rdi->ibdev.dev, dma_mask); - if (ret) - goto bail_wss; - /* Protection Domain */ spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig index 8810bfa68049..452149066792 100644 --- a/drivers/infiniband/sw/rxe/Kconfig +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -5,7 +5,6 @@ config RDMA_RXE depends on INFINIBAND_VIRT_DMA select NET_UDP_TUNNEL select CRYPTO_CRC32 - select DMA_VIRT_OPS help This driver implements the InfiniBand RDMA transport over the Linux network stack. It enables a system with a diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index bbc89002c456..a2bd91aaa5de 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1128,7 +1128,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) int err; struct ib_device *dev = &rxe->ib_dev; struct crypto_shash *tfm; - u64 dma_mask; strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); @@ -1139,12 +1138,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) dev->local_dma_lkey = 0; addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); - dev->dev.dma_parms = &rxe->dma_parms; - dma_set_max_seg_size(&dev->dev, UINT_MAX); - dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - err = dma_coerce_mask_and_coherent(&dev->dev, dma_mask); - if (err) - return err; dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 57967fc39c04..79e0a5a878da 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -351,7 +351,6 @@ struct rxe_port { struct rxe_dev { struct ib_device ib_dev; struct ib_device_attr attr; - struct device_dma_parameters dma_parms; int max_ucontext; int max_inline_data; struct mutex usdev_lock; diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig index 3450ba5081df..1b5105cbabae 100644 --- a/drivers/infiniband/sw/siw/Kconfig +++ b/drivers/infiniband/sw/siw/Kconfig @@ -2,7 +2,6 @@ config RDMA_SIW tristate "Software RDMA over TCP/IP (iWARP) driver" depends on INET && INFINIBAND && LIBCRC32C depends on INFINIBAND_VIRT_DMA - select DMA_VIRT_OPS help This driver implements the iWARP RDMA transport over the Linux TCP/IP network stack. 
It enables a system with a diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index e9753831ac3f..adda78996219 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -69,7 +69,6 @@ struct siw_pd { struct siw_device { struct ib_device base_dev; - struct device_dma_parameters dma_parms; struct net_device *netdev; struct siw_dev_cap attrs; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index ac6756227677..d9de062852c4 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -306,7 +306,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) struct siw_device *sdev = NULL; struct ib_device *base_dev; struct device *parent = netdev->dev.parent; - u64 dma_mask; int rv; if (!parent) { @@ -361,12 +360,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) */ base_dev->phys_port_cnt = 1; base_dev->dev.parent = parent; - base_dev->dev.dma_parms = &sdev->dma_parms; - dma_set_max_seg_size(&base_dev->dev, UINT_MAX); - dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - if (dma_coerce_mask_and_coherent(&base_dev->dev, dma_mask)) - goto error; - base_dev->num_comp_vectors = num_possible_cpus(); xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index ae6620489457..5c1e7cb7fe0d 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -414,7 +414,8 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, if (ib_dma_mapping_error(ndev->device, r->send_sge.addr)) goto out_free_rsp; - r->req.p2p_client = &ndev->device->dev; + if (!ib_uses_virt_dma(ndev->device)) + r->req.p2p_client = &ndev->device->dev; r->send_sge.length = sizeof(*r->req.cqe); r->send_sge.lkey = ndev->pd->local_dma_lkey; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3feb42ef82dc..174c1bffa00c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3906,6 +3906,16 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) -ENOSYS; } +/* + * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to + * NULL. This causes the ib_dma* helpers to just stash the kernel virtual + * address into the dma address. 
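 * [Editor's sketch, not upstream text: for a ULP the wrappers below make
 *  the two cases indistinguishable, e.g.
 *
 *	addr = ib_dma_map_single(dev, buf, len, DMA_TO_DEVICE);
 *	if (ib_dma_mapping_error(dev, addr))
 *		return -ENOMEM;
 *
 *  yields a real bus address for a HW device, while for rxe/siw/rdmavt
 *  (dma_device == NULL) it simply returns addr == (uintptr_t)buf and the
 *  error check always passes.]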
+ */ +static inline bool ib_uses_virt_dma(struct ib_device *dev) +{ + return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device; +} + /** * ib_dma_mapping_error - check a DMA addr for error * @dev: The device for which the dma_addr was created @@ -3913,6 +3923,8 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { + if (ib_uses_virt_dma(dev)) + return 0; return dma_mapping_error(dev->dma_device, dma_addr); } @@ -3927,6 +3939,8 @@ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)cpu_addr; return dma_map_single(dev->dma_device, cpu_addr, size, direction); } @@ -3941,7 +3955,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_single(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_single(dev->dma_device, addr, size, direction); } /** @@ -3958,6 +3973,8 @@ static inline u64 ib_dma_map_page(struct ib_device *dev, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)(page_address(page) + offset); return dma_map_page(dev->dma_device, page, offset, size, direction); } @@ -3972,7 +3989,30 @@ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_page(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_page(dev->dma_device, addr, size, direction); +} + +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents); +static inline int ib_dma_map_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (ib_uses_virt_dma(dev)) + return ib_dma_virt_map_sg(dev, sg, nents); + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); +} + +static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (!ib_uses_virt_dma(dev)) + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); } /** @@ -3986,7 +4026,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - return dma_map_sg(dev->dma_device, sg, nents, direction); + return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4000,24 +4040,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - dma_unmap_sg(dev->dma_device, sg, nents, direction); -} - -static inline int ib_dma_map_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, - dma_attrs); -} - -static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); + ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4028,6 +4051,8 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, */ static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) { 
+ if (ib_uses_virt_dma(dev)) + return UINT_MAX; return dma_get_max_seg_size(dev->dma_device); } @@ -4043,7 +4068,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** @@ -4058,7 +4084,8 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_device(dev->dma_device, addr, size, dir); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_device(dev->dma_device, addr, size, dir); } /* ib_reg_user_mr - register a memory region for virtual addresses from kernel -- cgit v1.2.3-70-g09d2 From 172292be01dbd6c26aba23f62e8ec090f31cdb71 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:41 +0100 Subject: dma-mapping: remove dma_virt_ops Now that the RDMA core deals with devices that only do DMA mapping in lower layers properly, there is no user for dma_virt_ops and it can be removed. Link: https://lore.kernel.org/r/20201106181941.1878556-11-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/dma-mapping.h | 2 -- kernel/dma/Kconfig | 5 ---- kernel/dma/Makefile | 1 - kernel/dma/virt.c | 61 --------------------------------------------- 4 files changed, 69 deletions(-) delete mode 100644 kernel/dma/virt.c (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 956151052d45..2aaed35b556d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -565,6 +565,4 @@ static inline int dma_mmap_wc(struct device *dev, int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, dma_addr_t dma_start, u64 size); -extern const struct dma_map_ops dma_virt_ops; - #endif /* _LINUX_DMA_MAPPING_H */ diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index c99de4a21458..fd2db2665fc6 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -75,11 +75,6 @@ config ARCH_HAS_DMA_PREP_COHERENT config ARCH_HAS_FORCE_DMA_UNENCRYPTED bool -config DMA_VIRT_OPS - bool - depends on HAS_DMA - select DMA_OPS - config SWIOTLB bool select NEED_DMA_MAP_STATE diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index dc755ab68aab..cd1d86358a7a 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -5,7 +5,6 @@ obj-$(CONFIG_DMA_OPS) += ops_helpers.o obj-$(CONFIG_DMA_OPS) += dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o -obj-$(CONFIG_DMA_VIRT_OPS) += virt.o obj-$(CONFIG_DMA_API_DEBUG) += debug.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c deleted file mode 100644 index 59d32317dd57..000000000000 --- a/kernel/dma/virt.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DMA operations that map to virtual addresses without flushing memory. 
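 * [Editor's note, not part of the deleted file: none of this logic is
 *  lost. The preceding patches inlined it into the RDMA layer, e.g. the
 *  body of dma_virt_map_page() now lives in ib_dma_map_page(), and
 *  dma_virt_map_sg()'s per-entry rule
 *
 *	sg_dma_address(sg) = (uintptr_t)sg_virt(sg);
 *	sg_dma_len(sg) = sg->length;
 *
 *  is reproduced in ib_dma_virt_map_sg().]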
- */ -#include -#include -#include -#include - -static void *dma_virt_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - ret = (void *)__get_free_pages(gfp | __GFP_ZERO, get_order(size)); - if (ret) - *dma_handle = (uintptr_t)ret; - return ret; -} - -static void dma_virt_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - free_pages((unsigned long)cpu_addr, get_order(size)); -} - -static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return (uintptr_t)(page_address(page) + offset); -} - -static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - sg_dma_address(sg) = (uintptr_t)sg_virt(sg); - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -const struct dma_map_ops dma_virt_ops = { - .alloc = dma_virt_alloc, - .free = dma_virt_free, - .map_page = dma_virt_map_page, - .map_sg = dma_virt_map_sg, - .alloc_pages = dma_common_alloc_pages, - .free_pages = dma_common_free_pages, -}; -EXPORT_SYMBOL(dma_virt_ops); -- cgit v1.2.3-70-g09d2 From 2b1f747071c5ce5ad571d80c1541b732cf07f9c1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 17 Nov 2020 09:01:47 +0200 Subject: RDMA/core: Allow drivers to disable restrack DB Driver QP types are a special case with no IBTA restrictions. For example, EFA implemented creation of this QP type as a regular one, while mlx5 split creation into two steps: create and modify. That separation leads to a situation where a DC QP (mlx5) is always added at the same xarray index, zero. This change allows drivers like mlx5 to simply disable restrack DB tracking, while keeping the kref on the memory intact.
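As a usage sketch (hypothetical driver code; MY_CREATE_FLAG_INTERNAL is an invented flag, not part of this patch), a driver marks such a QP from its create path so that the core's subsequent rdma_restrack_add() skips the entry:

	/* Hedged sketch, modeled on the mlx4/mlx5 hunks below: opt the QP
	 * out of the restrack DB (and nldev output) while leaving the
	 * kref-based lifetime handling untouched. */
	static void my_mark_internal_qp(struct ib_qp *ibqp,
					struct ib_qp_init_attr *attr)
	{
		if (attr->create_flags & MY_CREATE_FLAG_INTERNAL)
			rdma_restrack_no_track(&ibqp->res);
	}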
Fixes: 52e0a118a203 ("RDMA/restrack: Track driver QP types in resource tracker") Link: https://lore.kernel.org/r/20201117070148.1974114-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 2 +- drivers/infiniband/core/restrack.c | 12 ++++++++++-- drivers/infiniband/hw/mlx4/qp.c | 5 +++++ drivers/infiniband/hw/mlx5/qp.c | 2 +- include/rdma/restrack.h | 24 ++++++++++++++++++++++++ 5 files changed, 41 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 2c67ba6a2725..92745522250e 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -285,7 +285,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) struct rdma_counter *counter; int ret; - if (!qp->res.valid || rdma_is_kernel_res(&qp->res)) + if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res)) return 0; if (!rdma_is_port_valid(dev, port)) diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index a123b22b454a..e0a41c867002 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -221,11 +221,14 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); struct rdma_restrack_root *rt; - int ret; + int ret = 0; if (!dev) return; + if (res->no_track) + goto out; + rt = &dev->res[res->type]; if (res->type == RDMA_RESTRACK_QP) { @@ -253,6 +256,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) &rt->next_id, GFP_KERNEL); } +out: if (!ret) res->valid = true; } @@ -325,6 +329,9 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) return; } + if (res->no_track) + goto out; + dev = res_to_dev(res); if (WARN_ON(!dev)) return; @@ -335,8 +342,9 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP) return; WARN_ON(old != res); - res->valid = false; +out: + res->valid = false; rdma_restrack_put(res); wait_for_completion(&res->comp); } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 7636e7f14858..651785bd57f2 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1561,6 +1561,11 @@ static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp, if (err) return err; + if (init_attr->create_flags & + (MLX4_IB_SRIOV_SQP | MLX4_IB_SRIOV_TUNNEL_QP)) + /* Internal QP created with ib_create_qp */ + rdma_restrack_no_track(&qp->ibqp.res); + qp->port = init_attr->port_num; qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? 
sqpn : 1; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8d40c929b2f4..9d32f3173231 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2410,7 +2410,7 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd, } qp->state = IB_QPS_RESET; - + rdma_restrack_no_track(&qp->ibqp.res); return 0; } diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index d3a1cc5be7bc..05e18839eaff 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -68,6 +68,14 @@ struct rdma_restrack_entry { * As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI */ bool valid; + /** + * @no_track: don't add this entry to restrack DB + * + * This field is used to mark an entry that doesn't need to be added to + * internal restrack DB and presented later to the users at the nldev + * query stage. + */ + u8 no_track : 1; /* * @kref: Protect destroy of the resource */ @@ -145,4 +153,20 @@ int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, u32 id); + +/** + * rdma_restrack_no_track() - don't add resource to the DB + * @res: resource entry + * + * Every user of thie API should be cross examined. + * Probaby you don't need to use this function. + */ +static inline void rdma_restrack_no_track(struct rdma_restrack_entry *res) +{ + res->no_track = true; +} +static inline bool rdma_restrack_is_tracked(struct rdma_restrack_entry *res) +{ + return !res->no_track; +} #endif /* _RDMA_RESTRACK_H_ */ -- cgit v1.2.3-70-g09d2 From 66f57b871efc576dfe8117b65af4e805e03ea689 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 17 Nov 2020 09:01:48 +0200 Subject: RDMA/restrack: Support all QP types The latest changes in restrack name handling made it possible to simplify the QP creation code so that it supports all QP types. For example, XRC QPs are now presented by rdmatool:
$ ibv_xsrq_pingpong & $ rdma res show qp link ibp0s9/1 lqpn 0 type SMI state RTS sq-psn 0 comm [ib_core] link ibp0s9/1 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core] link ibp0s9/1 lqpn 7 type UD state RTS sq-psn 0 comm [mlx5_ib] link ibp0s9/1 lqpn 42 type XRC_TGT state INIT sq-psn 0 path-mig-state MIGRATED comm [ib_uverbs] link ibp0s9/1 lqpn 43 type XRC_INI state INIT sq-psn 0 path-mig-state MIGRATED pdn 197 pid 419 comm ibv_xsrq_pingpong Link: https://lore.kernel.org/r/20201117070148.1974114-4-leon@kernel.org Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 25 +++++++------------------ drivers/infiniband/core/uverbs_cmd.c | 4 ++-- drivers/infiniband/core/uverbs_std_types_qp.c | 4 ++-- drivers/infiniband/core/verbs.c | 11 ++++++----- include/rdma/ib_verbs.h | 10 ++++++++-- 5 files changed, 25 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 7c4752c47f80..baa86c86efad 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -318,15 +318,12 @@ struct ib_device *ib_device_get_by_index(const struct net *net, u32 index); void nldev_init(void); void nldev_exit(void); -static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, - struct ib_pd *pd, - struct ib_qp_init_attr *attr, - struct ib_udata *udata, - struct ib_uqp_object *uobj) +static inline struct ib_qp * +_ib_create_qp(struct ib_device *dev, struct ib_pd *pd, + struct ib_qp_init_attr *attr, struct ib_udata *udata, + struct ib_uqp_object *uobj, const char *caller) { - enum ib_qp_type qp_type = attr->qp_type; struct ib_qp *qp; - bool is_xrc; if (!dev->ops.create_qp) return ERR_PTR(-EOPNOTSUPP); @@ -347,7 +344,6 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, qp->srq = attr->srq; qp->rwq_ind_tbl = attr->rwq_ind_tbl; qp->event_handler = attr->event_handler; - qp->qp_type = attr->qp_type; qp->port = attr->port_num; atomic_set(&qp->usecnt, 0); @@ -356,16 +352,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, INIT_LIST_HEAD(&qp->sig_mrs); rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP); - /* - * We don't track XRC QPs for now, because they don't have PD - * and more importantly they are created internaly by driver, - * see mlx5 create_dev_resources() as an example. - */ - is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT; - if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) { - rdma_restrack_parent_name(&qp->res, &pd->res); - rdma_restrack_add(&qp->res); - } + WARN_ONCE(!udata && !caller, "Missing kernel QP owner"); + rdma_restrack_set_name(&qp->res, udata ? 
NULL : caller); + rdma_restrack_add(&qp->res); return qp; } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 92c9d2a548f3..402d0b8bf58e 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1400,8 +1400,8 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, - obj); + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj, + NULL); if (IS_ERR(qp)) { ret = PTR_ERR(qp); diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c index 294cd29d5ced..c00cfb5ed387 100644 --- a/drivers/infiniband/core/uverbs_std_types_qp.c +++ b/drivers/infiniband/core/uverbs_std_types_qp.c @@ -251,8 +251,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)( if (attr.qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, - obj); + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj, + NULL); if (IS_ERR(qp)) { ret = PTR_ERR(qp); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 33778f8674a1..5d4c7c263665 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1191,7 +1191,7 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, } /** - * ib_create_qp - Creates a kernel QP associated with the specified protection + * ib_create_named_qp - Creates a kernel QP associated with the specified protection * domain. * @pd: The protection domain associated with the QP. * @qp_init_attr: A list of initial attributes required to create the @@ -1200,8 +1200,9 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, * * NOTE: for user qp use ib_create_qp_user with valid udata! */ -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr) +struct ib_qp *ib_create_named_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + const char *caller) { struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device; struct ib_qp *qp; @@ -1226,7 +1227,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, if (qp_init_attr->cap.max_rdma_ctxs) rdma_rw_init_qp(device, qp_init_attr); - qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL); + qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL, caller); if (IS_ERR(qp)) return qp; @@ -1292,7 +1293,7 @@ err: return ERR_PTR(ret); } -EXPORT_SYMBOL(ib_create_qp); +EXPORT_SYMBOL(ib_create_named_qp); static const struct { int valid; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 174c1bffa00c..7bee8abae35c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3618,8 +3618,14 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, bad_recv_wr ? : &dummy); } -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr); +struct ib_qp *ib_create_named_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + const char *caller); +static inline struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr) +{ + return ib_create_named_qp(pd, init_attr, KBUILD_MODNAME); +} /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. 
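Editor's note: a sketch of the resulting call-site behavior (hypothetical in-kernel ULP code, not from this patch):

	/* ib_create_qp() is now a static inline wrapper, so KBUILD_MODNAME
	 * expands in the calling module and restrack records that module as
	 * the kernel QP owner (the "comm [...]" column shown above). */
	qp = ib_create_qp(pd, &init_attr);
	if (IS_ERR(qp))
		return PTR_ERR(qp);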
-- cgit v1.2.3-70-g09d2 From adac4cb3c1ff5c47c9f47be5d017a0e054176e3c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 30 Nov 2020 09:58:36 +0200 Subject: RDMA/uverbs: Check ODP in ib_check_mr_access() as well No reason only one caller checks this. This properly blocks ODP from the rereg flow if the device does not support ODP. Link: https://lore.kernel.org/r/20201130075839.278575-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 19 +++++-------------- drivers/infiniband/core/uverbs_std_types_mr.c | 2 +- drivers/infiniband/hw/mlx5/devx.c | 2 +- include/rdma/ib_verbs.h | 6 +++++- 4 files changed, 12 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 143a0e33fe52..817b25045acd 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -709,29 +709,20 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; - ret = ib_check_mr_access(cmd.access_flags); - if (ret) - return ret; - uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); + ret = ib_check_mr_access(ib_dev, cmd.access_flags); + if (ret) + goto err_free; + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err_free; } - if (cmd.access_flags & IB_ACCESS_ON_DEMAND) { - if (!(pd->device->attrs.device_cap_flags & - IB_DEVICE_ON_DEMAND_PAGING)) { - pr_debug("ODP support not available\n"); - ret = -EINVAL; - goto err_put; - } - } - mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, cmd.access_flags, &attrs->driver_udata); @@ -805,7 +796,7 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) } if (cmd.flags & IB_MR_REREG_ACCESS) { - ret = ib_check_mr_access(cmd.access_flags); + ret = ib_check_mr_access(mr->device, cmd.access_flags); if (ret) goto put_uobjs; } diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index dc5856441729..dd4e76b26c74 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -115,7 +115,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( if (!(attr.access_flags & IB_ZERO_BASED)) return -EINVAL; - ret = ib_check_mr_access(attr.access_flags); + ret = ib_check_mr_access(ib_dev, attr.access_flags); if (ret) return ret; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index ad0173f62c0e..819c142857d6 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2068,7 +2068,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - err = ib_check_mr_access(access); + err = ib_check_mr_access(&dev->ib_dev, access); if (err) return err; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7bee8abae35c..4fcbc6d3d0e0 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4183,7 +4183,8 @@ struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, struct inode *inode, struct ib_udata *udata); int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata); -static inline int ib_check_mr_access(int flags) +static inline int ib_check_mr_access(struct ib_device *ib_dev, + unsigned int flags) { /* * Local write permission is required if remote write or 
@@ -4196,6 +4197,9 @@ static inline int ib_check_mr_access(int flags) if (flags & ~IB_ACCESS_SUPPORTED) return -EINVAL; + if (flags & IB_ACCESS_ON_DEMAND && + !(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) + return -EINVAL; return 0; } -- cgit v1.2.3-70-g09d2 From 6e0954b11c056570cb29676a84e2f8dc4d1dd05e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 30 Nov 2020 09:58:37 +0200 Subject: RDMA/uverbs: Allow drivers to create a new HW object during rereg_mr mlx5 has an ugly flow where it tries to allocate a new MR and replace the existing MR in the same memory during rereg. This is very complicated and buggy. Instead of trying to replace in-place inside the driver, provide support from uverbs to change the entire HW object assigned to a handle during rereg_mr. Since destroying a MR is allowed to fail (ie if a MW is pointing at it) and can't be detected in advance, the algorithm creates a completely new uobject to hold the new MR and swaps the IDR entries of the two objects. The old MR in the temporary IDR entry is destroyed, and if it fails rereg_mr succeeds and destruction is deferred to FD release. This complexity is why this cannot live in a driver safely. Link: https://lore.kernel.org/r/20201130075839.278575-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 51 +++++++++++++++++ drivers/infiniband/core/uverbs_cmd.c | 85 ++++++++++++++++++++++------- drivers/infiniband/hw/hns/hns_roce_device.h | 7 ++- drivers/infiniband/hw/hns/hns_roce_mr.c | 15 ++--- drivers/infiniband/hw/mlx4/mlx4_ib.h | 8 +-- drivers/infiniband/hw/mlx4/mr.c | 16 +++--- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 +- drivers/infiniband/hw/mlx5/mr.c | 15 ++--- include/rdma/ib_verbs.h | 7 ++- include/rdma/uverbs_types.h | 5 ++ 10 files changed, 160 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index c44079b9158e..75eafd9208aa 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -595,6 +595,27 @@ static void alloc_commit_idr_uobject(struct ib_uobject *uobj) WARN_ON(old != NULL); } +static void swap_idr_uobjects(struct ib_uobject *obj_old, + struct ib_uobject *obj_new) +{ + struct ib_uverbs_file *ufile = obj_old->ufile; + void *old; + + /* + * New must be an object that been allocated but not yet committed, this + * moves the pre-committed state to obj_old, new still must be comitted. + */ + old = xa_cmpxchg(&ufile->idr, obj_old->id, obj_old, XA_ZERO_ENTRY, + GFP_KERNEL); + if (WARN_ON(old != obj_old)) + return; + + swap(obj_old->id, obj_new->id); + + old = xa_cmpxchg(&ufile->idr, obj_old->id, NULL, obj_old, GFP_KERNEL); + WARN_ON(old != NULL); +} + static void alloc_commit_fd_uobject(struct ib_uobject *uobj) { int fd = uobj->id; @@ -640,6 +661,35 @@ void rdma_alloc_commit_uobject(struct ib_uobject *uobj, up_read(&ufile->hw_destroy_rwsem); } +/* + * new_uobj will be assigned to the handle currently used by to_uobj, and + * to_uobj will be destroyed. + * + * Upon return the caller must do: + * rdma_alloc_commit_uobject(new_uobj) + * uobj_put_destroy(to_uobj) + * + * to_uobj must have a write get but the put mode switches to destroy once + * this is called. 
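 * [Editor's sketch of the complete sequence, as ib_uverbs_rereg_mr()
 *  below uses it (illustrative only):
 *
 *	new_uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
 *	new_mr = ib_dev->ops.rereg_user_mr(mr, flags, ...);
 *	rdma_assign_uobject(uobj, new_uobj, attrs);
 *	rdma_alloc_commit_uobject(new_uobj, attrs);
 *	uobj_put_destroy(uobj);
 *  ]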
+ */ +void rdma_assign_uobject(struct ib_uobject *to_uobj, struct ib_uobject *new_uobj, + struct uverbs_attr_bundle *attrs) +{ + assert_uverbs_usecnt(new_uobj, UVERBS_LOOKUP_WRITE); + + if (WARN_ON(to_uobj->uapi_object != new_uobj->uapi_object || + !to_uobj->uapi_object->type_class->swap_uobjects)) + return; + + to_uobj->uapi_object->type_class->swap_uobjects(to_uobj, new_uobj); + + /* + * If this fails then the uobject is still completely valid (though with + * a new ID) and we leak it until context close. + */ + uverbs_destroy_uobject(to_uobj, RDMA_REMOVE_DESTROY, attrs); +} + /* * This consumes the kref for uobj. It is up to the caller to unwind the HW * object and anything else connected to uobj before calling this. @@ -747,6 +797,7 @@ const struct uverbs_obj_type_class uverbs_idr_class = { .lookup_put = lookup_put_idr_uobject, .destroy_hw = destroy_hw_idr_uobject, .remove_handle = remove_handle_idr_uobject, + .swap_uobjects = swap_idr_uobjects, }; EXPORT_SYMBOL(uverbs_idr_class); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 817b25045acd..98a5d36813ff 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -764,11 +764,14 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_rereg_mr cmd; struct ib_uverbs_rereg_mr_resp resp; - struct ib_pd *pd = NULL; struct ib_mr *mr; - struct ib_pd *old_pd; int ret; struct ib_uobject *uobj; + struct ib_uobject *new_uobj; + struct ib_device *ib_dev; + struct ib_pd *orig_pd; + struct ib_pd *new_pd; + struct ib_mr *new_mr; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) @@ -801,31 +804,69 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) goto put_uobjs; } + orig_pd = mr->pd; if (cmd.flags & IB_MR_REREG_PD) { - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, - attrs); - if (!pd) { + new_pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, + attrs); + if (!new_pd) { ret = -EINVAL; goto put_uobjs; } + } else { + new_pd = mr->pd; } - old_pd = mr->pd; - ret = mr->device->ops.rereg_user_mr(mr, cmd.flags, cmd.start, - cmd.length, cmd.hca_va, - cmd.access_flags, pd, - &attrs->driver_udata); - if (ret) + /* + * The driver might create a new HW object as part of the rereg, we need + * to have a uobject ready to hold it. + */ + new_uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev); + if (IS_ERR(new_uobj)) { + ret = PTR_ERR(new_uobj); goto put_uobj_pd; - - if (cmd.flags & IB_MR_REREG_PD) { - atomic_inc(&pd->usecnt); - mr->pd = pd; - atomic_dec(&old_pd->usecnt); } - if (cmd.flags & IB_MR_REREG_TRANS) - mr->iova = cmd.hca_va; + new_mr = ib_dev->ops.rereg_user_mr(mr, cmd.flags, cmd.start, cmd.length, + cmd.hca_va, cmd.access_flags, new_pd, + &attrs->driver_udata); + if (IS_ERR(new_mr)) { + ret = PTR_ERR(new_mr); + goto put_new_uobj; + } + if (new_mr) { + new_mr->device = new_pd->device; + new_mr->pd = new_pd; + new_mr->type = IB_MR_TYPE_USER; + new_mr->dm = NULL; + new_mr->sig_attrs = NULL; + new_mr->uobject = uobj; + atomic_inc(&new_pd->usecnt); + new_mr->iova = cmd.hca_va; + new_uobj->object = new_mr; + + rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR); + rdma_restrack_set_name(&new_mr->res, NULL); + rdma_restrack_add(&new_mr->res); + + /* + * The new uobj for the new HW object is put into the same spot + * in the IDR and the old uobj & HW object is deleted. 
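 * [Editor's note, a hedged summary of the reworked ops.rereg_user_mr()
 *  contract rather than upstream text: returning NULL means the existing
 *  MR was updated in place; returning a new ib_mr pointer means the
 *  driver created a replacement HW object, so the core swaps the uobject
 *  handles as done here; returning an ERR_PTR() fails the rereg and
 *  leaves the original MR intact. The hns/mlx4/mlx5 conversions below
 *  follow this pattern.]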
+ */ + rdma_assign_uobject(uobj, new_uobj, attrs); + rdma_alloc_commit_uobject(new_uobj, attrs); + uobj_put_destroy(uobj); + new_uobj = NULL; + uobj = NULL; + mr = new_mr; + } else { + if (cmd.flags & IB_MR_REREG_PD) { + atomic_dec(&orig_pd->usecnt); + mr->pd = new_pd; + atomic_inc(&new_pd->usecnt); + } + if (cmd.flags & IB_MR_REREG_TRANS) + mr->iova = cmd.hca_va; + } memset(&resp, 0, sizeof(resp)); resp.lkey = mr->lkey; @@ -833,12 +874,16 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) ret = uverbs_response(attrs, &resp, sizeof(resp)); +put_new_uobj: + if (new_uobj) + uobj_alloc_abort(new_uobj, attrs); put_uobj_pd: if (cmd.flags & IB_MR_REREG_PD) - uobj_put_obj_read(pd); + uobj_put_obj_read(new_pd); put_uobjs: - uobj_put_write(uobj); + if (uobj) + uobj_put_write(uobj); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a5c6bb073569..70ae37bad77e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1223,9 +1223,10 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); -int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata); +struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 87e2e6236c69..98671925debf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -328,9 +328,10 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags, return ret; } -int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata) +struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct ib_device *ib_dev = &hr_dev->ib_dev; @@ -341,11 +342,11 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, int ret; if (!mr->enabled) - return -EINVAL; + return ERR_PTR(-EINVAL); mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); + return ERR_CAST(mailbox); mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1); ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0, @@ -390,12 +391,12 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, hns_roce_free_cmd_mailbox(hr_dev, mailbox); - return 0; + return NULL; free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); - return ret; + return ERR_PTR(ret); } int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 58df06492d69..78c9bb79ec75 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -908,10 +908,10 @@ int 
mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn); void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach); -int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, - u64 start, u64 length, u64 virt_addr, - int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata); +struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, const struct ib_gid_attr *attr); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 426fed005d53..50becc0e4b62 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -456,10 +456,10 @@ err_free: return ERR_PTR(err); } -int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, - u64 start, u64 length, u64 virt_addr, - int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata) +struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(mr->device); struct mlx4_ib_mr *mmr = to_mmr(mr); @@ -472,9 +472,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, * race exists. */ err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry); - if (err) - return err; + return ERR_PTR(err); if (flags & IB_MR_REREG_PD) { err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry, @@ -542,8 +541,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, release_mpt_entry: mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry); - - return err; + if (err) + return ERR_PTR(err); + return NULL; } static int diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 718e59fce006..ab84d4efbda3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1254,9 +1254,9 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); -int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, - u64 length, u64 virt_addr, int access_flags, - struct ib_pd *pd, struct ib_udata *udata); +struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 virt_addr, int access_flags, + struct ib_pd *pd, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b6116f6d065d..778cc08e17ad 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1620,9 +1620,10 @@ static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, return err; } -int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, - u64 length, u64 virt_addr, int new_access_flags, - struct ib_pd *new_pd, struct ib_udata *udata) +struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 virt_addr, + int new_access_flags, struct ib_pd *new_pd, + struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); struct mlx5_ib_mr *mr = to_mmr(ib_mr); @@ -1638,10 +1639,10 @@ int mlx5_ib_rereg_user_mr(struct ib_mr 
*ib_mr, int flags, u64 start, start, virt_addr, length, access_flags); if (!mr->umem) - return -EINVAL; + return ERR_PTR(-EINVAL); if (is_odp_mr(mr)) - return -EOPNOTSUPP; + return ERR_PTR(-EOPNOTSUPP); if (flags & IB_MR_REREG_TRANS) { addr = virt_addr; @@ -1717,14 +1718,14 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, set_mr_fields(dev, mr, len, access_flags); - return 0; + return NULL; err: ib_umem_release(mr->umem); mr->umem = NULL; clean_mr(dev, mr); - return err; + return ERR_PTR(err); } static int diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4fcbc6d3d0e0..3be1d1194a17 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2433,9 +2433,10 @@ struct ib_device_ops { struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); - int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_pd *pd, struct ib_udata *udata); + struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index a27e9fb4903f..ccd11631c167 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -71,6 +71,8 @@ struct uverbs_obj_type_class { enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs); void (*remove_handle)(struct ib_uobject *uobj); + void (*swap_uobjects)(struct ib_uobject *obj_old, + struct ib_uobject *obj_new); }; struct uverbs_obj_type { @@ -116,6 +118,9 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj, bool hw_obj_valid); void rdma_alloc_commit_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); +void rdma_assign_uobject(struct ib_uobject *to_uobj, + struct ib_uobject *new_uobj, + struct uverbs_attr_bundle *attrs); /* * uverbs_uobject_get is called in order to increase the reference count on -- cgit v1.2.3-70-g09d2 From 2988ca08ba65848f2705023b054fd8bfc0109c38 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 1 Dec 2020 13:08:55 +0100 Subject: IB: Fix kernel-doc markups Some functions have different names between their prototypes and the kernel-doc markup. 
Others need to be fixed, as kernel-doc markups should use this format: identifier - description Link: https://lore.kernel.org/r/78b98c41a5a0f4c0106433d305b143028a4168b0.1606823973.git.mchehab+huawei@kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 5 +++-- drivers/infiniband/core/cq.c | 4 ++-- drivers/infiniband/core/iwpm_util.h | 2 +- drivers/infiniband/core/sa_query.c | 3 ++- drivers/infiniband/core/verbs.c | 4 ++-- drivers/infiniband/sw/rdmavt/ah.c | 2 +- drivers/infiniband/sw/rdmavt/mcast.c | 12 ++++++------ drivers/infiniband/sw/rdmavt/qp.c | 8 ++++---- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 +- drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h | 2 +- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c | 2 +- drivers/infiniband/ulp/srpt/ib_srpt.h | 2 +- include/rdma/ib_verbs.h | 11 +++++++++++ 13 files changed, 36 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 167e436ae11d..46c48690f909 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1251,7 +1251,8 @@ out: EXPORT_SYMBOL(ib_cm_listen); /** - * Create a new listening ib_cm_id and listen on the given service ID. + * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on + * the given service ID. * * If there's an existing ID listening on that same device and service ID, * return it. @@ -1764,7 +1765,7 @@ static u16 cm_get_bth_pkey(struct cm_work *work) } /** - * Convert OPA SGID to IB SGID + * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID * ULPs (such as IPoIB) do not understand OPA GIDs and will * reject them as the local_gid will not match the sgid. Therefore, * change the pathrec's SGID to an IB SGID. diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 12ebacf52958..d4248bbe74da 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -123,7 +123,7 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs, } /** - * ib_process_direct_cq - process a CQ in caller context + * ib_process_cq_direct - process a CQ in caller context * @cq: CQ to process * @budget: number of CQEs to poll for * @@ -197,7 +197,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) } /** - * __ib_alloc_cq allocate a completion queue + * __ib_alloc_cq - allocate a completion queue * @dev: device to allocate the CQ for * @private: driver private data, accessible from cq->cq_context * @nr_cqe: number of CQEs to allocate diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 1bf87d9fd0bd..eeb8e6010907 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -141,7 +141,7 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request); int iwpm_get_nlmsg_seq(void); /** - * iwpm_add_reminfo - Add remote address info of the connecting peer + * iwpm_add_remote_info - Add remote address info of the connecting peer * to the remote info hash table * @reminfo: The remote info to be added */ diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8c930bf1df89..89a831fa1885 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1435,7 +1435,8 @@ enum opa_pr_supported { }; /** - * Check if current PR query can be an OPA query. + * opa_pr_query_possible - Check if current PR query can be an OPA query. 
+ * * Retuns PR_NOT_SUPPORTED if a path record query is not * possible, PR_OPA_SUPPORTED if an OPA path record query * is possible and PR_IB_SUPPORTED if an IB path record diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5d4c7c263665..54e053b53060 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -244,7 +244,7 @@ EXPORT_SYMBOL(rdma_port_get_link_layer); /* Protection domains */ /** - * ib_alloc_pd - Allocates an unused protection domain. + * __ib_alloc_pd - Allocates an unused protection domain. * @device: The device on which to allocate the protection domain. * @flags: protection domain flags * @caller: caller's build-time module name @@ -1666,7 +1666,7 @@ static bool is_qp_type_connected(const struct ib_qp *qp) qp->qp_type == IB_QPT_XRC_TGT); } -/** +/* * IB core internal function to perform QP attributes modification. */ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index f9754dcd250b..a3e5b368c5e7 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -126,7 +126,7 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, } /** - * rvt_destory_ah - Destory an address handle + * rvt_destroy_ah - Destroy an address handle * @ibah: address handle * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) * Return: 0 on success diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index dd11c6fcd060..5233a63d99a6 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -54,7 +54,7 @@ #include "mcast.h" /** - * rvt_driver_mcast - init resources for multicast + * rvt_driver_mcast_init - init resources for multicast * @rdi: rvt dev struct * * This is per device that registers with rdmavt @@ -69,7 +69,7 @@ void rvt_driver_mcast_init(struct rvt_dev_info *rdi) } /** - * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct + * rvt_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct * @qp: the QP to link */ static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp) @@ -98,7 +98,7 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp) } /** - * mcast_alloc - allocate the multicast GID structure + * rvt_mcast_alloc - allocate the multicast GID structure * @mgid: the multicast GID * @lid: the muilticast LID (host order) * @@ -181,7 +181,7 @@ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid, EXPORT_SYMBOL(rvt_mcast_find); /** - * mcast_add - insert mcast GID into table and attach QP struct + * rvt_mcast_add - insert mcast GID into table and attach QP struct * @mcast: the mcast GID table * @mqp: the QP to attach * @@ -426,8 +426,8 @@ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } /** - *rvt_mast_tree_empty - determine if any qps are attached to any mcast group - *@rdi: rvt dev struct + * rvt_mcast_tree_empty - determine if any qps are attached to any mcast group + * @rdi: rvt dev struct * * Return: in use count */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index e9db6bf10618..22fa9bde5419 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1827,7 +1827,7 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } /** - * rvt_post_receive - post a receive on a QP + * rvt_post_recv - post a receive on a QP * 
@ibqp: the QP to post the receive on * @wr: the WR to post * @bad_wr: the first bad WR is put here @@ -2249,7 +2249,7 @@ bail: } /** - * rvt_post_srq_receive - post a receive on a shared receive queue + * rvt_post_srq_recv - post a receive on a shared receive queue * @ibsrq: the SRQ to post the receive on * @wr: the list of work requests to post * @bad_wr: A pointer to the first WR to cause a problem is put here @@ -2501,7 +2501,7 @@ bail: EXPORT_SYMBOL(rvt_get_rwqe); /** - * qp_comm_est - handle trap with QP established + * rvt_comm_est - handle trap with QP established * @qp: the QP */ void rvt_comm_est(struct rvt_qp *qp) @@ -2947,7 +2947,7 @@ static enum ib_wc_status loopback_qp_drop(struct rvt_ibport *rvp, } /** - * ruc_loopback - handle UC and RC loopback requests + * rvt_ruc_loopback - handle UC and RC loopback requests * @sqp: the sending QP * * This is called from rvt_do_send() to forward a WQE addressed to the same HFI diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 3690e28cc7ea..1a459b35000c 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -739,7 +739,7 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn, } /** - * iscsi_iser_set_param() - set class connection parameter + * iscsi_iser_conn_get_stats() - get iscsi connection statistics * @cls_conn: iscsi class connection * @stats: iscsi stats to output * diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h index f64519872297..012fc27c5c93 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h @@ -437,7 +437,7 @@ struct opa_veswport_trap { } __packed; /** - * struct opa_vnic_iface_macs_entry - single entry in the mac list + * struct opa_vnic_iface_mac_entry - single entry in the mac list * @mac_addr: MAC address */ struct opa_vnic_iface_mac_entry { diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c index 868b5aec1537..292c037aa239 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c @@ -74,7 +74,7 @@ void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event) } /** - * opa_vnic_get_error_counters - get summary counters + * opa_vnic_get_summary_counters - get summary counters * @adapter: vnic port adapter * @cntrs: pointer to destination summary counters structure * diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index bdeb010efee6..76e66f630c17 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -347,7 +347,7 @@ struct srpt_nexus { }; /** - * struct srpt_port_attib - attributes for SRPT port + * struct srpt_port_attrib - attributes for SRPT port * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections. * @srp_max_rsp_size: Maximum size of SRP response messages in bytes. * @srp_sq_size: Shared receive queue (SRQ) size. diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3be1d1194a17..06a565203cfc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3394,6 +3394,17 @@ enum ib_pd_flags { struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, const char *caller); +/** + * ib_alloc_pd - Allocates an unused protection domain. 
+ * @device: The device on which to allocate the protection domain. + * @flags: protection domain flags + * + * A protection domain object provides an association between QPs, shared + * receive queues, address handles, memory regions, and memory windows. + * + * Every PD has a local_dma_lkey which can be used as the lkey value for local + * memory operations. + */ #define ib_alloc_pd(device, flags) \ __ib_alloc_pd((device), (flags), KBUILD_MODNAME) -- cgit v1.2.3-70-g09d2 From 53ef4999f07d9c75cdc8effb0cc8c581dc39b1a1 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 2 Dec 2020 09:29:20 +0800 Subject: RDMA/hns: Move capability flags of QP and CQ to hns-abi.h These flags will be returned to the userspace through ABI, so they should be defined in hns-abi.h. Furthermore, there is no need to include hns-abi.h in every source files, it just needs to be included in the common header file. Link: https://lore.kernel.org/r/1606872560-17823-1-git-send-email-liweihang@huawei.com Reported-by: kernel test robot Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 1 - drivers/infiniband/hw/hns/hns_roce_device.h | 11 +---------- drivers/infiniband/hw/hns/hns_roce_main.c | 1 - drivers/infiniband/hw/hns/hns_roce_pd.c | 1 - drivers/infiniband/hw/hns/hns_roce_qp.c | 1 - drivers/infiniband/hw/hns/hns_roce_srq.c | 1 - include/uapi/rdma/hns-abi.h | 10 ++++++++++ 7 files changed, 11 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 68f355fba425..5e6d68830fa5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -36,7 +36,6 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" -#include #include "hns_roce_common.h" static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 70ae37bad77e..60b8349cd2f8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -34,6 +34,7 @@ #define _HNS_ROCE_DEVICE_H #include +#include #define DRV_NAME "hns_roce" @@ -131,16 +132,6 @@ enum { SERV_TYPE_UD, }; -enum hns_roce_qp_caps { - HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0), - HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1), - HNS_ROCE_QP_CAP_OWNER_DB = BIT(2), -}; - -enum hns_roce_cq_flags { - HNS_ROCE_CQ_FLAG_RECORD_DB = BIT(0), -}; - enum hns_roce_qp_state { HNS_ROCE_QP_STATE_RST, HNS_ROCE_QP_STATE_INIT, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index f01590d8c3cf..e8aa8075ffcd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -40,7 +40,6 @@ #include #include "hns_roce_common.h" #include "hns_roce_device.h" -#include #include "hns_roce_hem.h" /** diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 98f69496adb4..45ec91db1553 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -32,7 +32,6 @@ #include #include -#include #include "hns_roce_device.h" static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 34aa086060d3..121d3b4c2edb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ 
b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -39,7 +39,6 @@ #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" -#include static void flush_work_handle(struct work_struct *work) { diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 27646b9e35df..36c6bcb85269 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -4,7 +4,6 @@ */ #include -#include #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 9ec85f76e9ac..90b739d05adf 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -43,6 +43,10 @@ struct hns_roce_ib_create_cq { __u32 reserved; }; +enum hns_roce_cq_cap_flags { + HNS_ROCE_CQ_FLAG_RECORD_DB = 1 << 0, +}; + struct hns_roce_ib_create_cq_resp { __aligned_u64 cqn; /* Only 32 bits used, 64 for compat */ __aligned_u64 cap_flags; @@ -69,6 +73,12 @@ struct hns_roce_ib_create_qp { __aligned_u64 sdb_addr; }; +enum hns_roce_qp_cap_flags { + HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, + HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, + HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, +}; + struct hns_roce_ib_create_qp_resp { __aligned_u64 cap_flags; }; -- cgit v1.2.3-70-g09d2 From e0da68994d16b46384cce7b86eb645f1ef7c51ef Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Tue, 8 Dec 2020 09:35:45 +0200 Subject: RDMA/uverbs: Fix incorrect variable type Fix incorrect type of max_entries in UVERBS_METHOD_QUERY_GID_TABLE - max_entries is of type size_t although it can take negative values. The following static check revealed it: drivers/infiniband/core/uverbs_std_types_device.c:338 ib_uverbs_handler_UVERBS_METHOD_QUERY_GID_TABLE() warn: 'max_entries' unsigned <= 0 Fixes: 9f85cbe50aa0 ("RDMA/uverbs: Expose the new GID query API to user space") Link: https://lore.kernel.org/r/20201208073545.9723-4-leon@kernel.org Reported-by: Dan Carpenter Signed-off-by: Avihai Horon Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types_device.c | 14 +++++--------- include/rdma/uverbs_ioctl.h | 10 ++++++++++ 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 302f898c5833..9ec6971056fa 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -317,8 +317,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)( struct ib_device *ib_dev; size_t user_entry_size; ssize_t num_entries; - size_t max_entries; - size_t num_bytes; + int max_entries; u32 flags; int ret; @@ -336,19 +335,16 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)( attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES, user_entry_size); if (max_entries <= 0) - return -EINVAL; + return max_entries ?: -EINVAL; ucontext = ib_uverbs_get_ucontext(attrs); if (IS_ERR(ucontext)) return PTR_ERR(ucontext); ib_dev = ucontext->device; - if (check_mul_overflow(max_entries, sizeof(*entries), &num_bytes)) - return -EINVAL; - - entries = uverbs_zalloc(attrs, num_bytes); - if (!entries) - return -ENOMEM; + entries = uverbs_kcalloc(attrs, max_entries, sizeof(*entries)); + if (IS_ERR(entries)) + return PTR_ERR(entries); num_entries = rdma_query_gid_table(ib_dev, entries, max_entries); if (num_entries < 0) diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h 
From d21a1240f5169a07a230d72e0e6d3773b2a088b4 Mon Sep 17 00:00:00 2001
From: Bob Pearson
Date: Thu, 10 Dec 2020 11:42:59 -0600
Subject: RDMA/rxe: Use acquire/release for memory ordering

Change the work and completion queues to use smp_load_acquire() and
smp_store_release() to synchronize between the driver and its users.
This commit goes with a matching series of commits in the rxe user
space provider.

Link: https://lore.kernel.org/r/20201210174258.5234-1-rpearson@hpe.com
Signed-off-by: Bob Pearson
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/sw/rxe/rxe_cq.c    |  5 --
 drivers/infiniband/sw/rxe/rxe_queue.h | 94 ++++++++++++++++++++++-------------
 drivers/infiniband/sw/rxe/rxe_verbs.c | 11 ----
 include/uapi/rdma/rdma_user_rxe.h     | 21 ++++++++
 4 files changed, 81 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 43394c3f29d4..b315ebf041ac 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -123,11 +123,6 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
 
 	memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe));
 
-	/* make sure all changes to the CQ are written before we update the
-	 * producer pointer
-	 */
-	smp_wmb();
-
 	advance_producer(cq->queue);
 	spin_unlock_irqrestore(&cq->cq_lock, flags);
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
index 7d434a6837a7..2902ca7b288c 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.h
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -7,9 +7,11 @@
 #ifndef RXE_QUEUE_H
 #define RXE_QUEUE_H
 
+/* for definition of shared struct rxe_queue_buf */
+#include <uapi/rdma/rdma_user_rxe.h>
+
 /* implements a simple circular buffer that can optionally be
  * shared between user space and the kernel and can be resized
- * the requested element size is rounded up to a power of 2
  * and the number of elements in the buffer is also rounded
  * up to a power of 2. Since the queue is empty when the
@@ -17,28 +19,6 @@
  * of the queue is one less than the number of element slots
  */
 
-/* this data structure is shared between user space and kernel
- * space for those cases where the queue is shared. It contains
- * the producer and consumer indices. Is also contains a copy
- * of the queue size parameters for user space to use but the
- * kernel must use the parameters in the rxe_queue struct
- * this MUST MATCH the corresponding librxe struct
- * for performance reasons arrange to have producer and consumer
- * pointers in separate cache lines
- * the kernel should always mask the indices to avoid accessing
- * memory outside of the data area
- */
-struct rxe_queue_buf {
-	__u32			log2_elem_size;
-	__u32			index_mask;
-	__u32			pad_1[30];
-	__u32			producer_index;
-	__u32			pad_2[31];
-	__u32			consumer_index;
-	__u32			pad_3[31];
-	__u8			data[];
-};
-
 struct rxe_queue {
 	struct rxe_dev		*rxe;
 	struct rxe_queue_buf	*buf;
@@ -46,7 +26,7 @@ struct rxe_queue {
 	size_t			buf_size;
 	size_t			elem_size;
 	unsigned int		log2_elem_size;
-	unsigned int		index_mask;
+	u32			index_mask;
 };
 
 int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf,
@@ -76,26 +56,56 @@ static inline int next_index(struct rxe_queue *q, int index)
 
 static inline int queue_empty(struct rxe_queue *q)
 {
-	return ((q->buf->producer_index - q->buf->consumer_index)
-			& q->index_mask) == 0;
+	u32 prod;
+	u32 cons;
+
+	/* make sure all changes to queue complete before
+	 * testing queue empty
+	 */
+	prod = smp_load_acquire(&q->buf->producer_index);
+	/* same */
+	cons = smp_load_acquire(&q->buf->consumer_index);
+
+	return ((prod - cons) & q->index_mask) == 0;
 }
 
 static inline int queue_full(struct rxe_queue *q)
 {
-	return ((q->buf->producer_index + 1 - q->buf->consumer_index)
-			& q->index_mask) == 0;
+	u32 prod;
+	u32 cons;
+
+	/* make sure all changes to queue complete before
+	 * testing queue full
+	 */
+	prod = smp_load_acquire(&q->buf->producer_index);
+	/* same */
+	cons = smp_load_acquire(&q->buf->consumer_index);
+
+	return ((prod + 1 - cons) & q->index_mask) == 0;
 }
 
 static inline void advance_producer(struct rxe_queue *q)
 {
-	q->buf->producer_index = (q->buf->producer_index + 1)
-			& q->index_mask;
+	u32 prod;
+
+	prod = (q->buf->producer_index + 1) & q->index_mask;
+
+	/* make sure all changes to queue complete before
+	 * changing producer index
+	 */
+	smp_store_release(&q->buf->producer_index, prod);
 }
 
 static inline void advance_consumer(struct rxe_queue *q)
 {
-	q->buf->consumer_index = (q->buf->consumer_index + 1)
-			& q->index_mask;
+	u32 cons;
+
+	cons = (q->buf->consumer_index + 1) & q->index_mask;
+
+	/* make sure all changes to queue complete before
+	 * changing consumer index
	 */
+	smp_store_release(&q->buf->consumer_index, cons);
 }
 
 static inline void *producer_addr(struct rxe_queue *q)
@@ -112,12 +122,28 @@ static inline void *consumer_addr(struct rxe_queue *q)
 
 static inline unsigned int producer_index(struct rxe_queue *q)
 {
-	return q->buf->producer_index;
+	u32 index;
+
+	/* make sure all changes to queue
+	 * complete before getting producer index
+	 */
+	index = smp_load_acquire(&q->buf->producer_index);
+	index &= q->index_mask;
+
+	return index;
 }
 
 static inline unsigned int consumer_index(struct rxe_queue *q)
 {
-	return q->buf->consumer_index;
+	u32 index;
+
+	/* make sure all changes to queue
+	 * complete before getting consumer index
+	 */
+	index = smp_load_acquire(&q->buf->consumer_index);
+	index &= q->index_mask;
+
+	return index;
 }
 
 static inline void *addr_from_index(struct rxe_queue *q, unsigned int index)
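The pattern above is the classic single-producer/single-consumer ring:
the producer publishes a slot with a release store of its index, and
the other side observes it with an acquire load, so no full barrier
(smp_wmb()) is needed. A standalone C11 analogue of the same discipline,
illustrative only; the kernel itself uses smp_load_acquire() and
smp_store_release():

#include <stdatomic.h>
#include <stdint.h>

#define SLOTS 64U		/* power of 2; capacity is SLOTS - 1 */
#define MASK  (SLOTS - 1)

struct ring {
	_Atomic uint32_t prod;
	_Atomic uint32_t cons;
	int data[SLOTS];
};

/* Producer: fill the slot first, then publish it with a release store. */
static int ring_push(struct ring *r, int v)
{
	uint32_t prod = atomic_load_explicit(&r->prod, memory_order_relaxed);
	uint32_t cons = atomic_load_explicit(&r->cons, memory_order_acquire);

	if (((prod + 1 - cons) & MASK) == 0)
		return -1;		/* full, same test as queue_full() */
	r->data[prod & MASK] = v;
	atomic_store_explicit(&r->prod, (prod + 1) & MASK,
			      memory_order_release);
	return 0;
}

/* Consumer: acquire-load the producer index before reading the slot. */
static int ring_pop(struct ring *r, int *v)
{
	uint32_t cons = atomic_load_explicit(&r->cons, memory_order_relaxed);
	uint32_t prod = atomic_load_explicit(&r->prod, memory_order_acquire);

	if (((prod - cons) & MASK) == 0)
		return -1;		/* empty, same test as queue_empty() */
	*v = r->data[cons & MASK];
	atomic_store_explicit(&r->cons, (cons + 1) & MASK,
			      memory_order_release);
	return 0;
}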
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 2fbea2b2d72a..a031514e2f41 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -244,11 +244,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
 	recv_wqe->dma.cur_sge = 0;
 	recv_wqe->dma.sge_offset = 0;
 
-	/* make sure all changes to the work queue are written before we
-	 * update the producer pointer
-	 */
-	smp_wmb();
-
 	advance_producer(rq->queue);
 
 	return 0;
@@ -633,12 +628,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 	if (unlikely(err))
 		goto err1;
 
-	/*
-	 * make sure all changes to the work queue are
-	 * written before we update the producer pointer
-	 */
-	smp_wmb();
-
 	advance_producer(sq->queue);
 	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index e591d8c1f3cf..068433e2229d 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -181,4 +181,25 @@ struct rxe_modify_srq_cmd {
 	__aligned_u64	mmap_info_addr;
 };
 
+/* This data structure is stored at the base of work and
+ * completion queues shared between user space and kernel space.
+ * It contains the producer and consumer indices. It also
+ * contains a copy of the queue size parameters for user space
+ * to use, but the kernel must use the parameters in the
+ * rxe_queue struct. For performance reasons arrange to have
+ * producer and consumer indices in separate cache lines.
+ * The kernel should always mask the indices to avoid accessing
+ * memory outside of the data area.
+ */
+struct rxe_queue_buf {
+	__u32			log2_elem_size;
+	__u32			index_mask;
+	__u32			pad_1[30];
+	__u32			producer_index;
+	__u32			pad_2[31];
+	__u32			consumer_index;
+	__u32			pad_3[31];
+	__u8			data[];
+};
+
 #endif /* RDMA_USER_RXE_H */
-- 
cgit v1.2.3-70-g09d2
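With struct rxe_queue_buf now exported through rdma_user_rxe.h, a user
space provider that has mmap()ed a queue can locate elements directly
from the shared header. A sketch of the index-to-address math, mirroring
the kernel's addr_from_index(); the helper name and the mmap plumbing
are assumptions, not part of the patch:

#include <stdint.h>
#include <rdma/rdma_user_rxe.h>	/* installed from include/uapi/rdma/rdma_user_rxe.h */

/* Hypothetical helper: byte address of the slot 'index' refers to,
 * inside a queue buffer previously obtained by mmap() of the kernel
 * queue. The index must be masked, and each element occupies
 * 1 << log2_elem_size bytes.
 */
static void *queue_slot(struct rxe_queue_buf *buf, uint32_t index)
{
	return buf->data +
	       ((size_t)(index & buf->index_mask) << buf->log2_elem_size);
}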