From 51976c6cd786151b6a1bdf8b8b3334beac0ba99c Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Fri, 27 Sep 2024 18:33:22 +0800 Subject: RDMA/core: Provide rdma_user_mmap_disassociate() to disassociate mmap pages Provide a new api rdma_user_mmap_disassociate() for drivers to disassociate mmap pages for a device. Since drivers can now disassociate mmaps by calling this api, introduce a new disassociation_lock to specifically prevent races between this disassociation process and new mmaps. And thus the old hw_destroy_rwsem is not needed in this api. Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20240927103323.1897094-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index aa8ede439905..9cb8b5fe7eee 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2948,6 +2948,14 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, size_t length, u32 min_pgoff, u32 max_pgoff); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +void rdma_user_mmap_disassociate(struct ib_device *device); +#else +static inline void rdma_user_mmap_disassociate(struct ib_device *device) +{ +} +#endif + static inline int rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext, struct rdma_user_mmap_entry *entry, -- cgit v1.3.1 From 48931f65e9f785b65244550cc8f0c8bf9eab7acd Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Tue, 15 Oct 2024 17:42:42 +0000 Subject: RDMA/efa: Add option to set QP service level on create Using modify QP with AH attributes and IB_QP_AV flag set doesn't make much sense for connectionless QP types like SRD. Add SL parameter to EFA create QP user ABI and pass it to the device. Link: https://patch.msgid.link/r/20241015174242.3490-3-mrgolin@amazon.com Reviewed-by: Firas Jahjah Reviewed-by: Yonatan Nachum Signed-off-by: Michael Margolin Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_com_cmd.c | 1 + drivers/infiniband/hw/efa/efa_com_cmd.h | 1 + drivers/infiniband/hw/efa/efa_verbs.c | 4 +++- include/uapi/rdma/efa-abi.h | 3 ++- 4 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 206f377db27e..9e04edb9dbda 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -31,6 +31,7 @@ int efa_com_create_qp(struct efa_com_dev *edev, create_qp_cmd.qp_alloc_size.recv_queue_depth = params->rq_depth; create_qp_cmd.uar = params->uarn; + create_qp_cmd.sl = params->sl; if (params->unsolicited_write_recv) EFA_SET(&create_qp_cmd.flags, EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV, 1); diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 2599f8e58cc4..25f02c0d9698 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -27,6 +27,7 @@ struct efa_com_create_qp_params { u16 pd; u16 uarn; u8 qp_type; + u8 sl; u8 unsolicited_write_recv : 1; }; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index feb04cfdb8da..ca3af866a5df 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -676,7 +676,7 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, goto err_out; } - if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_90)) { + if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_98)) { ibdev_dbg(&dev->ibdev, "Incompatible ABI params, unknown fields in udata\n"); err = -EINVAL; @@ -732,6 +732,8 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, create_qp_params.rq_base_addr = qp->rq_dma_addr; } + create_qp_params.sl = cmd.sl; + if (cmd.flags & EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV) create_qp_params.unsolicited_write_recv = true; diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index d689b8b34189..11b94b0b035b 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -95,7 +95,8 @@ struct efa_ibv_create_qp { __u32 sq_ring_size; /* bytes */ __u32 driver_qp_type; __u16 flags; - __u8 reserved_90[6]; + __u8 sl; + __u8 reserved_98[5]; }; struct efa_ibv_create_qp_resp { -- cgit v1.3.1 From 8ab3138a9b2dcb0ddf281240cf8cba414eb1224a Mon Sep 17 00:00:00 2001 From: Edward Srouji Date: Tue, 3 Sep 2024 14:37:51 +0300 Subject: net/mlx5: Introduce data placement ordering bits Introduce out-of-order (OOO) data placement (DP) IFC related bits to support OOO DP QP. Signed-off-by: Edward Srouji Reviewed-by: Yishai Hadas Link: https://patch.msgid.link/f30e5cbb5459fd02f27f35909bb545cab346b58b.1725362773.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 96d369112bfa..2a037843b117 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1872,7 +1872,11 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_328[0x2]; u8 relaxed_ordering_read[0x1]; u8 log_max_pd[0x5]; - u8 reserved_at_330[0x5]; + u8 dp_ordering_ooo_all_ud[0x1]; + u8 dp_ordering_ooo_all_uc[0x1]; + u8 dp_ordering_ooo_all_xrc[0x1]; + u8 dp_ordering_ooo_all_dc[0x1]; + u8 dp_ordering_ooo_all_rc[0x1]; u8 pcie_reset_using_hotreset_method[0x1]; u8 pci_sync_for_fw_update_with_driver_unload[0x1]; u8 vnic_env_cnt_steering_fail[0x1]; @@ -2094,7 +2098,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_0[0x80]; u8 migratable[0x1]; - u8 reserved_at_81[0x11]; + u8 reserved_at_81[0x7]; + u8 dp_ordering_force[0x1]; + u8 reserved_at_89[0x9]; u8 query_vuid[0x1]; u8 reserved_at_93[0x5]; u8 umr_log_entity_size_5[0x1]; @@ -3524,7 +3530,8 @@ struct mlx5_ifc_qpc_bits { u8 latency_sensitive[0x1]; u8 reserved_at_24[0x1]; u8 drain_sigerr[0x1]; - u8 reserved_at_26[0x2]; + u8 reserved_at_26[0x1]; + u8 dp_ordering_force[0x1]; u8 pd[0x18]; u8 mtu[0x3]; @@ -3597,7 +3604,8 @@ struct mlx5_ifc_qpc_bits { u8 rae[0x1]; u8 reserved_at_493[0x1]; u8 page_offset[0x6]; - u8 reserved_at_49a[0x3]; + u8 reserved_at_49a[0x2]; + u8 dp_ordering_1[0x1]; u8 cd_slave_receive[0x1]; u8 cd_slave_send[0x1]; u8 cd_master[0x1]; @@ -4507,7 +4515,8 @@ struct mlx5_ifc_dctc_bits { u8 state[0x4]; u8 reserved_at_8[0x18]; - u8 reserved_at_20[0x8]; + u8 reserved_at_20[0x7]; + u8 dp_ordering_force[0x1]; u8 user_index[0x18]; u8 reserved_at_40[0x8]; @@ -4522,7 +4531,9 @@ struct mlx5_ifc_dctc_bits { u8 latency_sensitive[0x1]; u8 rlky[0x1]; u8 free_ar[0x1]; - u8 reserved_at_73[0xd]; + u8 reserved_at_73[0x1]; + u8 dp_ordering_1[0x1]; + u8 reserved_at_75[0xb]; u8 reserved_at_80[0x8]; u8 cs_res[0x8]; -- cgit v1.3.1 From 8b36f7c3c6618dc97697a6a20a13b29651f68ab6 Mon Sep 17 00:00:00 2001 From: Edward Srouji Date: Tue, 3 Sep 2024 14:37:52 +0300 Subject: RDMA/mlx5: Support OOO RX WQE consumption Support QP with out-of-order (OOO) capabilities enabled. This allows WRs on the receiver side of the QP to be consumed OOO, permitting the sender side to transmit messages without guaranteeing arrival order on the receiver side. When enabled, the completion ordering of WRs remains in-order, regardless of the Receive WRs consumption order. RDMA Read and RDMA Atomic operations on the responder side continue to be executed in-order, while the ordering of data placement for RDMA Write and Send operations is not guaranteed. Atomic operations larger than 8 bytes are currently not supported. Therefore, when this feature is enabled, the created QP restricts its atomic support to 8 bytes at most. In addition, when querying the device, a new flag is returned in response to indicate that the Kernel supports OOO QP. Signed-off-by: Edward Srouji Reviewed-by: Yishai Hadas Link: https://patch.msgid.link/06ac609a5f358c8fb0a090d22c61a2f9329d82e6.1725362773.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 8 ++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/qp.c | 51 ++++++++++++++++++++++++++++++++---- include/uapi/rdma/mlx5-abi.h | 5 ++++ 4 files changed, 60 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4999239c8f41..b4476df96ed5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1182,6 +1182,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE; resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT; + + if (MLX5_CAP_GEN_2(mdev, dp_ordering_force) && + (MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_xrc) || + MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_dc) || + MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_rc) || + MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_ud) || + MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_uc))) + resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_OOO_DP; } if (offsetofend(typeof(resp), sw_parsing_caps) <= uhw_outlen) { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 23fd72f7f63d..ed4eaaa7ac71 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -521,6 +521,7 @@ struct mlx5_ib_qp { struct mlx5_bf bf; u8 has_rq:1; u8 is_rss:1; + u8 is_ooo_rq:1; /* only for user space QPs. For kernel * we have it from the bf object diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index e39b1a101e97..837b662b41de 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1960,7 +1960,7 @@ static int atomic_size_to_mode(int size_mask) } static int get_atomic_mode(struct mlx5_ib_dev *dev, - enum ib_qp_type qp_type) + struct mlx5_ib_qp *qp) { u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations); u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic); @@ -1970,7 +1970,7 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev, if (!atomic) return -EOPNOTSUPP; - if (qp_type == MLX5_IB_QPT_DCT) + if (qp->type == MLX5_IB_QPT_DCT) atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc); else atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp); @@ -1984,6 +1984,10 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev, atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD)) atomic_mode = MLX5_ATOMIC_MODE_IB_COMP; + /* OOO DP QPs do not support larger than 8-Bytes atomic operations */ + if (atomic_mode > MLX5_ATOMIC_MODE_8B && qp->is_ooo_rq) + atomic_mode = MLX5_ATOMIC_MODE_8B; + return atomic_mode; } @@ -2839,6 +2843,29 @@ static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd, return 0; } +static bool get_dp_ooo_cap(struct mlx5_core_dev *mdev, enum ib_qp_type qp_type) +{ + if (!MLX5_CAP_GEN_2(mdev, dp_ordering_force)) + return false; + + switch (qp_type) { + case IB_QPT_RC: + return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_rc); + case IB_QPT_XRC_INI: + case IB_QPT_XRC_TGT: + return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_xrc); + case IB_QPT_UC: + return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_uc); + case IB_QPT_UD: + return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_ud); + case MLX5_IB_QPT_DCI: + case MLX5_IB_QPT_DCT: + return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_dc); + default: + return false; + } +} + static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag, bool cond, struct mlx5_ib_qp *qp) { @@ -3365,7 +3392,7 @@ static int set_qpc_atomic_flags(struct mlx5_ib_qp *qp, if (access_flags & IB_ACCESS_REMOTE_ATOMIC) { int atomic_mode; - atomic_mode = get_atomic_mode(dev, qp->type); + atomic_mode = get_atomic_mode(dev, qp); if (atomic_mode < 0) return -EOPNOTSUPP; @@ -4316,6 +4343,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) MLX5_SET(qpc, qpc, deth_sqpn, 1); + if (qp->is_ooo_rq && cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { + MLX5_SET(qpc, qpc, dp_ordering_1, 1); + MLX5_SET(qpc, qpc, dp_ordering_force, 1); + } + mlx5_cur = to_mlx5_state(cur_state); mlx5_new = to_mlx5_state(new_state); @@ -4531,7 +4563,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) { int atomic_mode; - atomic_mode = get_atomic_mode(dev, MLX5_IB_QPT_DCT); + atomic_mode = get_atomic_mode(dev, qp); if (atomic_mode < 0) return -EOPNOTSUPP; @@ -4573,6 +4605,10 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit); if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) MLX5_SET(dctc, dctc, eth_prio, attr->ah_attr.sl & 0x7); + if (qp->is_ooo_rq) { + MLX5_SET(dctc, dctc, dp_ordering_1, 1); + MLX5_SET(dctc, dctc, dp_ordering_force, 1); + } err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in, MLX5_ST_SZ_BYTES(create_dct_in), out, @@ -4676,11 +4712,16 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, min(udata->inlen, sizeof(ucmd)))) return -EFAULT; - if (ucmd.comp_mask || + if (ucmd.comp_mask & ~MLX5_IB_MODIFY_QP_OOO_DP || memchr_inv(&ucmd.burst_info.reserved, 0, sizeof(ucmd.burst_info.reserved))) return -EOPNOTSUPP; + if (ucmd.comp_mask & MLX5_IB_MODIFY_QP_OOO_DP) { + if (!get_dp_ooo_cap(dev->mdev, qp->type)) + return -EOPNOTSUPP; + qp->is_ooo_rq = 1; + } } if (qp->type == IB_QPT_GSI) diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index d4f6a36dffb0..8a6ad6c6841c 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -252,6 +252,7 @@ enum mlx5_ib_query_dev_resp_flags { MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2, MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT = 1 << 3, + MLX5_IB_QUERY_DEV_RESP_FLAGS_OOO_DP = 1 << 4, }; enum mlx5_ib_tunnel_offloads { @@ -439,6 +440,10 @@ struct mlx5_ib_burst_info { __u16 reserved; }; +enum mlx5_ib_modify_qp_mask { + MLX5_IB_MODIFY_QP_OOO_DP = 1 << 0, +}; + struct mlx5_ib_modify_qp { __u32 comp_mask; struct mlx5_ib_burst_info burst_info; -- cgit v1.3.1 From af7a35bf6c36a77624d3abe46b3830b7c2a5f20c Mon Sep 17 00:00:00 2001 From: Chiara Meiohas Date: Thu, 31 Oct 2024 15:36:51 +0200 Subject: RDMA/core: Implement RoCE GID port rescan and export delete function rdma_roce_rescan_port() scans all network devices in the system and adds the gids if relevant to the RoCE device port. When not in bonding mode it adds the GIDs of the netdevice in this port. When in bonding mode it adds the GIDs of both the port's netdevice and the bond master netdevice. Export roce_del_all_netdev_gids(), which removes all GIDs associated with a specific netdevice for a given port. Signed-off-by: Chiara Meiohas Link: https://patch.msgid.link/674d498da4637a1503ff1367e28bd09ff942fd5e.1730381292.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/roce_gid_mgmt.c | 30 ++++++++++++++++++++++++++---- include/rdma/ib_verbs.h | 3 +++ 2 files changed, 29 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index d5131b3ba8ab..a9f2c6b1b29e 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -515,6 +515,27 @@ void rdma_roce_rescan_device(struct ib_device *ib_dev) } EXPORT_SYMBOL(rdma_roce_rescan_device); +/** + * rdma_roce_rescan_port - Rescan all of the network devices in the system + * and add their gids if relevant to the port of the RoCE device. + * + * @ib_dev: IB device + * @port: Port number + */ +void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port) +{ + struct net_device *ndev = NULL; + + if (rdma_protocol_roce(ib_dev, port)) { + ndev = ib_device_get_netdev(ib_dev, port); + if (!ndev) + return; + enum_all_gids_of_dev_cb(ib_dev, port, ndev, ndev); + dev_put(ndev); + } +} +EXPORT_SYMBOL(rdma_roce_rescan_port); + static void callback_for_addr_gid_device_scan(struct ib_device *device, u32 port, struct net_device *rdma_ndev, @@ -575,16 +596,17 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u32 port, } } -static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u32 port, - struct net_device *event_ndev) +void roce_del_all_netdev_gids(struct ib_device *ib_dev, + u32 port, struct net_device *ndev) { - ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev); + ib_cache_gid_del_all_netdev_gids(ib_dev, port, ndev); } +EXPORT_SYMBOL(roce_del_all_netdev_gids); static void del_netdev_upper_ips(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { - handle_netdev_upper(ib_dev, port, cookie, _roce_del_all_netdev_gids); + handle_netdev_upper(ib_dev, port, cookie, roce_del_all_netdev_gids); } static void add_netdev_upper_ips(struct ib_device *ib_dev, u32 port, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9cb8b5fe7eee..67551133b522 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4734,6 +4734,9 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector) * @device: the rdma device */ void rdma_roce_rescan_device(struct ib_device *ibdev); +void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port); +void roce_del_all_netdev_gids(struct ib_device *ib_dev, + u32 port, struct net_device *ndev); struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); -- cgit v1.3.1 From dc6be4418a1144cce422093cde0245c76cdcaff0 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 31 Oct 2024 13:22:51 +0200 Subject: RDMA/core: Add device ufile cleanup operation Add a driver operation to allow preemptive cleanup of ufile HW resources before the standard ufile cleanup flow begins. Thus, expediting the final cleanup phase which leads to fast teardown overall. This allows the use of driver specific clean up procedures to make the cleanup process more efficient. Signed-off-by: Patrisious Haddad Link: https://patch.msgid.link/cabe00d75132b5732cb515944e3c500a01fb0b4a.1730373303.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/rdma_core.c | 7 ++++++- include/rdma/ib_verbs.h | 6 ++++++ 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index e029401b5680..de4ffc9eb37d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2759,6 +2759,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, resize_cq); SET_DEVICE_OP(dev_ops, set_vf_guid); SET_DEVICE_OP(dev_ops, set_vf_link_state); + SET_DEVICE_OP(dev_ops, ufile_hw_cleanup); SET_OBJ_SIZE(dev_ops, ib_ah); SET_OBJ_SIZE(dev_ops, ib_counters); diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 29b1ab1d5f93..02ef09e77bf8 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -880,9 +880,14 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, enum rdma_remove_reason reason) { + struct uverbs_attr_bundle attrs = { .ufile = ufile }; + struct ib_ucontext *ucontext = ufile->ucontext; + struct ib_device *ib_dev = ucontext->device; struct ib_uobject *obj, *next_obj; int ret = -EINVAL; - struct uverbs_attr_bundle attrs = { .ufile = ufile }; + + if (ib_dev->ops.ufile_hw_cleanup) + ib_dev->ops.ufile_hw_cleanup(ufile); /* * This shouldn't run while executing other commands on this diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 67551133b522..3417636da960 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2675,6 +2675,12 @@ struct ib_device_ops { */ void (*del_sub_dev)(struct ib_device *sub_dev); + /** + * ufile_cleanup - Attempt to cleanup ubojects HW resources inside + * the ufile. + */ + void (*ufile_hw_cleanup)(struct ib_uverbs_file *ufile); + DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_counters); DECLARE_RDMA_OBJ_SIZE(ib_cq); -- cgit v1.3.1 From 27ed2f00807c2328c99751f9500ce6478f16cf7b Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 31 Oct 2024 13:22:52 +0200 Subject: RDMA/core: Move ib_uverbs_file struct to uverbs_types.h In light of the previous commit, make the ib_uverbs_file accessible to drivers by moving its definition to uverbs_types.h, to allow drivers to freely access the struct argument and create a personalized cleanup flow. For the same reason expose uverbs_try_lock_object function to allow driver to safely access the uverbs objects. Signed-off-by: Patrisious Haddad Link: https://patch.msgid.link/29b718e0dca35daa5f496320a39284fc1f5a1722.1730373303.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/rdma_core.c | 5 +++-- drivers/infiniband/core/uverbs.h | 31 ------------------------------- include/rdma/uverbs_types.h | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 02ef09e77bf8..90c177edf9b0 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -58,8 +58,8 @@ void uverbs_uobject_put(struct ib_uobject *uobject) } EXPORT_SYMBOL(uverbs_uobject_put); -static int uverbs_try_lock_object(struct ib_uobject *uobj, - enum rdma_lookup_mode mode) +int uverbs_try_lock_object(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { /* * When a shared access is required, we use a positive counter. Each @@ -84,6 +84,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, } return 0; } +EXPORT_SYMBOL(uverbs_try_lock_object); static void assert_uverbs_usecnt(struct ib_uobject *uobj, enum rdma_lookup_mode mode) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index dfd2e5a86e6f..797e2fcc8072 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -133,37 +133,6 @@ struct ib_uverbs_completion_event_file { struct ib_uverbs_event_queue ev_queue; }; -struct ib_uverbs_file { - struct kref ref; - struct ib_uverbs_device *device; - struct mutex ucontext_lock; - /* - * ucontext must be accessed via ib_uverbs_get_ucontext() or with - * ucontext_lock held - */ - struct ib_ucontext *ucontext; - struct ib_uverbs_async_event_file *default_async_file; - struct list_head list; - - /* - * To access the uobjects list hw_destroy_rwsem must be held for write - * OR hw_destroy_rwsem held for read AND uobjects_lock held. - * hw_destroy_rwsem should be called across any destruction of the HW - * object of an associated uobject. - */ - struct rw_semaphore hw_destroy_rwsem; - spinlock_t uobjects_lock; - struct list_head uobjects; - - struct mutex umap_lock; - struct list_head umaps; - struct page *disassociate_page; - - struct xarray idr; - - struct mutex disassociation_lock; -}; - struct ib_uverbs_event { union { struct ib_uverbs_async_event_desc async; diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index ccd11631c167..26ba919ac245 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -134,6 +134,8 @@ static inline void uverbs_uobject_get(struct ib_uobject *uobject) } void uverbs_uobject_put(struct ib_uobject *uobject); +int uverbs_try_lock_object(struct ib_uobject *uobj, enum rdma_lookup_mode mode); + struct uverbs_obj_fd_type { /* * In fd based objects, uverbs_obj_type_ops points to generic @@ -150,6 +152,37 @@ struct uverbs_obj_fd_type { int flags; }; +struct ib_uverbs_file { + struct kref ref; + struct ib_uverbs_device *device; + struct mutex ucontext_lock; + /* + * ucontext must be accessed via ib_uverbs_get_ucontext() or with + * ucontext_lock held + */ + struct ib_ucontext *ucontext; + struct ib_uverbs_async_event_file *default_async_file; + struct list_head list; + + /* + * To access the uobjects list hw_destroy_rwsem must be held for write + * OR hw_destroy_rwsem held for read AND uobjects_lock held. + * hw_destroy_rwsem should be called across any destruction of the HW + * object of an associated uobject. + */ + struct rw_semaphore hw_destroy_rwsem; + spinlock_t uobjects_lock; + struct list_head uobjects; + + struct mutex umap_lock; + struct list_head umaps; + struct page *disassociate_page; + + struct xarray idr; + + struct mutex disassociation_lock; +}; + extern const struct uverbs_obj_type_class uverbs_idr_class; extern const struct uverbs_obj_type_class uverbs_fd_class; int uverbs_uobject_fd_release(struct inode *inode, struct file *filp); -- cgit v1.3.1 From 7566752e4d7d7fc0186531aa800068a7243f95c1 Mon Sep 17 00:00:00 2001 From: Chiara Meiohas Date: Thu, 31 Oct 2024 11:31:14 +0200 Subject: RDMA/nldev: Add IB device and net device rename events Implement event sending for IB device rename and IB device port associated netdevice rename. In iproute2, rdma monitor displays the IB device name, port and the netdevice name when displaying event info. Since users can modiy these names, we track and notify on renaming events. Note: In order to receive netdevice rename events, drivers must use the ib_device_set_netdev() API when attaching net devices to IB devices. $ rdma monitor $ rmmod mlx5_ib [UNREGISTER] dev 1 rocep8s0f1 [UNREGISTER] dev 0 rocep8s0f0 $ modprobe mlx5_ib [REGISTER] dev 2 mlx5_0 [NETDEV_ATTACH] dev 2 mlx5_0 port 1 netdev 4 eth2 [REGISTER] dev 3 mlx5_1 [NETDEV_ATTACH] dev 3 mlx5_1 port 1 netdev 5 eth3 [RENAME] dev 2 rocep8s0f0 [RENAME] dev 3 rocep8s0f1 $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev [UNREGISTER] dev 2 rocep8s0f0 [REGISTER] dev 4 mlx5_0 [NETDEV_ATTACH] dev 4 mlx5_0 port 30 netdev 4 eth2 [RENAME] dev 4 rdmap8s0f0 $ echo 4 > /sys/class/net/eth2/device/sriov_numvfs [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 2 netdev 7 eth4 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 3 netdev 8 eth5 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 4 netdev 9 eth6 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 5 netdev 10 eth7 [REGISTER] dev 5 mlx5_0 [NETDEV_ATTACH] dev 5 mlx5_0 port 1 netdev 11 eth8 [REGISTER] dev 6 mlx5_1 [NETDEV_ATTACH] dev 6 mlx5_1 port 1 netdev 12 eth9 [RENAME] dev 5 rocep8s0f0v0 [RENAME] dev 6 rocep8s0f0v1 [REGISTER] dev 7 mlx5_0 [NETDEV_ATTACH] dev 7 mlx5_0 port 1 netdev 13 eth10 [RENAME] dev 7 rocep8s0f0v2 [REGISTER] dev 8 mlx5_0 [NETDEV_ATTACH] dev 8 mlx5_0 port 1 netdev 14 eth11 [RENAME] dev 8 rocep8s0f0v3 $ ip link set eth2 name myeth2 [NETDEV_RENAME] netdev 4 myeth2 $ ip link set eth1 name myeth1 ** no events received, because eth1 is not attached to an IB device ** Signed-off-by: Chiara Meiohas Link: https://patch.msgid.link/093c978ef2766fd3ab4ff8798eeb68f2f11582f6.1730367038.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/device.c | 38 ++++++++++++++++++++++++++++++++++++++ drivers/infiniband/core/nldev.c | 40 ++++++++++++++++++++++++++++++++++++++-- include/uapi/rdma/rdma_netlink.h | 2 ++ 3 files changed, 78 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index de4ffc9eb37d..ca9b956c034d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -437,6 +437,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name) client->rename(ibdev, client_data); } up_read(&ibdev->client_data_rwsem); + rdma_nl_notify_event(ibdev, 0, RDMA_RENAME_EVENT); up_read(&devices_rwsem); return 0; } @@ -2853,6 +2854,40 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { }, }; +static int ib_netdevice_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct net_device *ib_ndev; + struct ib_device *ibdev; + u32 port; + + switch (event) { + case NETDEV_CHANGENAME: + ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN); + if (!ibdev) + return NOTIFY_DONE; + + rdma_for_each_port(ibdev, port) { + ib_ndev = ib_device_get_netdev(ibdev, port); + if (ndev == ib_ndev) + rdma_nl_notify_event(ibdev, port, + RDMA_NETDEV_RENAME_EVENT); + dev_put(ib_ndev); + } + ib_device_put(ibdev); + break; + default: + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block nb_netdevice = { + .notifier_call = ib_netdevice_event, +}; + static int __init ib_core_init(void) { int ret = -ENOMEM; @@ -2924,6 +2959,8 @@ static int __init ib_core_init(void) goto err_parent; } + register_netdevice_notifier(&nb_netdevice); + return 0; err_parent: @@ -2953,6 +2990,7 @@ err: static void __exit ib_core_cleanup(void) { + unregister_netdevice_notifier(&nb_netdevice); roce_gid_mgmt_cleanup(); rdma_nl_unregister(RDMA_NL_LS); nldev_exit(); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 39f89a4b8649..0034c495cbbe 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -2729,6 +2729,25 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { }, }; +static int fill_mon_netdev_rename(struct sk_buff *msg, + struct ib_device *device, u32 port, + const struct net *net) +{ + struct net_device *netdev = ib_device_get_netdev(device, port); + int ret = 0; + + if (!netdev || !net_eq(dev_net(netdev), net)) + goto out; + + ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); + if (ret) + goto out; + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); +out: + dev_put(netdev); + return ret; +} + static int fill_mon_netdev_association(struct sk_buff *msg, struct ib_device *device, u32 port, const struct net *net) @@ -2793,6 +2812,18 @@ static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num, "Failed to send RDMA monitor netdev detach event: port %d\n", port_num); break; + case RDMA_RENAME_EVENT: + dev_warn_ratelimited(&device->dev, + "Failed to send RDMA monitor rename device event\n"); + break; + + case RDMA_NETDEV_RENAME_EVENT: + netdev = ib_device_get_netdev(device, port_num); + dev_warn_ratelimited(&device->dev, + "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n", + port_num, netdev->ifindex); + dev_put(netdev); + break; default: break; } @@ -2820,14 +2851,19 @@ int rdma_nl_notify_event(struct ib_device *device, u32 port_num, switch (type) { case RDMA_REGISTER_EVENT: case RDMA_UNREGISTER_EVENT: + case RDMA_RENAME_EVENT: ret = fill_nldev_handle(skb, device); if (ret) goto err_free; break; case RDMA_NETDEV_ATTACH_EVENT: case RDMA_NETDEV_DETACH_EVENT: - ret = fill_mon_netdev_association(skb, device, - port_num, net); + ret = fill_mon_netdev_association(skb, device, port_num, net); + if (ret) + goto err_free; + break; + case RDMA_NETDEV_RENAME_EVENT: + ret = fill_mon_netdev_rename(skb, device, port_num, net); if (ret) goto err_free; break; diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 39be09c0ffbb..9f9cf20c1cd8 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -638,6 +638,8 @@ enum rdma_nl_notify_event_type { RDMA_UNREGISTER_EVENT, RDMA_NETDEV_ATTACH_EVENT, RDMA_NETDEV_DETACH_EVENT, + RDMA_RENAME_EVENT, + RDMA_NETDEV_RENAME_EVENT, }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.3.1