summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-09 09:02:46 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-09 09:02:46 -0700
commitdce45af5c2e9e85f22578f2f8065f225f5d11764 (patch)
treee01e7a294586c3074142fb485516ce718a1a82d2 /include
parent055128ee008b00fba14e3638e7e84fc2cff8d77d (diff)
parentb79656ed44c6865e17bcd93472ec39488bcc4984 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "This has been a smaller cycle than normal. One new driver was accepted, which is unusual, and at least one more driver remains in review on the list. Summary: - Driver fixes for hns, hfi1, nes, rxe, i40iw, mlx5, cxgb4, vmw_pvrdma - Many patches from MatthewW converting radix tree and IDR users to use xarray - Introduction of tracepoints to the MAD layer - Build large SGLs at the start for DMA mapping and get the driver to split them - Generally clean SGL handling code throughout the subsystem - Support for restricting RDMA devices to net namespaces for containers - Progress to remove object allocation boilerplate code from drivers - Change in how the mlx5 driver shows representor ports linked to VFs - mlx5 uapi feature to access the on chip SW ICM memory - Add a new driver for 'EFA'. This is HW that supports user space packet processing through QPs in Amazon's cloud" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (186 commits) RDMA/ipoib: Allow user space differentiate between valid dev_port IB/core, ipoib: Do not overreact to SM LID change event RDMA/device: Don't fire uevent before device is fully initialized lib/scatterlist: Remove leftover from sg_page_iter comment RDMA/efa: Add driver to Kconfig/Makefile RDMA/efa: Add the efa module RDMA/efa: Add EFA verbs implementation RDMA/efa: Add common command handlers RDMA/efa: Implement functions that submit and complete admin commands RDMA/efa: Add the ABI definitions RDMA/efa: Add the com service API definitions RDMA/efa: Add the efa_com.h file RDMA/efa: Add the efa.h header file RDMA/efa: Add EFA device definitions RDMA: Add EFA related definitions RDMA/umem: Remove hugetlb flag RDMA/bnxt_re: Use core helpers to get aligned DMA address RDMA/i40iw: Use core helpers to get aligned DMA address within a supported page size RDMA/verbs: Add a DMA iterator to return aligned contiguous memory blocks RDMA/umem: Add API to find best driver supported page size in an MR ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/dynamic_debug.h11
-rw-r--r--include/linux/mlx5/driver.h1
-rw-r--r--include/linux/overflow.h12
-rw-r--r--include/linux/scatterlist.h10
-rw-r--r--include/rdma/ib_cache.h4
-rw-r--r--include/rdma/ib_mad.h4
-rw-r--r--include/rdma/ib_smi.h2
-rw-r--r--include/rdma/ib_umem.h12
-rw-r--r--include/rdma/ib_umem_odp.h1
-rw-r--r--include/rdma/ib_verbs.h430
-rw-r--r--include/rdma/iw_cm.h25
-rw-r--r--include/rdma/opa_port_info.h2
-rw-r--r--include/rdma/opa_smi.h4
-rw-r--r--include/rdma/rdma_vt.h78
-rw-r--r--include/rdma/rdmavt_qp.h89
-rw-r--r--include/rdma/uverbs_std_types.h42
-rw-r--r--include/rdma/uverbs_types.h18
-rw-r--r--include/trace/events/ib_mad.h390
-rw-r--r--include/trace/events/ib_umad.h126
-rw-r--r--include/uapi/rdma/efa-abi.h101
-rw-r--r--include/uapi/rdma/mlx5-abi.h2
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_cmds.h2
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_verbs.h7
-rw-r--r--include/uapi/rdma/rdma_netlink.h31
-rw-r--r--include/uapi/rdma/rdma_user_ioctl_cmds.h1
25 files changed, 1177 insertions, 228 deletions
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index c2be029b9b53..6c809440f319 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -71,6 +71,13 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor,
const struct net_device *dev,
const char *fmt, ...);
+struct ib_device;
+
+extern __printf(3, 4)
+void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
+ const struct ib_device *ibdev,
+ const char *fmt, ...);
+
#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \
static struct _ddebug __aligned(8) \
__attribute__((section("__verbose"))) name = { \
@@ -154,6 +161,10 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor,
_dynamic_func_call(fmt, __dynamic_netdev_dbg, \
dev, fmt, ##__VA_ARGS__)
+#define dynamic_ibdev_dbg(dev, fmt, ...) \
+ _dynamic_func_call(fmt, __dynamic_ibdev_dbg, \
+ dev, fmt, ##__VA_ARGS__)
+
#define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \
groupsize, buf, len, ascii) \
_dynamic_func_call_no_desc(__builtin_constant_p(prefix_str) ? prefix_str : "hexdump", \
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 5a39b323c52e..5a27246db883 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -689,7 +689,6 @@ struct mlx5_core_dev {
#endif
struct mlx5_clock clock;
struct mlx5_ib_clock_info *clock_info;
- struct page *clock_info_page;
struct mlx5_fw_tracer *tracer;
};
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 40b48e2133cb..15eb85de9226 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -36,6 +36,12 @@
#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
#define type_min(T) ((T)((T)-type_max(T)-(T)1))
+/*
+ * Avoids triggering -Wtype-limits compilation warning,
+ * while using unsigned data types to check a < 0.
+ */
+#define is_non_negative(a) ((a) > 0 || (a) == 0)
+#define is_negative(a) (!(is_non_negative(a)))
#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
/*
@@ -227,10 +233,10 @@
typeof(d) _d = d; \
u64 _a_full = _a; \
unsigned int _to_shift = \
- _s >= 0 && _s < 8 * sizeof(*d) ? _s : 0; \
+ is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \
*_d = (_a_full << _to_shift); \
- (_to_shift != _s || *_d < 0 || _a < 0 || \
- (*_d >> _to_shift) != _a); \
+ (_to_shift != _s || is_negative(*_d) || is_negative(_a) || \
+ (*_d >> _to_shift) != _a); \
})
/**
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index b4be960c7e5d..30a9a55c28ba 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -340,11 +340,11 @@ int sg_alloc_table_chained(struct sg_table *table, int nents,
* sg page iterator
*
* Iterates over sg entries page-by-page. On each successful iteration, you
- * can call sg_page_iter_page(@piter) to get the current page and its dma
- * address. @piter->sg will point to the sg holding this page and
- * @piter->sg_pgoffset to the page's page offset within the sg. The iteration
- * will stop either when a maximum number of sg entries was reached or a
- * terminating sg (sg_last(sg) == true) was reached.
+ * can call sg_page_iter_page(@piter) to get the current page.
+ * @piter->sg will point to the sg holding this page and @piter->sg_pgoffset to
+ * the page's page offset within the sg. The iteration will stop either when a
+ * maximum number of sg entries was reached or a terminating sg
+ * (sg_last(sg) == true) was reached.
*/
struct sg_page_iter {
struct scatterlist *sg; /* sg holding the page */
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 62e990b620aa..870b5e6c06db 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -54,6 +54,10 @@ const struct ib_gid_attr *rdma_find_gid_by_filter(
void *),
void *context);
+int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
+ u16 *vlan_id, u8 *smac);
+struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
+
/**
* ib_get_cached_pkey - Returns a cached PKey table entry
* @device: The device to query.
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 79ba8219e7dc..eea946fcc819 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -198,7 +198,7 @@ struct ib_sa_hdr {
__be16 attr_offset;
__be16 reserved;
ib_sa_comp_mask comp_mask;
-} __attribute__ ((packed));
+} __packed;
struct ib_mad {
struct ib_mad_hdr mad_hdr;
@@ -227,7 +227,7 @@ struct ib_sa_mad {
struct ib_rmpp_hdr rmpp_hdr;
struct ib_sa_hdr sa_hdr;
u8 data[IB_MGMT_SA_DATA];
-} __attribute__ ((packed));
+} __packed;
struct ib_vendor_mad {
struct ib_mad_hdr mad_hdr;
diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h
index b439e988408e..7be0028f155c 100644
--- a/include/rdma/ib_smi.h
+++ b/include/rdma/ib_smi.h
@@ -61,7 +61,7 @@ struct ib_smp {
u8 data[IB_SMP_DATA_SIZE];
u8 initial_path[IB_SMP_MAX_PATH_HOPS];
u8 return_path[IB_SMP_MAX_PATH_HOPS];
-} __attribute__ ((packed));
+} __packed;
#define IB_SMP_DIRECTION cpu_to_be16(0x8000)
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 73af05db04c7..040d853077c6 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -48,12 +48,11 @@ struct ib_umem {
unsigned long address;
int page_shift;
u32 writable : 1;
- u32 hugetlb : 1;
u32 is_odp : 1;
struct work_struct work;
struct sg_table sg_head;
int nmap;
- int npages;
+ unsigned int sg_nents;
};
/* Returns the offset of the umem start relative to the first page. */
@@ -87,6 +86,9 @@ void ib_umem_release(struct ib_umem *umem);
int ib_umem_page_count(struct ib_umem *umem);
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
size_t length);
+unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ unsigned long virt);
#else /* CONFIG_INFINIBAND_USER_MEM */
@@ -104,6 +106,12 @@ static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offs
size_t length) {
return -EINVAL;
}
+static inline int ib_umem_find_best_pgsz(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ unsigned long virt) {
+ return -EINVAL;
+}
+
#endif /* CONFIG_INFINIBAND_USER_MEM */
#endif /* IB_UMEM_H */
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index dadc96dea39c..eeec4e53c448 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -69,6 +69,7 @@ struct ib_umem_odp {
int notifiers_seq;
int notifiers_count;
+ int npages;
/* Tree tracking */
struct umem_odp_node interval_tree;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9b9e17bcc201..0742095355f2 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -59,6 +59,8 @@
#include <linux/mmu_notifier.h>
#include <linux/uaccess.h>
#include <linux/cgroup_rdma.h>
+#include <linux/irqflags.h>
+#include <linux/preempt.h>
#include <uapi/rdma/ib_user_verbs.h>
#include <rdma/restrack.h>
#include <uapi/rdma/rdma_user_ioctl.h>
@@ -72,6 +74,36 @@ extern struct workqueue_struct *ib_wq;
extern struct workqueue_struct *ib_comp_wq;
extern struct workqueue_struct *ib_comp_unbound_wq;
+__printf(3, 4) __cold
+void ibdev_printk(const char *level, const struct ib_device *ibdev,
+ const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_emerg(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_alert(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_crit(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_err(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_warn(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_notice(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_info(const struct ib_device *ibdev, const char *format, ...);
+
+#if defined(CONFIG_DYNAMIC_DEBUG)
+#define ibdev_dbg(__dev, format, args...) \
+ dynamic_ibdev_dbg(__dev, format, ##args)
+#elif defined(DEBUG)
+#define ibdev_dbg(__dev, format, args...) \
+ ibdev_printk(KERN_DEBUG, __dev, format, ##args)
+#else
+__printf(2, 3) __cold
+static inline
+void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {}
+#endif
+
union ib_gid {
u8 raw[16];
struct {
@@ -92,7 +124,7 @@ enum ib_gid_type {
#define ROCE_V2_UDP_DPORT 4791
struct ib_gid_attr {
- struct net_device *ndev;
+ struct net_device __rcu *ndev;
struct ib_device *device;
union ib_gid gid;
enum ib_gid_type gid_type;
@@ -108,6 +140,7 @@ enum rdma_node_type {
RDMA_NODE_RNIC,
RDMA_NODE_USNIC,
RDMA_NODE_USNIC_UDP,
+ RDMA_NODE_UNSPECIFIED,
};
enum {
@@ -119,7 +152,8 @@ enum rdma_transport_type {
RDMA_TRANSPORT_IB,
RDMA_TRANSPORT_IWARP,
RDMA_TRANSPORT_USNIC,
- RDMA_TRANSPORT_USNIC_UDP
+ RDMA_TRANSPORT_USNIC_UDP,
+ RDMA_TRANSPORT_UNSPECIFIED,
};
enum rdma_protocol_type {
@@ -2189,8 +2223,6 @@ struct ib_cache {
struct ib_event_handler event_handler;
};
-struct iw_cm_verbs;
-
struct ib_port_immutable {
int pkey_tbl_len;
int gid_tbl_len;
@@ -2272,6 +2304,8 @@ struct ib_counters_read_attr {
};
struct uverbs_attr_bundle;
+struct iw_cm_id;
+struct iw_cm_conn_param;
#define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \
.size_##ib_struct = \
@@ -2281,8 +2315,11 @@ struct uverbs_attr_bundle;
!__same_type(((struct drv_struct *)NULL)->member, \
struct ib_struct)))
+#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \
+ ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp))
+
#define rdma_zalloc_drv_obj(ib_dev, ib_type) \
- ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, GFP_KERNEL))
+ rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL)
#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
@@ -2394,23 +2431,21 @@ struct ib_device_ops {
void (*dealloc_ucontext)(struct ib_ucontext *context);
int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
- int (*alloc_pd)(struct ib_pd *pd, struct ib_ucontext *context,
- struct ib_udata *udata);
- void (*dealloc_pd)(struct ib_pd *pd);
- struct ib_ah *(*create_ah)(struct ib_pd *pd,
- struct rdma_ah_attr *ah_attr, u32 flags,
- struct ib_udata *udata);
+ int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+ void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+ int (*create_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
+ u32 flags, struct ib_udata *udata);
int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
- int (*destroy_ah)(struct ib_ah *ah, u32 flags);
- struct ib_srq *(*create_srq)(struct ib_pd *pd,
- struct ib_srq_init_attr *srq_init_attr,
- struct ib_udata *udata);
+ void (*destroy_ah)(struct ib_ah *ah, u32 flags);
+ int (*create_srq)(struct ib_srq *srq,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
- int (*destroy_srq)(struct ib_srq *srq);
+ void (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
struct ib_qp *(*create_qp)(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr,
struct ib_udata *udata);
@@ -2418,13 +2453,12 @@ struct ib_device_ops {
int qp_attr_mask, struct ib_udata *udata);
int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
- int (*destroy_qp)(struct ib_qp *qp);
+ int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata);
struct ib_cq *(*create_cq)(struct ib_device *device,
const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
struct ib_udata *udata);
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
- int (*destroy_cq)(struct ib_cq *cq);
+ int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
@@ -2433,9 +2467,9 @@ struct ib_device_ops {
int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_pd *pd, struct ib_udata *udata);
- int (*dereg_mr)(struct ib_mr *mr);
+ int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg);
+ u32 max_num_sg, struct ib_udata *udata);
int (*advise_mr)(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice, u32 flags,
struct ib_sge *sg_list, u32 num_sge,
@@ -2456,9 +2490,8 @@ struct ib_device_ops {
int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,
- struct ib_ucontext *ucontext,
struct ib_udata *udata);
- int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
+ int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
struct ib_flow *(*create_flow)(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
int domain, struct ib_udata *udata);
@@ -2483,7 +2516,7 @@ struct ib_device_ops {
struct ib_wq *(*create_wq)(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
- int (*destroy_wq)(struct ib_wq *wq);
+ int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
u32 wq_attr_mask, struct ib_udata *udata);
struct ib_rwq_ind_table *(*create_rwq_ind_table)(
@@ -2495,7 +2528,7 @@ struct ib_device_ops {
struct ib_ucontext *context,
struct ib_dm_alloc_attr *attr,
struct uverbs_attr_bundle *attrs);
- int (*dealloc_dm)(struct ib_dm *dm);
+ int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs);
struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
struct ib_dm_mr_attr *attr,
struct uverbs_attr_bundle *attrs);
@@ -2550,12 +2583,37 @@ struct ib_device_ops {
*/
void (*dealloc_driver)(struct ib_device *dev);
+ /* iWarp CM callbacks */
+ void (*iw_add_ref)(struct ib_qp *qp);
+ void (*iw_rem_ref)(struct ib_qp *qp);
+ struct ib_qp *(*iw_get_qp)(struct ib_device *device, int qpn);
+ int (*iw_connect)(struct iw_cm_id *cm_id,
+ struct iw_cm_conn_param *conn_param);
+ int (*iw_accept)(struct iw_cm_id *cm_id,
+ struct iw_cm_conn_param *conn_param);
+ int (*iw_reject)(struct iw_cm_id *cm_id, const void *pdata,
+ u8 pdata_len);
+ int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog);
+ int (*iw_destroy_listen)(struct iw_cm_id *cm_id);
+
+ DECLARE_RDMA_OBJ_SIZE(ib_ah);
DECLARE_RDMA_OBJ_SIZE(ib_pd);
+ DECLARE_RDMA_OBJ_SIZE(ib_srq);
DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
};
-struct rdma_restrack_root;
+struct ib_core_device {
+ /* device must be the first element in structure until,
+ * union of ib_core_device and device exists in ib_device.
+ */
+ struct device dev;
+ possible_net_t rdma_net;
+ struct kobject *ports_kobj;
+ struct list_head port_list;
+ struct ib_device *owner; /* reach back to owner ib_device */
+};
+struct rdma_restrack_root;
struct ib_device {
/* Do not access @dma_device directly from ULP nor from HW drivers. */
struct device *dma_device;
@@ -2578,19 +2636,18 @@ struct ib_device {
int num_comp_vectors;
- struct iw_cm_verbs *iwcm;
-
struct module *owner;
- struct device dev;
+ union {
+ struct device dev;
+ struct ib_core_device coredev;
+ };
+
/* First group for device attributes,
* Second group for driver provided attributes (optional).
* It is NULL terminated array.
*/
const struct attribute_group *groups[3];
- struct kobject *ports_kobj;
- struct list_head port_list;
-
int uverbs_abi_ver;
u64 uverbs_cmd_mask;
u64 uverbs_ex_cmd_mask;
@@ -2626,6 +2683,15 @@ struct ib_device {
struct work_struct unregistration_work;
const struct rdma_link_ops *link_ops;
+
+ /* Protects compat_devs xarray modifications */
+ struct mutex compat_devs_mutex;
+ /* Maintains compat devices for each net namespace */
+ struct xarray compat_devs;
+
+ /* Used by iWarp CM */
+ char iw_ifname[IFNAMSIZ];
+ u32 iw_driver_flags;
};
struct ib_client {
@@ -2662,6 +2728,21 @@ struct ib_client {
u8 no_kverbs_req:1;
};
+/*
+ * IB block DMA iterator
+ *
+ * Iterates the DMA-mapped SGL in contiguous memory blocks aligned
+ * to a HW supported page size.
+ */
+struct ib_block_iter {
+ /* internal states */
+ struct scatterlist *__sg; /* sg holding the current aligned block */
+ dma_addr_t __dma_addr; /* unaligned DMA address of this block */
+ unsigned int __sg_nents; /* number of SG entries */
+ unsigned int __sg_advance; /* number of bytes to advance in sg in next step */
+ unsigned int __pg_bit; /* alignment of current block */
+};
+
struct ib_device *_ib_alloc_device(size_t size);
#define ib_alloc_device(drv_struct, member) \
container_of(_ib_alloc_device(sizeof(struct drv_struct) + \
@@ -2682,6 +2763,38 @@ void ib_unregister_device_queued(struct ib_device *ib_dev);
int ib_register_client (struct ib_client *client);
void ib_unregister_client(struct ib_client *client);
+void __rdma_block_iter_start(struct ib_block_iter *biter,
+ struct scatterlist *sglist,
+ unsigned int nents,
+ unsigned long pgsz);
+bool __rdma_block_iter_next(struct ib_block_iter *biter);
+
+/**
+ * rdma_block_iter_dma_address - get the aligned dma address of the current
+ * block held by the block iterator.
+ * @biter: block iterator holding the memory block
+ */
+static inline dma_addr_t
+rdma_block_iter_dma_address(struct ib_block_iter *biter)
+{
+ return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1);
+}
+
+/**
+ * rdma_for_each_block - iterate over contiguous memory blocks of the sg list
+ * @sglist: sglist to iterate over
+ * @biter: block iterator holding the memory block
+ * @nents: maximum number of sg entries to iterate over
+ * @pgsz: best HW supported page size to use
+ *
+ * Callers may use rdma_block_iter_dma_address() to get each
+ * blocks aligned DMA address.
+ */
+#define rdma_for_each_block(sglist, biter, nents, pgsz) \
+ for (__rdma_block_iter_start(biter, sglist, nents, \
+ pgsz); \
+ __rdma_block_iter_next(biter);)
+
/**
* ib_get_client_data - Get IB client context
* @device:Device to get context for
@@ -2705,9 +2818,6 @@ void ib_set_device_ops(struct ib_device *device,
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
unsigned long pfn, unsigned long size, pgprot_t prot);
-int rdma_user_mmap_page(struct ib_ucontext *ucontext,
- struct vm_area_struct *vma, struct page *page,
- unsigned long size);
#else
static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
struct vm_area_struct *vma,
@@ -2716,12 +2826,6 @@ static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
{
return -EINVAL;
}
-static inline int rdma_user_mmap_page(struct ib_ucontext *ucontext,
- struct vm_area_struct *vma, struct page *page,
- unsigned long size)
-{
- return -EINVAL;
-}
#endif
static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
@@ -2978,8 +3082,8 @@ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
*/
static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
{
- return (device->port_data[port_num].immutable.core_cap_flags &
- RDMA_CORE_CAP_OPA_MAD) == RDMA_CORE_CAP_OPA_MAD;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_OPA_MAD;
}
/**
@@ -3195,6 +3299,30 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
return rdma_protocol_iwarp(dev, port_num);
}
+/**
+ * rdma_find_pg_bit - Find page bit given address and HW supported page sizes
+ *
+ * @addr: address
+ * @pgsz_bitmap: bitmap of HW supported page sizes
+ */
+static inline unsigned int rdma_find_pg_bit(unsigned long addr,
+ unsigned long pgsz_bitmap)
+{
+ unsigned long align;
+ unsigned long pgsz;
+
+ align = addr & -addr;
+
+ /* Find page bit such that addr is aligned to the highest supported
+ * HW page size
+ */
+ pgsz = pgsz_bitmap & ~(-align << 1);
+ if (!pgsz)
+ return __ffs(pgsz_bitmap);
+
+ return __fls(pgsz);
+}
+
int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
int state);
int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
@@ -3236,9 +3364,27 @@ enum ib_pd_flags {
struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
const char *caller);
+
#define ib_alloc_pd(device, flags) \
__ib_alloc_pd((device), (flags), KBUILD_MODNAME)
-void ib_dealloc_pd(struct ib_pd *pd);
+
+/**
+ * ib_dealloc_pd_user - Deallocate kernel/user PD
+ * @pd: The protection domain
+ * @udata: Valid user data or NULL for kernel objects
+ */
+void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
+
+/**
+ * ib_dealloc_pd - Deallocate kernel PD
+ * @pd: The protection domain
+ *
+ * NOTE: for user PD use ib_dealloc_pd_user with valid udata!
+ */
+static inline void ib_dealloc_pd(struct ib_pd *pd)
+{
+ ib_dealloc_pd_user(pd, NULL);
+}
enum rdma_create_ah_flags {
/* In a sleepable context */
@@ -3351,11 +3497,24 @@ enum rdma_destroy_ah_flags {
};
/**
- * rdma_destroy_ah - Destroys an address handle.
+ * rdma_destroy_ah_user - Destroys an address handle.
* @ah: The address handle to destroy.
* @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
+ * @udata: Valid user data or NULL for kernel objects
*/
-int rdma_destroy_ah(struct ib_ah *ah, u32 flags);
+int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata);
+
+/**
+ * rdma_destroy_ah - Destroys an kernel address handle.
+ * @ah: The address handle to destroy.
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
+ *
+ * NOTE: for user ah use rdma_destroy_ah_user with valid udata!
+ */
+static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return rdma_destroy_ah_user(ah, flags, NULL);
+}
/**
* ib_create_srq - Creates a SRQ associated with the specified protection
@@ -3399,10 +3558,22 @@ int ib_query_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr);
/**
- * ib_destroy_srq - Destroys the specified SRQ.
+ * ib_destroy_srq_user - Destroys the specified SRQ.
* @srq: The SRQ to destroy.
+ * @udata: Valid user data or NULL for kernel objects
*/
-int ib_destroy_srq(struct ib_srq *srq);
+int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata);
+
+/**
+ * ib_destroy_srq - Destroys the specified kernel SRQ.
+ * @srq: The SRQ to destroy.
+ *
+ * NOTE: for user srq use ib_destroy_srq_user with valid udata!
+ */
+static inline int ib_destroy_srq(struct ib_srq *srq)
+{
+ return ib_destroy_srq_user(srq, NULL);
+}
/**
* ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
@@ -3422,15 +3593,34 @@ static inline int ib_post_srq_recv(struct ib_srq *srq,
}
/**
- * ib_create_qp - Creates a QP associated with the specified protection
+ * ib_create_qp_user - Creates a QP associated with the specified protection
* domain.
* @pd: The protection domain associated with the QP.
* @qp_init_attr: A list of initial attributes required to create the
* QP. If QP creation succeeds, then the attributes are updated to
* the actual capabilities of the created QP.
+ * @udata: Valid user data or NULL for kernel objects
*/
-struct ib_qp *ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr);
+struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
+
+/**
+ * ib_create_qp - Creates a kernel QP associated with the specified protection
+ * domain.
+ * @pd: The protection domain associated with the QP.
+ * @qp_init_attr: A list of initial attributes required to create the
+ * QP. If QP creation succeeds, then the attributes are updated to
+ * the actual capabilities of the created QP.
+ * @udata: Valid user data or NULL for kernel objects
+ *
+ * NOTE: for user qp use ib_create_qp_user with valid udata!
+ */
+static inline struct ib_qp *ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ return ib_create_qp_user(pd, qp_init_attr, NULL);
+}
/**
* ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
@@ -3480,8 +3670,20 @@ int ib_query_qp(struct ib_qp *qp,
/**
* ib_destroy_qp - Destroys the specified QP.
* @qp: The QP to destroy.
+ * @udata: Valid udata or NULL for kernel objects
*/
-int ib_destroy_qp(struct ib_qp *qp);
+int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata);
+
+/**
+ * ib_destroy_qp - Destroys the specified kernel QP.
+ * @qp: The QP to destroy.
+ *
+ * NOTE: for user qp use ib_destroy_qp_user with valid udata!
+ */
+static inline int ib_destroy_qp(struct ib_qp *qp)
+{
+ return ib_destroy_qp_user(qp, NULL);
+}
/**
* ib_open_qp - Obtain a reference to an existing sharable QP.
@@ -3541,13 +3743,66 @@ static inline int ib_post_recv(struct ib_qp *qp,
return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
}
-struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector,
- enum ib_poll_context poll_ctx, const char *caller);
-#define ib_alloc_cq(device, priv, nr_cqe, comp_vect, poll_ctx) \
- __ib_alloc_cq((device), (priv), (nr_cqe), (comp_vect), (poll_ctx), KBUILD_MODNAME)
+struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector,
+ enum ib_poll_context poll_ctx,
+ const char *caller, struct ib_udata *udata);
+
+/**
+ * ib_alloc_cq_user: Allocate kernel/user CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @comp_vector: Completion vector used for the IRQs
+ * @poll_ctx: Context used for polling the CQ
+ * @udata: Valid user data or NULL for kernel objects
+ */
+static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev,
+ void *private, int nr_cqe,
+ int comp_vector,
+ enum ib_poll_context poll_ctx,
+ struct ib_udata *udata)
+{
+ return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
+ KBUILD_MODNAME, udata);
+}
+
+/**
+ * ib_alloc_cq: Allocate kernel CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @comp_vector: Completion vector used for the IRQs
+ * @poll_ctx: Context used for polling the CQ
+ *
+ * NOTE: for user cq use ib_alloc_cq_user with valid udata!
+ */
+static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector,
+ enum ib_poll_context poll_ctx)
+{
+ return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
+ NULL);
+}
+
+/**
+ * ib_free_cq_user - Free kernel/user CQ
+ * @cq: The CQ to free
+ * @udata: Valid user data or NULL for kernel objects
+ */
+void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata);
+
+/**
+ * ib_free_cq - Free kernel CQ
+ * @cq: The CQ to free
+ *
+ * NOTE: for user cq use ib_free_cq_user with valid udata!
+ */
+static inline void ib_free_cq(struct ib_cq *cq)
+{
+ ib_free_cq_user(cq, NULL);
+}
-void ib_free_cq(struct ib_cq *cq);
int ib_process_cq_direct(struct ib_cq *cq, int budget);
/**
@@ -3591,10 +3846,22 @@ int ib_resize_cq(struct ib_cq *cq, int cqe);
int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period);
/**
- * ib_destroy_cq - Destroys the specified CQ.
+ * ib_destroy_cq_user - Destroys the specified CQ.
* @cq: The CQ to destroy.
+ * @udata: Valid user data or NULL for kernel objects
*/
-int ib_destroy_cq(struct ib_cq *cq);
+int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata);
+
+/**
+ * ib_destroy_cq - Destroys the specified kernel CQ.
+ * @cq: The CQ to destroy.
+ *
+ * NOTE: for user cq use ib_destroy_cq_user with valid udata!
+ */
+static inline int ib_destroy_cq(struct ib_cq *cq)
+{
+ return ib_destroy_cq_user(cq, NULL);
+}
/**
* ib_poll_cq - poll a CQ for completion(s)
@@ -3848,17 +4115,37 @@ static inline void ib_dma_free_coherent(struct ib_device *dev,
}
/**
- * ib_dereg_mr - Deregisters a memory region and removes it from the
+ * ib_dereg_mr_user - Deregisters a memory region and removes it from the
+ * HCA translation table.
+ * @mr: The memory region to deregister.
+ * @udata: Valid user data or NULL for kernel object
+ *
+ * This function can fail, if the memory region has memory windows bound to it.
+ */
+int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata);
+
+/**
+ * ib_dereg_mr - Deregisters a kernel memory region and removes it from the
* HCA translation table.
* @mr: The memory region to deregister.
*
* This function can fail, if the memory region has memory windows bound to it.
+ *
+ * NOTE: for user mr use ib_dereg_mr_user with valid udata!
*/
-int ib_dereg_mr(struct ib_mr *mr);
+static inline int ib_dereg_mr(struct ib_mr *mr)
+{
+ return ib_dereg_mr_user(mr, NULL);
+}
+
+struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg, struct ib_udata *udata);
-struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg);
+static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type, u32 max_num_sg)
+{
+ return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL);
+}
/**
* ib_update_fast_reg_key - updates the key portion of the fast_reg MR
@@ -3956,8 +4243,9 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller);
/**
* ib_dealloc_xrcd - Deallocates an XRC domain.
* @xrcd: The XRC domain to deallocate.
+ * @udata: Valid user data or NULL for kernel object
*/
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
static inline int ib_check_mr_access(int flags)
{
@@ -4033,7 +4321,7 @@ struct net_device *ib_device_netdev(struct ib_device *dev, u8 port);
struct ib_wq *ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr);
-int ib_destroy_wq(struct ib_wq *wq);
+int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
u32 wq_attr_mask);
struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
@@ -4349,7 +4637,10 @@ rdma_set_device_sysfs_group(struct ib_device *dev,
*/
static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
{
- return container_of(device, struct ib_device, dev);
+ struct ib_core_device *coredev =
+ container_of(device, struct ib_core_device, dev);
+
+ return coredev->owner;
}
/**
@@ -4362,4 +4653,7 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
*/
#define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \
container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member)
+
+bool rdma_dev_access_netns(const struct ib_device *device,
+ const struct net *net);
#endif /* IB_VERBS_H */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 0e1f02815643..5aa8a9c76aa0 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -118,31 +118,6 @@ enum iw_flags {
IW_F_NO_PORT_MAP = (1 << 0),
};
-struct iw_cm_verbs {
- void (*add_ref)(struct ib_qp *qp);
-
- void (*rem_ref)(struct ib_qp *qp);
-
- struct ib_qp * (*get_qp)(struct ib_device *device,
- int qpn);
-
- int (*connect)(struct iw_cm_id *cm_id,
- struct iw_cm_conn_param *conn_param);
-
- int (*accept)(struct iw_cm_id *cm_id,
- struct iw_cm_conn_param *conn_param);
-
- int (*reject)(struct iw_cm_id *cm_id,
- const void *pdata, u8 pdata_len);
-
- int (*create_listen)(struct iw_cm_id *cm_id,
- int backlog);
-
- int (*destroy_listen)(struct iw_cm_id *cm_id);
- char ifname[IFNAMSIZ];
- enum iw_flags driver_flags;
-};
-
/**
* iw_create_cm_id - Create an IW CM identifier.
*
diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
index b4f0ac02f283..7147a9263011 100644
--- a/include/rdma/opa_port_info.h
+++ b/include/rdma/opa_port_info.h
@@ -413,6 +413,6 @@ struct opa_port_info {
u8 local_port_num;
u8 reserved12;
u8 reserved13; /* was guid_cap */
-} __attribute__ ((packed));
+} __packed;
#endif /* OPA_PORT_INFO_H */
diff --git a/include/rdma/opa_smi.h b/include/rdma/opa_smi.h
index f7896117936e..c7b2ef12792d 100644
--- a/include/rdma/opa_smi.h
+++ b/include/rdma/opa_smi.h
@@ -98,7 +98,7 @@ struct opa_smp {
struct opa_node_description {
u8 data[64];
-} __attribute__ ((packed));
+} __packed;
struct opa_node_info {
u8 base_version;
@@ -114,7 +114,7 @@ struct opa_node_info {
__be32 revision;
u8 local_port_num;
u8 vendor_id[3]; /* network byte order */
-} __attribute__ ((packed));
+} __packed;
#define OPA_PARTITION_TABLE_BLK_SIZE 32
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 4c257aff7d32..b9cd06db1a71 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -59,7 +59,6 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_mad.h>
#include <rdma/rdmavt_mr.h>
-#include <rdma/rdmavt_qp.h>
#define RVT_MAX_PKEY_VALUES 16
@@ -72,6 +71,8 @@ struct trap_list {
struct list_head list;
};
+struct rvt_qp;
+struct rvt_qpn_table;
struct rvt_ibport {
struct rvt_qp __rcu *qp[2];
struct ib_mad_agent *send_agent; /* agent for SMI (traps) */
@@ -206,6 +207,20 @@ struct rvt_ah {
u8 log_pmtu;
};
+/*
+ * This structure is used by rvt_mmap() to validate an offset
+ * when an mmap() request is made. The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct rvt_mmap_info {
+ struct list_head pending_mmaps;
+ struct ib_ucontext *context;
+ void *obj;
+ __u64 offset;
+ struct kref ref;
+ u32 size;
+};
+
/* memory working set size */
struct rvt_wss {
unsigned long *entries;
@@ -501,16 +516,6 @@ static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev)
return container_of(ibdev, struct rvt_dev_info, ibdev);
}
-static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq)
-{
- return container_of(ibsrq, struct rvt_srq, ibsrq);
-}
-
-static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp)
-{
- return container_of(ibqp, struct rvt_qp, ibqp);
-}
-
static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
{
/*
@@ -548,57 +553,6 @@ static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi,
return rdi->ports[port_index]->pkey_table[index];
}
-/**
- * rvt_lookup_qpn - return the QP with the given QPN
- * @ibp: the ibport
- * @qpn: the QP number to look up
- *
- * The caller must hold the rcu_read_lock(), and keep the lock until
- * the returned qp is no longer in use.
- */
-/* TODO: Remove this and put in rdmavt/qp.h when no longer needed by drivers */
-static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
- struct rvt_ibport *rvp,
- u32 qpn) __must_hold(RCU)
-{
- struct rvt_qp *qp = NULL;
-
- if (unlikely(qpn <= 1)) {
- qp = rcu_dereference(rvp->qp[qpn]);
- } else {
- u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits);
-
- for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp;
- qp = rcu_dereference(qp->next))
- if (qp->ibqp.qp_num == qpn)
- break;
- }
- return qp;
-}
-
-/**
- * rvt_mod_retry_timer - mod a retry timer
- * @qp - the QP
- * @shift - timeout shift to wait for multiple packets
- * Modify a potentially already running retry timer
- */
-static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift)
-{
- struct ib_qp *ibqp = &qp->ibqp;
- struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
-
- lockdep_assert_held(&qp->s_lock);
- qp->s_flags |= RVT_S_TIMER;
- /* 4.096 usec. * (1 << qp->timeout) */
- mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies +
- (qp->timeout_jiffies << shift));
-}
-
-static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
-{
- return rvt_mod_retry_timer_ext(qp, 0);
-}
-
struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
void rvt_dealloc_device(struct rvt_dev_info *rdi);
int rvt_register_device(struct rvt_dev_info *rvd, u32 driver_id);
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index f0fbd4063fef..68e38c20afc0 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -83,7 +83,6 @@
* RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating
* next send completion entry not via send DMA
* RVT_S_WAIT_PIO - waiting for a send buffer to be available
- * RVT_S_WAIT_PIO_DRAIN - waiting for a qp to drain pio packets
* RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available
* RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
* RVT_S_WAIT_KMEM - waiting for kernel memory to be available
@@ -212,20 +211,6 @@ struct rvt_rq {
};
/*
- * This structure is used by rvt_mmap() to validate an offset
- * when an mmap() request is made. The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct rvt_mmap_info {
- struct list_head pending_mmaps;
- struct ib_ucontext *context;
- void *obj;
- __u64 offset;
- struct kref ref;
- unsigned size;
-};
-
-/*
* This structure holds the information that the send tasklet needs
* to send a RDMA read response or atomic operation.
*/
@@ -399,6 +384,16 @@ struct rvt_srq {
u32 limit;
};
+static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct rvt_srq, ibsrq);
+}
+
+static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct rvt_qp, ibqp);
+}
+
#define RVT_QPN_MAX BIT(24)
#define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
#define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE)
@@ -678,6 +673,70 @@ static inline unsigned long rvt_timeout_to_jiffies(u8 timeout)
return usecs_to_jiffies(1U << timeout) * 4096UL / 1000UL;
}
+/**
+ * rvt_lookup_qpn - return the QP with the given QPN
+ * @ibp: the ibport
+ * @qpn: the QP number to look up
+ *
+ * The caller must hold the rcu_read_lock(), and keep the lock until
+ * the returned qp is no longer in use.
+ */
+static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
+ struct rvt_ibport *rvp,
+ u32 qpn) __must_hold(RCU)
+{
+ struct rvt_qp *qp = NULL;
+
+ if (unlikely(qpn <= 1)) {
+ qp = rcu_dereference(rvp->qp[qpn]);
+ } else {
+ u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits);
+
+ for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp;
+ qp = rcu_dereference(qp->next))
+ if (qp->ibqp.qp_num == qpn)
+ break;
+ }
+ return qp;
+}
+
+/**
+ * rvt_mod_retry_timer - mod a retry timer
+ * @qp - the QP
+ * @shift - timeout shift to wait for multiple packets
+ * Modify a potentially already running retry timer
+ */
+static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift)
+{
+ struct ib_qp *ibqp = &qp->ibqp;
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+ lockdep_assert_held(&qp->s_lock);
+ qp->s_flags |= RVT_S_TIMER;
+ /* 4.096 usec. * (1 << qp->timeout) */
+ mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies +
+ (qp->timeout_jiffies << shift));
+}
+
+static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
+{
+ return rvt_mod_retry_timer_ext(qp, 0);
+}
+
+/**
+ * rvt_put_qp_swqe - drop refs held by swqe
+ * @qp: the send qp
+ * @wqe: the send wqe
+ *
+ * This drops any references held by the swqe
+ */
+static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+ rvt_put_swqe(wqe);
+ if (qp->allowed_ops == IB_OPCODE_UD)
+ atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
+}
+
extern const int ib_rvt_state_ops[];
struct rvt_dev_info;
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 794c47565971..05eabfd5d0d3 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -48,17 +48,15 @@
#define uobj_get_type(_attrs, _object) \
uapi_get_object((_attrs)->ufile->device->uapi, _object)
-struct ib_uobject *_uobj_get_read(enum uverbs_default_objects type,
- u32 object_id,
- struct uverbs_attr_bundle *attrs);
-
#define uobj_get_read(_type, _id, _attrs) \
- _uobj_get_read(_type, _uobj_check_id(_id), _attrs)
+ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
+ _uobj_check_id(_id), UVERBS_LOOKUP_READ, \
+ _attrs)
#define ufd_get_read(_type, _fdnum, _attrs) \
rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
(_fdnum)*typecheck(s32, _fdnum), \
- UVERBS_LOOKUP_READ)
+ UVERBS_LOOKUP_READ, _attrs)
static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
{
@@ -70,22 +68,19 @@ static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
((struct ib_##_object *)_uobj_get_obj_read( \
uobj_get_read(_type, _id, _attrs)))
-struct ib_uobject *_uobj_get_write(enum uverbs_default_objects type,
- u32 object_id,
- struct uverbs_attr_bundle *attrs);
-
#define uobj_get_write(_type, _id, _attrs) \
- _uobj_get_write(_type, _uobj_check_id(_id), _attrs)
+ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
+ _uobj_check_id(_id), UVERBS_LOOKUP_WRITE, \
+ _attrs)
int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
- const struct uverbs_attr_bundle *attrs);
+ struct uverbs_attr_bundle *attrs);
#define uobj_perform_destroy(_type, _id, _attrs) \
__uobj_perform_destroy(uobj_get_type(_attrs, _type), \
_uobj_check_id(_id), _attrs)
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
- u32 id,
- const struct uverbs_attr_bundle *attrs);
+ u32 id, struct uverbs_attr_bundle *attrs);
#define uobj_get_destroy(_type, _id, _attrs) \
__uobj_get_destroy(uobj_get_type(_attrs, _type), _uobj_check_id(_id), \
@@ -109,30 +104,31 @@ static inline void uobj_put_write(struct ib_uobject *uobj)
rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
}
-static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj)
+static inline int __must_check
+uobj_alloc_commit(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
{
- int ret = rdma_alloc_commit_uobject(uobj);
+ int ret = rdma_alloc_commit_uobject(uobj, attrs);
if (ret)
return ret;
return 0;
}
-static inline void uobj_alloc_abort(struct ib_uobject *uobj)
+static inline void uobj_alloc_abort(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs)
{
- rdma_alloc_abort_uobject(uobj);
+ rdma_alloc_abort_uobject(uobj, attrs);
}
static inline struct ib_uobject *
__uobj_alloc(const struct uverbs_api_object *obj,
struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev)
{
- struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs->ufile);
+ struct ib_uobject *uobj =
+ rdma_alloc_begin_uobject(obj, attrs->ufile, attrs);
- if (!IS_ERR(uobj)) {
- *ib_dev = uobj->context->device;
- attrs->context = uobj->context;
- }
+ if (!IS_ERR(uobj))
+ *ib_dev = attrs->context->device;
return uobj;
}
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 175d761695e1..d57a5ba00c74 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -95,7 +95,8 @@ struct uverbs_obj_type_class {
void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode);
/* This does not consume the kref on uobj */
int __must_check (*destroy_hw)(struct ib_uobject *uobj,
- enum rdma_remove_reason why);
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs);
void (*remove_handle)(struct ib_uobject *uobj);
u8 needs_kfree_rcu;
};
@@ -126,18 +127,23 @@ struct uverbs_obj_idr_type {
* completely unchanged.
*/
int __must_check (*destroy_object)(struct ib_uobject *uobj,
- enum rdma_remove_reason why);
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs);
};
struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
struct ib_uverbs_file *ufile, s64 id,
- enum rdma_lookup_mode mode);
+ enum rdma_lookup_mode mode,
+ struct uverbs_attr_bundle *attrs);
void rdma_lookup_put_uobject(struct ib_uobject *uobj,
enum rdma_lookup_mode mode);
struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
- struct ib_uverbs_file *ufile);
-void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
-int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj);
+ struct ib_uverbs_file *ufile,
+ struct uverbs_attr_bundle *attrs);
+void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs);
+int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs);
struct uverbs_obj_fd_type {
/*
diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h
new file mode 100644
index 000000000000..59363a083ecb
--- /dev/null
+++ b/include/trace/events/ib_mad.h
@@ -0,0 +1,390 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+
+/*
+ * Copyright (c) 2018 Intel Corporation. All rights reserved.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ib_mad
+
+#if !defined(_TRACE_IB_MAD_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IB_MAD_H
+
+#include <linux/tracepoint.h>
+#include <rdma/ib_mad.h>
+
+#ifdef CONFIG_TRACEPOINTS
+struct trace_event_raw_ib_mad_send_template;
+static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_qp_info *qp_info,
+ struct trace_event_raw_ib_mad_send_template *entry);
+#endif
+
+DECLARE_EVENT_CLASS(ib_mad_send_template,
+ TP_PROTO(struct ib_mad_send_wr_private *wr,
+ struct ib_mad_qp_info *qp_info),
+ TP_ARGS(wr, qp_info),
+
+ TP_STRUCT__entry(
+ __field(u8, base_version)
+ __field(u8, mgmt_class)
+ __field(u8, class_version)
+ __field(u8, port_num)
+ __field(u32, qp_num)
+ __field(u8, method)
+ __field(u8, sl)
+ __field(u16, attr_id)
+ __field(u32, attr_mod)
+ __field(u64, wrtid)
+ __field(u64, tid)
+ __field(u16, status)
+ __field(u16, class_specific)
+ __field(u32, length)
+ __field(u32, dlid)
+ __field(u32, rqpn)
+ __field(u32, rqkey)
+ __field(u32, dev_index)
+ __field(void *, agent_priv)
+ __field(unsigned long, timeout)
+ __field(int, retries_left)
+ __field(int, max_retries)
+ __field(int, retry)
+ __field(u16, pkey)
+ ),
+
+ TP_fast_assign(
+ __entry->dev_index = wr->mad_agent_priv->agent.device->index;
+ __entry->port_num = wr->mad_agent_priv->agent.port_num;
+ __entry->qp_num = wr->mad_agent_priv->qp_info->qp->qp_num;
+ __entry->agent_priv = wr->mad_agent_priv;
+ __entry->wrtid = wr->tid;
+ __entry->max_retries = wr->max_retries;
+ __entry->retries_left = wr->retries_left;
+ __entry->retry = wr->retry;
+ __entry->timeout = wr->timeout;
+ __entry->length = wr->send_buf.hdr_len +
+ wr->send_buf.data_len;
+ __entry->base_version =
+ ((struct ib_mad_hdr *)wr->send_buf.mad)->base_version;
+ __entry->mgmt_class =
+ ((struct ib_mad_hdr *)wr->send_buf.mad)->mgmt_class;
+ __entry->class_version =
+ ((struct ib_mad_hdr *)wr->send_buf.mad)->class_version;
+ __entry->method =
+ ((struct ib_mad_hdr *)wr->send_buf.mad)->method;
+ __entry->status =
+ ((struct ib_mad_hdr *)wr->send_buf.mad)->status;
+ __entry->class_specific =
+ ((struct ib_mad_hdr *)wr->send_buf.mad)->class_specific;
+ __entry->tid = ((struct ib_mad_hdr *)wr->send_buf.mad)->tid;
+ __entry->attr_id =
+ ((struct ib_mad_hdr *)wr->send_buf.mad)->attr_id;
+ __entry->attr_mod =
+ ((struct ib_mad_hdr *)wr->send_buf.mad)->attr_mod;
+ create_mad_addr_info(wr, qp_info, __entry);
+ ),
+
+ TP_printk("%d:%d QP%d agent %p: " \
+ "wrtid 0x%llx; %d/%d retries(%d); timeout %lu length %d : " \
+ "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \
+ "method 0x%x status 0x%x class_specific 0x%x tid 0x%llx " \
+ "attr_id 0x%x attr_mod 0x%x => dlid 0x%08x sl %d "\
+ "pkey 0x%x rpqn 0x%x rqpkey 0x%x",
+ __entry->dev_index, __entry->port_num, __entry->qp_num,
+ __entry->agent_priv, be64_to_cpu(__entry->wrtid),
+ __entry->retries_left, __entry->max_retries,
+ __entry->retry, __entry->timeout, __entry->length,
+ __entry->base_version, __entry->mgmt_class,
+ __entry->class_version,
+ __entry->method, be16_to_cpu(__entry->status),
+ be16_to_cpu(__entry->class_specific),
+ be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id),
+ be32_to_cpu(__entry->attr_mod),
+ be32_to_cpu(__entry->dlid), __entry->sl, __entry->pkey,
+ __entry->rqpn, __entry->rqkey
+ )
+);
+
+DEFINE_EVENT(ib_mad_send_template, ib_mad_error_handler,
+ TP_PROTO(struct ib_mad_send_wr_private *wr,
+ struct ib_mad_qp_info *qp_info),
+ TP_ARGS(wr, qp_info));
+DEFINE_EVENT(ib_mad_send_template, ib_mad_ib_send_mad,
+ TP_PROTO(struct ib_mad_send_wr_private *wr,
+ struct ib_mad_qp_info *qp_info),
+ TP_ARGS(wr, qp_info));
+DEFINE_EVENT(ib_mad_send_template, ib_mad_send_done_resend,
+ TP_PROTO(struct ib_mad_send_wr_private *wr,
+ struct ib_mad_qp_info *qp_info),
+ TP_ARGS(wr, qp_info));
+
+TRACE_EVENT(ib_mad_send_done_handler,
+ TP_PROTO(struct ib_mad_send_wr_private *wr, struct ib_wc *wc),
+ TP_ARGS(wr, wc),
+
+ TP_STRUCT__entry(
+ __field(u8, port_num)
+ __field(u8, base_version)
+ __field(u8, mgmt_class)
+ __field(u8, class_version)
+ __field(u32, qp_num)
+ __field(u64, wrtid)
+ __field(u16, status)
+ __field(u16, wc_status)
+ __field(u32, length)
+ __field(void *, agent_priv)
+ __field(unsigned long, timeout)
+ __field(u32, dev_index)
+ __field(int, retries_left)
+ __field(int, max_retries)
+ __field(int, retry)
+ __field(u8, method)
+ ),
+
+ TP_fast_assign(
+ __entry->dev_index = wr->mad_agent_priv->agent.device->index;
+ __entry->port_num = wr->mad_agent_priv->agent.port_num;
+ __entry->qp_num = wr->mad_agent_priv->qp_info->qp->qp_num;
+ __entry->agent_priv = wr->mad_agent_priv;
+ __entry->wrtid = wr->tid;
+ __entry->max_retries = wr->max_retries;
+ __entry->retries_left = wr->retries_left;
+ __entry->retry = wr->retry;
+ __entry->timeout = wr->timeout;
+ __entry->base_version =
+ ((struct ib_mad_hdr *)wr->send_buf.mad)->base_version;
+ __entry->mgmt_class =
+ ((struct ib_mad_hdr *)wr->send_buf.mad)->mgmt_class;
+ __entry->class_version =
+ ((struct ib_mad_hdr *)wr->send_buf.mad)->class_version;
+ __entry->method =
+ ((struct ib_mad_hdr *)wr->send_buf.mad)->method;
+ __entry->status =
+ ((struct ib_mad_hdr *)wr->send_buf.mad)->status;
+ __entry->wc_status = wc->status;
+ __entry->length = wc->byte_len;
+ ),
+
+ TP_printk("%d:%d QP%d : SEND WC Status %d : agent %p: " \
+ "wrtid 0x%llx %d/%d retries(%d) timeout %lu length %d: " \
+ "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \
+ "method 0x%x status 0x%x",
+ __entry->dev_index, __entry->port_num, __entry->qp_num,
+ __entry->wc_status,
+ __entry->agent_priv, be64_to_cpu(__entry->wrtid),
+ __entry->retries_left, __entry->max_retries,
+ __entry->retry, __entry->timeout,
+ __entry->length,
+ __entry->base_version, __entry->mgmt_class,
+ __entry->class_version, __entry->method,
+ be16_to_cpu(__entry->status)
+ )
+);
+
+TRACE_EVENT(ib_mad_recv_done_handler,
+ TP_PROTO(struct ib_mad_qp_info *qp_info, struct ib_wc *wc,
+ struct ib_mad_hdr *mad_hdr),
+ TP_ARGS(qp_info, wc, mad_hdr),
+
+ TP_STRUCT__entry(
+ __field(u8, base_version)
+ __field(u8, mgmt_class)
+ __field(u8, class_version)
+ __field(u8, port_num)
+ __field(u32, qp_num)
+ __field(u16, status)
+ __field(u16, class_specific)
+ __field(u32, length)
+ __field(u64, tid)
+ __field(u8, method)
+ __field(u8, sl)
+ __field(u16, attr_id)
+ __field(u32, attr_mod)
+ __field(u16, src_qp)
+ __field(u16, wc_status)
+ __field(u32, slid)
+ __field(u32, dev_index)
+ __field(u16, pkey)
+ ),
+
+ TP_fast_assign(
+ __entry->dev_index = qp_info->port_priv->device->index;
+ __entry->port_num = qp_info->port_priv->port_num;
+ __entry->qp_num = qp_info->qp->qp_num;
+ __entry->length = wc->byte_len;
+ __entry->base_version = mad_hdr->base_version;
+ __entry->mgmt_class = mad_hdr->mgmt_class;
+ __entry->class_version = mad_hdr->class_version;
+ __entry->method = mad_hdr->method;
+ __entry->status = mad_hdr->status;
+ __entry->class_specific = mad_hdr->class_specific;
+ __entry->tid = mad_hdr->tid;
+ __entry->attr_id = mad_hdr->attr_id;
+ __entry->attr_mod = mad_hdr->attr_mod;
+ __entry->slid = wc->slid;
+ __entry->src_qp = wc->src_qp;
+ __entry->sl = wc->sl;
+ ib_query_pkey(qp_info->port_priv->device,
+ qp_info->port_priv->port_num,
+ wc->pkey_index, &__entry->pkey);
+ __entry->wc_status = wc->status;
+ ),
+
+ TP_printk("%d:%d QP%d : RECV WC Status %d : length %d : hdr : " \
+ "base_ver 0x%02x class 0x%02x class_ver 0x%02x " \
+ "method 0x%02x status 0x%04x class_specific 0x%04x " \
+ "tid 0x%016llx attr_id 0x%04x attr_mod 0x%08x " \
+ "slid 0x%08x src QP%d, sl %d pkey 0x%04x",
+ __entry->dev_index, __entry->port_num, __entry->qp_num,
+ __entry->wc_status,
+ __entry->length,
+ __entry->base_version, __entry->mgmt_class,
+ __entry->class_version, __entry->method,
+ be16_to_cpu(__entry->status),
+ be16_to_cpu(__entry->class_specific),
+ be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id),
+ be32_to_cpu(__entry->attr_mod),
+ __entry->slid, __entry->src_qp, __entry->sl, __entry->pkey
+ )
+);
+
+DECLARE_EVENT_CLASS(ib_mad_agent_template,
+ TP_PROTO(struct ib_mad_agent_private *agent),
+ TP_ARGS(agent),
+
+ TP_STRUCT__entry(
+ __field(u32, dev_index)
+ __field(u32, hi_tid)
+ __field(u8, port_num)
+ __field(u8, mgmt_class)
+ __field(u8, mgmt_class_version)
+ ),
+
+ TP_fast_assign(
+ __entry->dev_index = agent->agent.device->index;
+ __entry->port_num = agent->agent.port_num;
+ __entry->hi_tid = agent->agent.hi_tid;
+
+ if (agent->reg_req) {
+ __entry->mgmt_class = agent->reg_req->mgmt_class;
+ __entry->mgmt_class_version =
+ agent->reg_req->mgmt_class_version;
+ } else {
+ __entry->mgmt_class = 0;
+ __entry->mgmt_class_version = 0;
+ }
+ ),
+
+ TP_printk("%d:%d mad agent : hi_tid 0x%08x class 0x%02x class_ver 0x%02x",
+ __entry->dev_index, __entry->port_num,
+ __entry->hi_tid, __entry->mgmt_class,
+ __entry->mgmt_class_version
+ )
+);
+DEFINE_EVENT(ib_mad_agent_template, ib_mad_recv_done_agent,
+ TP_PROTO(struct ib_mad_agent_private *agent),
+ TP_ARGS(agent));
+DEFINE_EVENT(ib_mad_agent_template, ib_mad_send_done_agent,
+ TP_PROTO(struct ib_mad_agent_private *agent),
+ TP_ARGS(agent));
+DEFINE_EVENT(ib_mad_agent_template, ib_mad_create_agent,
+ TP_PROTO(struct ib_mad_agent_private *agent),
+ TP_ARGS(agent));
+DEFINE_EVENT(ib_mad_agent_template, ib_mad_unregister_agent,
+ TP_PROTO(struct ib_mad_agent_private *agent),
+ TP_ARGS(agent));
+
+
+
+DECLARE_EVENT_CLASS(ib_mad_opa_smi_template,
+ TP_PROTO(struct opa_smp *smp),
+ TP_ARGS(smp),
+
+ TP_STRUCT__entry(
+ __field(u64, mkey)
+ __field(u32, dr_slid)
+ __field(u32, dr_dlid)
+ __field(u8, hop_ptr)
+ __field(u8, hop_cnt)
+ __array(u8, initial_path, OPA_SMP_MAX_PATH_HOPS)
+ __array(u8, return_path, OPA_SMP_MAX_PATH_HOPS)
+ ),
+
+ TP_fast_assign(
+ __entry->hop_ptr = smp->hop_ptr;
+ __entry->hop_cnt = smp->hop_cnt;
+ __entry->mkey = smp->mkey;
+ __entry->dr_slid = smp->route.dr.dr_slid;
+ __entry->dr_dlid = smp->route.dr.dr_dlid;
+ memcpy(__entry->initial_path, smp->route.dr.initial_path,
+ OPA_SMP_MAX_PATH_HOPS);
+ memcpy(__entry->return_path, smp->route.dr.return_path,
+ OPA_SMP_MAX_PATH_HOPS);
+ ),
+
+ TP_printk("OPA SMP: hop_ptr %d hop_cnt %d " \
+ "mkey 0x%016llx dr_slid 0x%08x dr_dlid 0x%08x " \
+ "initial_path %*ph return_path %*ph ",
+ __entry->hop_ptr, __entry->hop_cnt,
+ be64_to_cpu(__entry->mkey), be32_to_cpu(__entry->dr_slid),
+ be32_to_cpu(__entry->dr_dlid),
+ OPA_SMP_MAX_PATH_HOPS, __entry->initial_path,
+ OPA_SMP_MAX_PATH_HOPS, __entry->return_path
+ )
+);
+
+DEFINE_EVENT(ib_mad_opa_smi_template, ib_mad_handle_opa_smi,
+ TP_PROTO(struct opa_smp *smp),
+ TP_ARGS(smp));
+DEFINE_EVENT(ib_mad_opa_smi_template, ib_mad_handle_out_opa_smi,
+ TP_PROTO(struct opa_smp *smp),
+ TP_ARGS(smp));
+
+
+DECLARE_EVENT_CLASS(ib_mad_opa_ib_template,
+ TP_PROTO(struct ib_smp *smp),
+ TP_ARGS(smp),
+
+ TP_STRUCT__entry(
+ __field(u64, mkey)
+ __field(u32, dr_slid)
+ __field(u32, dr_dlid)
+ __field(u8, hop_ptr)
+ __field(u8, hop_cnt)
+ __array(u8, initial_path, IB_SMP_MAX_PATH_HOPS)
+ __array(u8, return_path, IB_SMP_MAX_PATH_HOPS)
+ ),
+
+ TP_fast_assign(
+ __entry->hop_ptr = smp->hop_ptr;
+ __entry->hop_cnt = smp->hop_cnt;
+ __entry->mkey = smp->mkey;
+ __entry->dr_slid = smp->dr_slid;
+ __entry->dr_dlid = smp->dr_dlid;
+ memcpy(__entry->initial_path, smp->initial_path,
+ IB_SMP_MAX_PATH_HOPS);
+ memcpy(__entry->return_path, smp->return_path,
+ IB_SMP_MAX_PATH_HOPS);
+ ),
+
+ TP_printk("OPA SMP: hop_ptr %d hop_cnt %d " \
+ "mkey 0x%016llx dr_slid 0x%04x dr_dlid 0x%04x " \
+ "initial_path %*ph return_path %*ph ",
+ __entry->hop_ptr, __entry->hop_cnt,
+ be64_to_cpu(__entry->mkey), be16_to_cpu(__entry->dr_slid),
+ be16_to_cpu(__entry->dr_dlid),
+ IB_SMP_MAX_PATH_HOPS, __entry->initial_path,
+ IB_SMP_MAX_PATH_HOPS, __entry->return_path
+ )
+);
+
+DEFINE_EVENT(ib_mad_opa_ib_template, ib_mad_handle_ib_smi,
+ TP_PROTO(struct ib_smp *smp),
+ TP_ARGS(smp));
+DEFINE_EVENT(ib_mad_opa_ib_template, ib_mad_handle_out_ib_smi,
+ TP_PROTO(struct ib_smp *smp),
+ TP_ARGS(smp));
+
+#endif /* _TRACE_IB_MAD_H */
+
+#include <trace/define_trace.h>
diff --git a/include/trace/events/ib_umad.h b/include/trace/events/ib_umad.h
new file mode 100644
index 000000000000..c393a19a0f60
--- /dev/null
+++ b/include/trace/events/ib_umad.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+
+/*
+ * Copyright (c) 2018 Intel Corporation. All rights reserved.
+ *
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ib_umad
+
+#if !defined(_TRACE_IB_UMAD_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IB_UMAD_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(ib_umad_template,
+ TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr,
+ struct ib_mad_hdr *mad_hdr),
+ TP_ARGS(file, umad_hdr, mad_hdr),
+
+ TP_STRUCT__entry(
+ __field(u8, port_num)
+ __field(u8, sl)
+ __field(u8, path_bits)
+ __field(u8, grh_present)
+ __field(u32, id)
+ __field(u32, status)
+ __field(u32, timeout_ms)
+ __field(u32, retires)
+ __field(u32, length)
+ __field(u32, qpn)
+ __field(u32, qkey)
+ __field(u8, gid_index)
+ __field(u8, hop_limit)
+ __field(u16, lid)
+ __field(u16, attr_id)
+ __field(u16, pkey_index)
+ __field(u8, base_version)
+ __field(u8, mgmt_class)
+ __field(u8, class_version)
+ __field(u8, method)
+ __field(u32, flow_label)
+ __field(u16, mad_status)
+ __field(u16, class_specific)
+ __field(u32, attr_mod)
+ __field(u64, tid)
+ __array(u8, gid, 16)
+ __field(u32, dev_index)
+ __field(u8, traffic_class)
+ ),
+
+ TP_fast_assign(
+ __entry->dev_index = file->port->ib_dev->index;
+ __entry->port_num = file->port->port_num;
+
+ __entry->id = umad_hdr->id;
+ __entry->status = umad_hdr->status;
+ __entry->timeout_ms = umad_hdr->timeout_ms;
+ __entry->retires = umad_hdr->retries;
+ __entry->length = umad_hdr->length;
+ __entry->qpn = umad_hdr->qpn;
+ __entry->qkey = umad_hdr->qkey;
+ __entry->lid = umad_hdr->lid;
+ __entry->sl = umad_hdr->sl;
+ __entry->path_bits = umad_hdr->path_bits;
+ __entry->grh_present = umad_hdr->grh_present;
+ __entry->gid_index = umad_hdr->gid_index;
+ __entry->hop_limit = umad_hdr->hop_limit;
+ __entry->traffic_class = umad_hdr->traffic_class;
+ memcpy(__entry->gid, umad_hdr->gid, sizeof(umad_hdr->gid));
+ __entry->flow_label = umad_hdr->flow_label;
+ __entry->pkey_index = umad_hdr->pkey_index;
+
+ __entry->base_version = mad_hdr->base_version;
+ __entry->mgmt_class = mad_hdr->mgmt_class;
+ __entry->class_version = mad_hdr->class_version;
+ __entry->method = mad_hdr->method;
+ __entry->mad_status = mad_hdr->status;
+ __entry->class_specific = mad_hdr->class_specific;
+ __entry->tid = mad_hdr->tid;
+ __entry->attr_id = mad_hdr->attr_id;
+ __entry->attr_mod = mad_hdr->attr_mod;
+ ),
+
+ TP_printk("%d:%d umad_hdr: id 0x%08x status 0x%08x ms %u ret %u " \
+ "len %u QP%u qkey 0x%08x lid 0x%04x sl %u path_bits 0x%x " \
+ "grh 0x%x gidi %u hop_lim %u traf_cl %u gid %pI6c " \
+ "flow 0x%08x pkeyi %u MAD: base_ver 0x%x class 0x%x " \
+ "class_ver 0x%x method 0x%x status 0x%04x " \
+ "class_specific 0x%04x tid 0x%016llx attr_id 0x%04x " \
+ "attr_mod 0x%08x ",
+ __entry->dev_index, __entry->port_num,
+ __entry->id, __entry->status, __entry->timeout_ms,
+ __entry->retires, __entry->length, be32_to_cpu(__entry->qpn),
+ be32_to_cpu(__entry->qkey), be16_to_cpu(__entry->lid),
+ __entry->sl, __entry->path_bits, __entry->grh_present,
+ __entry->gid_index, __entry->hop_limit,
+ __entry->traffic_class, &__entry->gid,
+ be32_to_cpu(__entry->flow_label), __entry->pkey_index,
+ __entry->base_version, __entry->mgmt_class,
+ __entry->class_version, __entry->method,
+ be16_to_cpu(__entry->mad_status),
+ be16_to_cpu(__entry->class_specific),
+ be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id),
+ be32_to_cpu(__entry->attr_mod)
+ )
+);
+
+DEFINE_EVENT(ib_umad_template, ib_umad_write,
+ TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr,
+ struct ib_mad_hdr *mad_hdr),
+ TP_ARGS(file, umad_hdr, mad_hdr));
+
+DEFINE_EVENT(ib_umad_template, ib_umad_read_recv,
+ TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr,
+ struct ib_mad_hdr *mad_hdr),
+ TP_ARGS(file, umad_hdr, mad_hdr));
+
+DEFINE_EVENT(ib_umad_template, ib_umad_read_send,
+ TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr,
+ struct ib_mad_hdr *mad_hdr),
+ TP_ARGS(file, umad_hdr, mad_hdr));
+
+#endif /* _TRACE_IB_UMAD_H */
+
+#include <trace/define_trace.h>
diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h
new file mode 100644
index 000000000000..9599a2a62be8
--- /dev/null
+++ b/include/uapi/rdma/efa-abi.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/*
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef EFA_ABI_USER_H
+#define EFA_ABI_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define EFA_UVERBS_ABI_VERSION 1
+
+/*
+ * Keep structs aligned to 8 bytes.
+ * Keep reserved fields as arrays of __u8 named reserved_XXX where XXX is the
+ * hex bit offset of the field.
+ */
+
+enum efa_ibv_user_cmds_supp_udata {
+ EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE = 1 << 0,
+ EFA_USER_CMDS_SUPP_UDATA_CREATE_AH = 1 << 1,
+};
+
+struct efa_ibv_alloc_ucontext_resp {
+ __u32 comp_mask;
+ __u32 cmds_supp_udata_mask;
+ __u16 sub_cqs_per_cq;
+ __u16 inline_buf_size;
+ __u32 max_llq_size; /* bytes */
+};
+
+struct efa_ibv_alloc_pd_resp {
+ __u32 comp_mask;
+ __u16 pdn;
+ __u8 reserved_30[2];
+};
+
+struct efa_ibv_create_cq {
+ __u32 comp_mask;
+ __u32 cq_entry_size;
+ __u16 num_sub_cqs;
+ __u8 reserved_50[6];
+};
+
+struct efa_ibv_create_cq_resp {
+ __u32 comp_mask;
+ __u8 reserved_20[4];
+ __aligned_u64 q_mmap_key;
+ __aligned_u64 q_mmap_size;
+ __u16 cq_idx;
+ __u8 reserved_d0[6];
+};
+
+enum {
+ EFA_QP_DRIVER_TYPE_SRD = 0,
+};
+
+struct efa_ibv_create_qp {
+ __u32 comp_mask;
+ __u32 rq_ring_size; /* bytes */
+ __u32 sq_ring_size; /* bytes */
+ __u32 driver_qp_type;
+};
+
+struct efa_ibv_create_qp_resp {
+ __u32 comp_mask;
+ /* the offset inside the page of the rq db */
+ __u32 rq_db_offset;
+ /* the offset inside the page of the sq db */
+ __u32 sq_db_offset;
+ /* the offset inside the page of descriptors buffer */
+ __u32 llq_desc_offset;
+ __aligned_u64 rq_mmap_key;
+ __aligned_u64 rq_mmap_size;
+ __aligned_u64 rq_db_mmap_key;
+ __aligned_u64 sq_db_mmap_key;
+ __aligned_u64 llq_desc_mmap_key;
+ __u16 send_sub_cq_idx;
+ __u16 recv_sub_cq_idx;
+ __u8 reserved_1e0[4];
+};
+
+struct efa_ibv_create_ah_resp {
+ __u32 comp_mask;
+ __u16 efa_address_handle;
+ __u8 reserved_30[2];
+};
+
+struct efa_ibv_ex_query_device_resp {
+ __u32 comp_mask;
+ __u32 max_sq_wr;
+ __u32 max_rq_wr;
+ __u16 max_sq_sge;
+ __u16 max_rq_sge;
+};
+
+#endif /* EFA_ABI_USER_H */
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index f4d4010b7e3e..624f5b53eb1f 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -360,6 +360,7 @@ enum mlx5_ib_create_qp_resp_mask {
MLX5_IB_CREATE_QP_RESP_MASK_TISN = 1UL << 1,
MLX5_IB_CREATE_QP_RESP_MASK_RQN = 1UL << 2,
MLX5_IB_CREATE_QP_RESP_MASK_SQN = 1UL << 3,
+ MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR = 1UL << 4,
};
struct mlx5_ib_create_qp_resp {
@@ -371,6 +372,7 @@ struct mlx5_ib_create_qp_resp {
__u32 rqn;
__u32 sqn;
__u32 reserved1;
+ __u64 tir_icm_addr;
};
struct mlx5_ib_alloc_mw {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 8149d224030b..d404c951954c 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -44,6 +44,7 @@ enum mlx5_ib_create_flow_action_attrs {
enum mlx5_ib_alloc_dm_attrs {
MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET = (1U << UVERBS_ID_NS_SHIFT),
MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
};
enum mlx5_ib_devx_methods {
@@ -144,6 +145,7 @@ enum mlx5_ib_flow_matcher_create_attrs {
MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
};
enum mlx5_ib_flow_matcher_destroy_attrs {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
index 4a701033b93f..a8f34c237458 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -42,6 +42,7 @@ enum mlx5_ib_uapi_flow_action_flags {
enum mlx5_ib_uapi_flow_table_type {
MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX = 0x0,
MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1,
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB = 0x2,
};
enum mlx5_ib_uapi_flow_action_packet_reformat_type {
@@ -56,5 +57,11 @@ struct mlx5_ib_uapi_devx_async_cmd_hdr {
__u8 out_data[];
};
+enum mlx5_ib_uapi_dm_type {
+ MLX5_IB_UAPI_DM_TYPE_MEMIC,
+ MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM,
+ MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM,
+};
+
#endif
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 5cc592728071..42a8bdc40a14 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -49,17 +49,6 @@ enum {
RDMA_NL_IWPM_NUM_OPS
};
-struct rdma_cm_id_stats {
- __u32 qp_num;
- __u32 bound_dev_if;
- __u32 port_space;
- __s32 pid;
- __u8 cm_state;
- __u8 node_type;
- __u8 port_num;
- __u8 qp_type;
-};
-
enum {
IWPM_NLA_REG_PID_UNSPEC = 0,
IWPM_NLA_REG_PID_SEQ,
@@ -261,7 +250,10 @@ enum rdma_nldev_command {
RDMA_NLDEV_CMD_PORT_GET, /* can dump */
- /* 6 - 8 are free to use */
+ RDMA_NLDEV_CMD_SYS_GET, /* can dump */
+ RDMA_NLDEV_CMD_SYS_SET,
+
+ /* 8 is free to use */
RDMA_NLDEV_CMD_RES_GET = 9, /* can dump */
@@ -473,6 +465,21 @@ enum rdma_nldev_attr {
RDMA_NLDEV_ATTR_LINK_TYPE, /* string */
/*
+ * net namespace mode for rdma subsystem:
+ * either shared or exclusive among multiple net namespaces.
+ */
+ RDMA_NLDEV_SYS_ATTR_NETNS_MODE, /* u8 */
+ /*
+ * Device protocol, e.g. ib, iw, usnic, roce and opa
+ */
+ RDMA_NLDEV_ATTR_DEV_PROTOCOL, /* string */
+
+ /*
+ * File descriptor handle of the net namespace object
+ */
+ RDMA_NLDEV_NET_NS_FD, /* u32 */
+
+ /*
* Always the end
*/
RDMA_NLDEV_ATTR_MAX
diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h
index 06c34d99be85..26213f49f5c8 100644
--- a/include/uapi/rdma/rdma_user_ioctl_cmds.h
+++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -102,6 +102,7 @@ enum rdma_driver_id {
RDMA_DRIVER_RXE,
RDMA_DRIVER_HFI1,
RDMA_DRIVER_QIB,
+ RDMA_DRIVER_EFA,
};
#endif