diff options
Diffstat (limited to 'drivers/infiniband/hw/mana')
| -rw-r--r-- | drivers/infiniband/hw/mana/Kconfig | 10 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mana/Makefile | 4 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mana/cq.c | 79 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mana/device.c | 117 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mana/main.c | 521 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mana/mana_ib.h | 162 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mana/mr.c | 197 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mana/qp.c | 506 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mana/wq.c | 115 | 
9 files changed, 1711 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/mana/Kconfig b/drivers/infiniband/hw/mana/Kconfig new file mode 100644 index 000000000000..546640657bac --- /dev/null +++ b/drivers/infiniband/hw/mana/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +config MANA_INFINIBAND +	tristate "Microsoft Azure Network Adapter support" +	depends on NETDEVICES && ETHERNET && PCI && MICROSOFT_MANA +	help +	  This driver provides low-level RDMA support for Microsoft Azure +	  Network Adapter (MANA). MANA supports RDMA features that can be used +	  for workloads (e.g. DPDK, MPI etc) that uses RDMA verbs to directly +	  access hardware from user-mode processes in Microsoft Azure cloud +	  environment. diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile new file mode 100644 index 000000000000..88655fe5e398 --- /dev/null +++ b/drivers/infiniband/hw/mana/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o + +mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c new file mode 100644 index 000000000000..d141cab8a1e6 --- /dev/null +++ b/drivers/infiniband/hw/mana/cq.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. 
+ */ + +#include "mana_ib.h" + +int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, +		      struct ib_udata *udata) +{ +	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); +	struct ib_device *ibdev = ibcq->device; +	struct mana_ib_create_cq ucmd = {}; +	struct mana_ib_dev *mdev; +	int err; + +	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + +	if (udata->inlen < sizeof(ucmd)) +		return -EINVAL; + +	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); +	if (err) { +		ibdev_dbg(ibdev, +			  "Failed to copy from udata for create cq, %d\n", err); +		return err; +	} + +	if (attr->cqe > MAX_SEND_BUFFERS_PER_QUEUE) { +		ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe); +		return -EINVAL; +	} + +	cq->cqe = attr->cqe; +	cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, +			       IB_ACCESS_LOCAL_WRITE); +	if (IS_ERR(cq->umem)) { +		err = PTR_ERR(cq->umem); +		ibdev_dbg(ibdev, "Failed to get umem for create cq, err %d\n", +			  err); +		return err; +	} + +	err = mana_ib_gd_create_dma_region(mdev, cq->umem, &cq->gdma_region); +	if (err) { +		ibdev_dbg(ibdev, +			  "Failed to create dma region for create cq, %d\n", +			  err); +		goto err_release_umem; +	} + +	ibdev_dbg(ibdev, +		  "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n", +		  err, cq->gdma_region); + +	/* +	 * The CQ ID is not known at this time. 
The ID is generated at create_qp +	 */ + +	return 0; + +err_release_umem: +	ib_umem_release(cq->umem); +	return err; +} + +int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +{ +	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); +	struct ib_device *ibdev = ibcq->device; +	struct mana_ib_dev *mdev; + +	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + +	mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region); +	ib_umem_release(cq->umem); + +	return 0; +} diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c new file mode 100644 index 000000000000..d4541b8707e4 --- /dev/null +++ b/drivers/infiniband/hw/mana/device.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" +#include <net/mana/mana_auxiliary.h> + +MODULE_DESCRIPTION("Microsoft Azure Network Adapter IB driver"); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(NET_MANA); + +static const struct ib_device_ops mana_ib_dev_ops = { +	.owner = THIS_MODULE, +	.driver_id = RDMA_DRIVER_MANA, +	.uverbs_abi_ver = MANA_IB_UVERBS_ABI_VERSION, + +	.alloc_pd = mana_ib_alloc_pd, +	.alloc_ucontext = mana_ib_alloc_ucontext, +	.create_cq = mana_ib_create_cq, +	.create_qp = mana_ib_create_qp, +	.create_rwq_ind_table = mana_ib_create_rwq_ind_table, +	.create_wq = mana_ib_create_wq, +	.dealloc_pd = mana_ib_dealloc_pd, +	.dealloc_ucontext = mana_ib_dealloc_ucontext, +	.dereg_mr = mana_ib_dereg_mr, +	.destroy_cq = mana_ib_destroy_cq, +	.destroy_qp = mana_ib_destroy_qp, +	.destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table, +	.destroy_wq = mana_ib_destroy_wq, +	.disassociate_ucontext = mana_ib_disassociate_ucontext, +	.get_port_immutable = mana_ib_get_port_immutable, +	.mmap = mana_ib_mmap, +	.modify_qp = mana_ib_modify_qp, +	.modify_wq = mana_ib_modify_wq, +	.query_device = mana_ib_query_device, +	.query_gid = mana_ib_query_gid, +	.query_port = 
mana_ib_query_port, +	.reg_user_mr = mana_ib_reg_user_mr, + +	INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq), +	INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd), +	INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp), +	INIT_RDMA_OBJ_SIZE(ib_ucontext, mana_ib_ucontext, ibucontext), +	INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mana_ib_rwq_ind_table, +			   ib_ind_table), +}; + +static int mana_ib_probe(struct auxiliary_device *adev, +			 const struct auxiliary_device_id *id) +{ +	struct mana_adev *madev = container_of(adev, struct mana_adev, adev); +	struct gdma_dev *mdev = madev->mdev; +	struct mana_context *mc; +	struct mana_ib_dev *dev; +	int ret; + +	mc = mdev->driver_data; + +	dev = ib_alloc_device(mana_ib_dev, ib_dev); +	if (!dev) +		return -ENOMEM; + +	ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops); + +	dev->ib_dev.phys_port_cnt = mc->num_ports; + +	ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev, +		  mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt); + +	dev->gdma_dev = mdev; +	dev->ib_dev.node_type = RDMA_NODE_IB_CA; + +	/* +	 * num_comp_vectors needs to set to the max MSIX index +	 * when interrupts and event queues are implemented +	 */ +	dev->ib_dev.num_comp_vectors = 1; +	dev->ib_dev.dev.parent = mdev->gdma_context->dev; + +	ret = ib_register_device(&dev->ib_dev, "mana_%d", +				 mdev->gdma_context->dev); +	if (ret) { +		ib_dealloc_device(&dev->ib_dev); +		return ret; +	} + +	dev_set_drvdata(&adev->dev, dev); + +	return 0; +} + +static void mana_ib_remove(struct auxiliary_device *adev) +{ +	struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev); + +	ib_unregister_device(&dev->ib_dev); +	ib_dealloc_device(&dev->ib_dev); +} + +static const struct auxiliary_device_id mana_id_table[] = { +	{ +		.name = "mana.rdma", +	}, +	{}, +}; + +MODULE_DEVICE_TABLE(auxiliary, mana_id_table); + +static struct auxiliary_driver mana_driver = { +	.name = "rdma", +	.probe = mana_ib_probe, +	.remove = mana_ib_remove, +	.id_table = mana_id_table, +}; + 
+module_auxiliary_driver(mana_driver); diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c new file mode 100644 index 000000000000..8b3bc302d6f3 --- /dev/null +++ b/drivers/infiniband/hw/mana/main.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" + +void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd, +			 u32 port) +{ +	struct gdma_dev *gd = dev->gdma_dev; +	struct mana_port_context *mpc; +	struct net_device *ndev; +	struct mana_context *mc; + +	mc = gd->driver_data; +	ndev = mc->ports[port]; +	mpc = netdev_priv(ndev); + +	mutex_lock(&pd->vport_mutex); + +	pd->vport_use_count--; +	WARN_ON(pd->vport_use_count < 0); + +	if (!pd->vport_use_count) +		mana_uncfg_vport(mpc); + +	mutex_unlock(&pd->vport_mutex); +} + +int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd, +		      u32 doorbell_id) +{ +	struct gdma_dev *mdev = dev->gdma_dev; +	struct mana_port_context *mpc; +	struct mana_context *mc; +	struct net_device *ndev; +	int err; + +	mc = mdev->driver_data; +	ndev = mc->ports[port]; +	mpc = netdev_priv(ndev); + +	mutex_lock(&pd->vport_mutex); + +	pd->vport_use_count++; +	if (pd->vport_use_count > 1) { +		ibdev_dbg(&dev->ib_dev, +			  "Skip as this PD is already configured vport\n"); +		mutex_unlock(&pd->vport_mutex); +		return 0; +	} + +	err = mana_cfg_vport(mpc, pd->pdn, doorbell_id); +	if (err) { +		pd->vport_use_count--; +		mutex_unlock(&pd->vport_mutex); + +		ibdev_dbg(&dev->ib_dev, "Failed to configure vPort %d\n", err); +		return err; +	} + +	mutex_unlock(&pd->vport_mutex); + +	pd->tx_shortform_allowed = mpc->tx_shortform_allowed; +	pd->tx_vp_offset = mpc->tx_vp_offset; + +	ibdev_dbg(&dev->ib_dev, "vport handle %llx pdid %x doorbell_id %x\n", +		  mpc->port_handle, pd->pdn, doorbell_id); + +	return 0; +} + +int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ 
+	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); +	struct ib_device *ibdev = ibpd->device; +	struct gdma_create_pd_resp resp = {}; +	struct gdma_create_pd_req req = {}; +	enum gdma_pd_flags flags = 0; +	struct mana_ib_dev *dev; +	struct gdma_dev *mdev; +	int err; + +	dev = container_of(ibdev, struct mana_ib_dev, ib_dev); +	mdev = dev->gdma_dev; + +	mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req), +			     sizeof(resp)); + +	req.flags = flags; +	err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req, +				   sizeof(resp), &resp); + +	if (err || resp.hdr.status) { +		ibdev_dbg(&dev->ib_dev, +			  "Failed to get pd_id err %d status %u\n", err, +			  resp.hdr.status); +		if (!err) +			err = -EPROTO; + +		return err; +	} + +	pd->pd_handle = resp.pd_handle; +	pd->pdn = resp.pd_id; +	ibdev_dbg(&dev->ib_dev, "pd_handle 0x%llx pd_id %d\n", +		  pd->pd_handle, pd->pdn); + +	mutex_init(&pd->vport_mutex); +	pd->vport_use_count = 0; +	return 0; +} + +int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ +	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); +	struct ib_device *ibdev = ibpd->device; +	struct gdma_destory_pd_resp resp = {}; +	struct gdma_destroy_pd_req req = {}; +	struct mana_ib_dev *dev; +	struct gdma_dev *mdev; +	int err; + +	dev = container_of(ibdev, struct mana_ib_dev, ib_dev); +	mdev = dev->gdma_dev; + +	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req), +			     sizeof(resp)); + +	req.pd_handle = pd->pd_handle; +	err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req, +				   sizeof(resp), &resp); + +	if (err || resp.hdr.status) { +		ibdev_dbg(&dev->ib_dev, +			  "Failed to destroy pd_handle 0x%llx err %d status %u", +			  pd->pd_handle, err, resp.hdr.status); +		if (!err) +			err = -EPROTO; +	} + +	return err; +} + +static int mana_gd_destroy_doorbell_page(struct gdma_context *gc, +					 int doorbell_page) +{ +	struct gdma_destroy_resource_range_req req = {}; +	
struct gdma_resp_hdr resp = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_RESOURCE_RANGE,
+			     sizeof(req), sizeof(resp));
+
+	req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
+	req.num_resources = 1;
+	req.allocated_resources = doorbell_page;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.status) {
+		dev_err(gc->dev,
+			"Failed to destroy doorbell page: ret %d, 0x%x\n",
+			err, resp.status);
+		return err ?: -EPROTO;
+	}
+
+	return 0;
+}
+
+static int mana_gd_allocate_doorbell_page(struct gdma_context *gc,
+					  int *doorbell_page)
+{
+	struct gdma_allocate_resource_range_req req = {};
+	struct gdma_allocate_resource_range_resp resp = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_ALLOCATE_RESOURCE_RANGE,
+			     sizeof(req), sizeof(resp));
+
+	req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
+	req.num_resources = 1;
+	req.alignment = 1;
+
+	/* Have GDMA start searching from 0 */
+	req.allocated_resources = 0;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.hdr.status) {
+		dev_err(gc->dev,
+			"Failed to allocate doorbell page: ret %d, 0x%x\n",
+			err, resp.hdr.status);
+		return err ?: -EPROTO;
+	}
+
+	*doorbell_page = resp.allocated_resources;
+
+	return 0;
+}
+
+int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
+			   struct ib_udata *udata)
+{
+	struct mana_ib_ucontext *ucontext =
+		container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+	struct ib_device *ibdev = ibcontext->device;
+	struct mana_ib_dev *mdev;
+	struct gdma_context *gc;
+	struct gdma_dev *dev;
+	int doorbell_page;
+	int ret;
+
+	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	dev = mdev->gdma_dev;
+	gc = dev->gdma_context;
+
+	/* Allocate a doorbell page index */
+	ret = mana_gd_allocate_doorbell_page(gc, &doorbell_page);
+	if (ret) {
+		ibdev_dbg(ibdev, "Failed to allocate doorbell page %d\n", ret);
+		return ret;
+	}
+
+	ibdev_dbg(ibdev, "Doorbell page allocated %d\n", doorbell_page);
+
+	ucontext->doorbell = doorbell_page;
+
+	return 0;
+}
+
+void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+	struct mana_ib_ucontext *mana_ucontext =
+		container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+	struct ib_device *ibdev = ibcontext->device;
+	struct mana_ib_dev *mdev;
+	struct gdma_context *gc;
+	int ret;
+
+	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	gc = mdev->gdma_dev->gdma_context;
+
+	ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
+	if (ret)
+		ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
+}
+
+/*
+ * Send the initial create-DMA-region request carrying the first @num_pages
+ * page addresses and store the returned handle in @gdma_region.
+ *
+ * @expected_status is the response status treated as success: the caller
+ * passes GDMA_STATUS_MORE_ENTRIES when further add-pages messages will follow
+ * and 0 when this message covers the whole region.
+ *
+ * Returns 0 on success, the error from mana_gd_send_request(), or -EPROTO
+ * when the response status differs from @expected_status.
+ */
+static int
+mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
+			    struct gdma_context *gc,
+			    struct gdma_create_dma_region_req *create_req,
+			    size_t num_pages, mana_handle_t *gdma_region,
+			    u32 expected_status)
+{
+	struct gdma_create_dma_region_resp create_resp = {};
+	unsigned int create_req_msg_size;
+	int err;
+
+	create_req_msg_size =
+		struct_size(create_req, page_addr_list, num_pages);
+	create_req->page_addr_list_len = num_pages;
+
+	err = mana_gd_send_request(gc, create_req_msg_size, create_req,
+				   sizeof(create_resp), &create_resp);
+	if (err || create_resp.hdr.status != expected_status) {
+		ibdev_dbg(&dev->ib_dev,
+			  "Failed to create DMA region: %d, 0x%x\n",
+			  err, create_resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+
+		return err;
+	}
+
+	*gdma_region = create_resp.dma_region_handle;
+	ibdev_dbg(&dev->ib_dev, "Created DMA region handle 0x%llx\n",
+		  *gdma_region);
+
+	return 0;
+}
+
+/*
+ * Send one GDMA_DMA_REGION_ADD_PAGES request carrying @num_pages page
+ * addresses that the caller has already placed in @add_req.
+ *
+ * Returns 0 when the response status equals @expected_status, the error from
+ * mana_gd_send_request(), or -EPROTO on any other status.
+ */
+static int
+mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context *gc,
+			  struct gdma_dma_region_add_pages_req *add_req,
+			  unsigned int num_pages, u32 expected_status)
+{
+	unsigned int add_req_msg_size =
+		struct_size(add_req, page_addr_list, num_pages);
+	struct gdma_general_resp add_resp = {};
+	int err;
+
+	mana_gd_init_req_hdr(&add_req->hdr, GDMA_DMA_REGION_ADD_PAGES,
+			     add_req_msg_size, sizeof(add_resp));
+	add_req->page_addr_list_len = num_pages;
+
+	err = mana_gd_send_request(gc, add_req_msg_size, add_req,
+				   sizeof(add_resp), &add_resp);
+	if (err || add_resp.hdr.status != expected_status) {
+		ibdev_dbg(&dev->ib_dev,
+			  "Failed to create DMA region: %d, 0x%x\n",
+			  err, add_resp.hdr.status);
+
+		if (!err)
+			err = -EPROTO;
+
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Describe the pages backing @umem to GDMA as a single DMA region.
+ *
+ * The page address list is sent in chunks bounded by the HW channel's
+ * max_req_msg_size: one create message, followed by as many add-pages
+ * messages as are needed for the remaining pages. Every message except the
+ * last one is expected to be answered with GDMA_STATUS_MORE_ENTRIES.
+ *
+ * On success the region handle is returned through @gdma_region. If an
+ * add-pages message fails, the partially built region is destroyed.
+ */
+int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+				 mana_handle_t *gdma_region)
+{
+	struct gdma_dma_region_add_pages_req *add_req = NULL;
+	size_t num_pages_processed = 0, num_pages_to_handle;
+	struct gdma_create_dma_region_req *create_req;
+	unsigned int create_req_msg_size;
+	struct hw_channel_context *hwc;
+	struct ib_block_iter biter;
+	size_t max_pgs_add_cmd = 0;
+	size_t max_pgs_create_cmd;
+	struct gdma_context *gc;
+	size_t num_pages_total;
+	struct gdma_dev *mdev;
+	unsigned long page_sz;
+	unsigned int tail = 0;
+	u64 *page_addr_list;
+	void *request_buf;
+	int err;
+
+	mdev = dev->gdma_dev;
+	gc = mdev->gdma_context;
+	hwc = gc->hwc.driver_data;
+
+	/* Hardware requires dma region to align to chosen page size */
+	page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, 0);
+	if (!page_sz) {
+		ibdev_dbg(&dev->ib_dev, "failed to find page size.\n");
+		return -ENOMEM;
+	}
+	num_pages_total = ib_umem_num_dma_blocks(umem, page_sz);
+
+	max_pgs_create_cmd =
+		(hwc->max_req_msg_size - sizeof(*create_req)) / sizeof(u64);
+	num_pages_to_handle =
+		min_t(size_t, num_pages_total, max_pgs_create_cmd);
+	create_req_msg_size =
+		struct_size(create_req, page_addr_list, num_pages_to_handle);
+
+	request_buf = kzalloc(hwc->max_req_msg_size, GFP_KERNEL);
+	if (!request_buf)
+		return -ENOMEM;
+
+	create_req = request_buf;
+	mana_gd_init_req_hdr(&create_req->hdr, GDMA_CREATE_DMA_REGION,
+			     create_req_msg_size,
+			     sizeof(struct gdma_create_dma_region_resp));
+
+	create_req->length = umem->length;
+	create_req->offset_in_page = umem->address & (page_sz - 1);
+	create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT;
+	create_req->page_count = num_pages_total;
+
+	ibdev_dbg(&dev->ib_dev, "size_dma_region %zu num_pages_total %zu\n",
+		  umem->length, num_pages_total);
+
+	ibdev_dbg(&dev->ib_dev, "page_sz %lu offset_in_page %u\n",
+		  page_sz, create_req->offset_in_page);
+
+	ibdev_dbg(&dev->ib_dev,
+		  "num_pages_to_handle %zu, gdma_page_type %u\n",
+		  num_pages_to_handle, create_req->gdma_page_type);
+
+	page_addr_list = create_req->page_addr_list;
+	rdma_umem_for_each_dma_block(umem, &biter, page_sz) {
+		u32 expected_status = 0;
+
+		page_addr_list[tail++] = rdma_block_iter_dma_address(&biter);
+		if (tail < num_pages_to_handle)
+			continue;
+
+		/*
+		 * More pages remain after this message, so the response is
+		 * expected to carry GDMA_STATUS_MORE_ENTRIES rather than 0.
+		 * This applies to the first create message as well as to the
+		 * add-pages messages.
+		 */
+		if (num_pages_processed + num_pages_to_handle <
+		    num_pages_total)
+			expected_status = GDMA_STATUS_MORE_ENTRIES;
+
+		if (!num_pages_processed) {
+			/* First create message */
+			err = mana_ib_gd_first_dma_region(dev, gc, create_req,
+							  tail, gdma_region,
+							  expected_status);
+			if (err)
+				goto out;
+
+			max_pgs_add_cmd = (hwc->max_req_msg_size -
+				sizeof(*add_req)) / sizeof(u64);
+
+			/* The request buffer is reused for add-pages messages */
+			add_req = request_buf;
+			add_req->dma_region_handle = *gdma_region;
+			add_req->reserved3 = 0;
+			page_addr_list = add_req->page_addr_list;
+		} else {
+			/* Subsequent create messages */
+			err = mana_ib_gd_add_dma_region(dev, gc, add_req, tail,
+							expected_status);
+			if (err)
+				break;
+		}
+
+		num_pages_processed += tail;
+		tail = 0;
+
+		/* The remaining pages to create */
+		num_pages_to_handle =
+			min_t(size_t,
+			      num_pages_total - num_pages_processed,
+			      max_pgs_add_cmd);
+	}
+
+	if (err)
+		mana_ib_gd_destroy_dma_region(dev, *gdma_region);
+
+out:
+	kfree(request_buf);
+	return err;
+}
+
+int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, u64 gdma_region)
+{
+	struct gdma_dev *mdev = dev->gdma_dev;
+	struct gdma_context *gc;
+
+	gc = mdev->gdma_context;
+	ibdev_dbg(&dev->ib_dev, "destroy dma region 0x%llx\n", gdma_region);
+
+	return
mana_gd_destroy_dma_region(gc, gdma_region); +} + +int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) +{ +	struct mana_ib_ucontext *mana_ucontext = +		container_of(ibcontext, struct mana_ib_ucontext, ibucontext); +	struct ib_device *ibdev = ibcontext->device; +	struct mana_ib_dev *mdev; +	struct gdma_context *gc; +	phys_addr_t pfn; +	pgprot_t prot; +	int ret; + +	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); +	gc = mdev->gdma_dev->gdma_context; + +	if (vma->vm_pgoff != 0) { +		ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma->vm_pgoff); +		return -EINVAL; +	} + +	/* Map to the page indexed by ucontext->doorbell */ +	pfn = (gc->phys_db_page_base + +	       gc->db_page_size * mana_ucontext->doorbell) >> +	      PAGE_SHIFT; +	prot = pgprot_writecombine(vma->vm_page_prot); + +	ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size, prot, +				NULL); +	if (ret) +		ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret); +	else +		ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret %d\n", +			  pfn, gc->db_page_size, ret); + +	return ret; +} + +int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num, +			       struct ib_port_immutable *immutable) +{ +	/* +	 * This version only support RAW_PACKET +	 * other values need to be filled for other types +	 */ +	immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET; + +	return 0; +} + +int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, +			 struct ib_udata *uhw) +{ +	props->max_qp = MANA_MAX_NUM_QUEUES; +	props->max_qp_wr = MAX_SEND_BUFFERS_PER_QUEUE; + +	/* +	 * max_cqe could be potentially much bigger. 
+	 * As this version of driver only support RAW QP, set it to the same +	 * value as max_qp_wr +	 */ +	props->max_cqe = MAX_SEND_BUFFERS_PER_QUEUE; + +	props->max_mr_size = MANA_IB_MAX_MR_SIZE; +	props->max_mr = MANA_IB_MAX_MR; +	props->max_send_sge = MAX_TX_WQE_SGL_ENTRIES; +	props->max_recv_sge = MAX_RX_WQE_SGL_ENTRIES; + +	return 0; +} + +int mana_ib_query_port(struct ib_device *ibdev, u32 port, +		       struct ib_port_attr *props) +{ +	/* This version doesn't return port properties */ +	return 0; +} + +int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index, +		      union ib_gid *gid) +{ +	/* This version doesn't return GID properties */ +	return 0; +} + +void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ +} diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h new file mode 100644 index 000000000000..502cc8672eef --- /dev/null +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 Microsoft Corporation. All rights reserved. 
+ */ + +#ifndef _MANA_IB_H_ +#define _MANA_IB_H_ + +#include <rdma/ib_verbs.h> +#include <rdma/ib_mad.h> +#include <rdma/ib_umem.h> +#include <rdma/mana-abi.h> +#include <rdma/uverbs_ioctl.h> + +#include <net/mana/mana.h> + +#define PAGE_SZ_BM                                                             \ +	(SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K |        \ +	 SZ_512K | SZ_1M | SZ_2M) + +/* MANA doesn't have any limit for MR size */ +#define MANA_IB_MAX_MR_SIZE	U64_MAX + +/* + * The hardware limit of number of MRs is greater than maximum number of MRs + * that can possibly represent in 24 bits + */ +#define MANA_IB_MAX_MR		0xFFFFFFu + +struct mana_ib_dev { +	struct ib_device ib_dev; +	struct gdma_dev *gdma_dev; +}; + +struct mana_ib_wq { +	struct ib_wq ibwq; +	struct ib_umem *umem; +	int wqe; +	u32 wq_buf_size; +	u64 gdma_region; +	u64 id; +	mana_handle_t rx_object; +}; + +struct mana_ib_pd { +	struct ib_pd ibpd; +	u32 pdn; +	mana_handle_t pd_handle; + +	/* Mutex for sharing access to vport_use_count */ +	struct mutex vport_mutex; +	int vport_use_count; + +	bool tx_shortform_allowed; +	u32 tx_vp_offset; +}; + +struct mana_ib_mr { +	struct ib_mr ibmr; +	struct ib_umem *umem; +	mana_handle_t mr_handle; +}; + +struct mana_ib_cq { +	struct ib_cq ibcq; +	struct ib_umem *umem; +	int cqe; +	u64 gdma_region; +	u64 id; +}; + +struct mana_ib_qp { +	struct ib_qp ibqp; + +	/* Work queue info */ +	struct ib_umem *sq_umem; +	int sqe; +	u64 sq_gdma_region; +	u64 sq_id; +	mana_handle_t tx_object; + +	/* The port on the IB device, starting with 1 */ +	u32 port; +}; + +struct mana_ib_ucontext { +	struct ib_ucontext ibucontext; +	u32 doorbell; +}; + +struct mana_ib_rwq_ind_table { +	struct ib_rwq_ind_table ib_ind_table; +}; + +int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem, +				 mana_handle_t *gdma_region); + +int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, +				  mana_handle_t gdma_region); + +struct ib_wq 
*mana_ib_create_wq(struct ib_pd *pd, +				struct ib_wq_init_attr *init_attr, +				struct ib_udata *udata); + +int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, +		      u32 wq_attr_mask, struct ib_udata *udata); + +int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata); + +int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table, +				 struct ib_rwq_ind_table_init_attr *init_attr, +				 struct ib_udata *udata); + +int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl); + +struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags); + +struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, +				  u64 iova, int access_flags, +				  struct ib_udata *udata); + +int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); + +int mana_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr, +		      struct ib_udata *udata); + +int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, +		      int attr_mask, struct ib_udata *udata); + +int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); + +int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port_id, +		      struct mana_ib_pd *pd, u32 doorbell_id); +void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd, +			 u32 port); + +int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, +		      struct ib_udata *udata); + +int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); + +int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); + +int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext, +			   struct ib_udata *udata); +void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); + +int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma); + +int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num, +			       struct 
ib_port_immutable *immutable); +int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, +			 struct ib_udata *uhw); +int mana_ib_query_port(struct ib_device *ibdev, u32 port, +		       struct ib_port_attr *props); +int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index, +		      union ib_gid *gid); + +void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext); + +#endif diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c new file mode 100644 index 000000000000..351207c60eb6 --- /dev/null +++ b/drivers/infiniband/hw/mana/mr.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" + +#define VALID_MR_FLAGS                                                         \ +	(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ) + +static enum gdma_mr_access_flags +mana_ib_verbs_to_gdma_access_flags(int access_flags) +{ +	enum gdma_mr_access_flags flags = GDMA_ACCESS_FLAG_LOCAL_READ; + +	if (access_flags & IB_ACCESS_LOCAL_WRITE) +		flags |= GDMA_ACCESS_FLAG_LOCAL_WRITE; + +	if (access_flags & IB_ACCESS_REMOTE_WRITE) +		flags |= GDMA_ACCESS_FLAG_REMOTE_WRITE; + +	if (access_flags & IB_ACCESS_REMOTE_READ) +		flags |= GDMA_ACCESS_FLAG_REMOTE_READ; + +	return flags; +} + +static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr, +				struct gdma_create_mr_params *mr_params) +{ +	struct gdma_create_mr_response resp = {}; +	struct gdma_create_mr_request req = {}; +	struct gdma_dev *mdev = dev->gdma_dev; +	struct gdma_context *gc; +	int err; + +	gc = mdev->gdma_context; + +	mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req), +			     sizeof(resp)); +	req.pd_handle = mr_params->pd_handle; +	req.mr_type = mr_params->mr_type; + +	switch (mr_params->mr_type) { +	case GDMA_MR_TYPE_GVA: +		req.gva.dma_region_handle = mr_params->gva.dma_region_handle; +		
req.gva.virtual_address = mr_params->gva.virtual_address; +		req.gva.access_flags = mr_params->gva.access_flags; +		break; + +	default: +		ibdev_dbg(&dev->ib_dev, +			  "invalid param (GDMA_MR_TYPE) passed, type %d\n", +			  req.mr_type); +		return -EINVAL; +	} + +	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + +	if (err || resp.hdr.status) { +		ibdev_dbg(&dev->ib_dev, "Failed to create mr %d, %u", err, +			  resp.hdr.status); +		if (!err) +			err = -EPROTO; + +		return err; +	} + +	mr->ibmr.lkey = resp.lkey; +	mr->ibmr.rkey = resp.rkey; +	mr->mr_handle = resp.mr_handle; + +	return 0; +} + +static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, u64 mr_handle) +{ +	struct gdma_destroy_mr_response resp = {}; +	struct gdma_destroy_mr_request req = {}; +	struct gdma_dev *mdev = dev->gdma_dev; +	struct gdma_context *gc; +	int err; + +	gc = mdev->gdma_context; + +	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_MR, sizeof(req), +			     sizeof(resp)); + +	req.mr_handle = mr_handle; + +	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); +	if (err || resp.hdr.status) { +		dev_err(gc->dev, "Failed to destroy MR: %d, 0x%x\n", err, +			resp.hdr.status); +		if (!err) +			err = -EPROTO; +		return err; +	} + +	return 0; +} + +struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, +				  u64 iova, int access_flags, +				  struct ib_udata *udata) +{ +	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); +	struct gdma_create_mr_params mr_params = {}; +	struct ib_device *ibdev = ibpd->device; +	struct mana_ib_dev *dev; +	struct mana_ib_mr *mr; +	u64 dma_region_handle; +	int err; + +	dev = container_of(ibdev, struct mana_ib_dev, ib_dev); + +	ibdev_dbg(ibdev, +		  "start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x", +		  start, iova, length, access_flags); + +	if (access_flags & ~VALID_MR_FLAGS) +		return ERR_PTR(-EINVAL); + +	mr = kzalloc(sizeof(*mr), GFP_KERNEL); +	if (!mr) +		return 
ERR_PTR(-ENOMEM); + +	mr->umem = ib_umem_get(ibdev, start, length, access_flags); +	if (IS_ERR(mr->umem)) { +		err = PTR_ERR(mr->umem); +		ibdev_dbg(ibdev, +			  "Failed to get umem for register user-mr, %d\n", err); +		goto err_free; +	} + +	err = mana_ib_gd_create_dma_region(dev, mr->umem, &dma_region_handle); +	if (err) { +		ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n", +			  err); +		goto err_umem; +	} + +	ibdev_dbg(ibdev, +		  "mana_ib_gd_create_dma_region ret %d gdma_region %llx\n", err, +		  dma_region_handle); + +	mr_params.pd_handle = pd->pd_handle; +	mr_params.mr_type = GDMA_MR_TYPE_GVA; +	mr_params.gva.dma_region_handle = dma_region_handle; +	mr_params.gva.virtual_address = iova; +	mr_params.gva.access_flags = +		mana_ib_verbs_to_gdma_access_flags(access_flags); + +	err = mana_ib_gd_create_mr(dev, mr, &mr_params); +	if (err) +		goto err_dma_region; + +	/* +	 * There is no need to keep track of dma_region_handle after MR is +	 * successfully created. The dma_region_handle is tracked in the PF +	 * as part of the lifecycle of this MR. 
+	 */
+
+	return &mr->ibmr;
+
+err_dma_region:
+	mana_gd_destroy_dma_region(dev->gdma_dev->gdma_context,
+				   dma_region_handle);
+
+err_umem:
+	ib_umem_release(mr->umem);
+
+err_free:
+	kfree(mr);
+	return ERR_PTR(err);
+}
+
+int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+	struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr);
+	struct ib_device *ibdev = ibmr->device;
+	struct mana_ib_dev *dev;
+	int err;
+
+	dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+	err = mana_ib_gd_destroy_mr(dev, mr->mr_handle);
+	if (err)
+		return err;
+
+	if (mr->umem)
+		ib_umem_release(mr->umem);
+
+	kfree(mr);
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
new file mode 100644
index 000000000000..ea15ec77e321
--- /dev/null
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+/*
+ * Build and send a MANA_CONFIG_VPORT_RX request that enables RX on the vPort
+ * of @ndev, sets @default_rxobj as the default RX object and updates the
+ * indirection table and the hash key.
+ *
+ * @ind_table is indexed modulo (1 << @log_ind_tbl_size) to fill all
+ * MANA_INDIRECT_TABLE_SIZE entries of the request. When @rx_hash_key_len is 0
+ * the key comes from netdev_rss_key_fill(); otherwise @rx_hash_key_len bytes
+ * are copied from @rx_hash_key.
+ *
+ * Returns 0 on success, -EINVAL if @rx_hash_key_len exceeds
+ * MANA_HASH_KEY_SIZE, -ENOMEM on allocation failure, -EPROTO if the response
+ * carries a non-zero status, or the error from mana_gd_send_request().
+ */
+static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
+				      struct net_device *ndev,
+				      mana_handle_t default_rxobj,
+				      mana_handle_t ind_table[],
+				      u32 log_ind_tbl_size, u32 rx_hash_key_len,
+				      u8 *rx_hash_key)
+{
+	struct mana_port_context *mpc = netdev_priv(ndev);
+	struct mana_cfg_rx_steer_req *req = NULL;
+	struct mana_cfg_rx_steer_resp resp = {};
+	mana_handle_t *req_indir_tab;
+	struct gdma_context *gc;
+	struct gdma_dev *mdev;
+	u32 req_buf_size;
+	int i, err;
+
+	/*
+	 * The key length is supplied by the caller; reject anything that
+	 * would make the memcpy() below run past the request's hash key.
+	 */
+	if (rx_hash_key_len > MANA_HASH_KEY_SIZE)
+		return -EINVAL;
+
+	mdev = dev->gdma_dev;
+	gc = mdev->gdma_context;
+
+	req_buf_size =
+		sizeof(*req) + sizeof(mana_handle_t) * MANA_INDIRECT_TABLE_SIZE;
+	req = kzalloc(req_buf_size, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
+			     sizeof(resp));
+
+	req->vport = mpc->port_handle;
+	req->rx_enable = 1;
+	req->update_default_rxobj = 1;
+	req->default_rxobj = default_rxobj;
+	req->hdr.dev_id = mdev->dev_id;
+
+	/* If there are more than 1 entries in indirection table, enable RSS */
+	if (log_ind_tbl_size)
+		req->rss_enable = true;
+
+	req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE;
+	req->indir_tab_offset = sizeof(*req);
+	req->update_indir_tab = true;
+
+	req_indir_tab = (mana_handle_t *)(req + 1);
+	/* The ind table passed to the hardware must have
+	 * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb
+	 * ind_table to MANA_INDIRECT_TABLE_SIZE if required
+	 */
+	ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
+	for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+		req_indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
+		ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
+			  req_indir_tab[i]);
+	}
+
+	req->update_hashkey = true;
+	if (rx_hash_key_len)
+		memcpy(req->hashkey, rx_hash_key, rx_hash_key_len);
+	else
+		netdev_rss_key_fill(req->hashkey, MANA_HASH_KEY_SIZE);
+
+	ibdev_dbg(&dev->ib_dev, "vport handle %llu default_rxobj 0x%llx\n",
+		  req->vport, default_rxobj);
+
+	err = mana_gd_send_request(gc, req_buf_size, req, sizeof(resp), &resp);
+	if (err) {
+		netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
+		goto out;
+	}
+
+	if (resp.hdr.status) {
+		netdev_err(ndev, "vPort RX configuration failed: 0x%x\n",
+			   resp.hdr.status);
+		err = -EPROTO;
+		goto out;
+	}
+
+	netdev_info(ndev, "Configured steering vPort %llu log_entries %u\n",
+		    mpc->port_handle, log_ind_tbl_size);
+
+out:
+	kfree(req);
+	return err;
+}
+
+static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
+				 struct ib_qp_init_attr *attr,
+				 struct ib_udata *udata)
+{
+	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+	struct mana_ib_dev *mdev =
+		container_of(pd->device, struct mana_ib_dev, ib_dev);
+	struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
+	struct mana_ib_create_qp_rss_resp resp = {};
+	struct mana_ib_create_qp_rss ucmd = {};
+	struct gdma_dev *gd = mdev->gdma_dev;
+	
mana_handle_t *mana_ind_table; +	struct mana_port_context *mpc; +	struct mana_context *mc; +	struct net_device *ndev; +	struct mana_ib_cq *cq; +	struct mana_ib_wq *wq; +	unsigned int ind_tbl_size; +	struct ib_cq *ibcq; +	struct ib_wq *ibwq; +	int i = 0; +	u32 port; +	int ret; + +	mc = gd->driver_data; + +	if (!udata || udata->inlen < sizeof(ucmd)) +		return -EINVAL; + +	ret = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); +	if (ret) { +		ibdev_dbg(&mdev->ib_dev, +			  "Failed copy from udata for create rss-qp, err %d\n", +			  ret); +		return ret; +	} + +	if (attr->cap.max_recv_wr > MAX_SEND_BUFFERS_PER_QUEUE) { +		ibdev_dbg(&mdev->ib_dev, +			  "Requested max_recv_wr %d exceeding limit\n", +			  attr->cap.max_recv_wr); +		return -EINVAL; +	} + +	if (attr->cap.max_recv_sge > MAX_RX_WQE_SGL_ENTRIES) { +		ibdev_dbg(&mdev->ib_dev, +			  "Requested max_recv_sge %d exceeding limit\n", +			  attr->cap.max_recv_sge); +		return -EINVAL; +	} + +	ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size; +	if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) { +		ibdev_dbg(&mdev->ib_dev, +			  "Indirect table size %d exceeding limit\n", +			  ind_tbl_size); +		return -EINVAL; +	} + +	if (ucmd.rx_hash_function != MANA_IB_RX_HASH_FUNC_TOEPLITZ) { +		ibdev_dbg(&mdev->ib_dev, +			  "RX Hash function is not supported, %d\n", +			  ucmd.rx_hash_function); +		return -EINVAL; +	} + +	/* IB ports start with 1, MANA start with 0 */ +	port = ucmd.port; +	if (port < 1 || port > mc->num_ports) { +		ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n", +			  port); +		return -EINVAL; +	} +	ndev = mc->ports[port - 1]; +	mpc = netdev_priv(ndev); + +	ibdev_dbg(&mdev->ib_dev, "rx_hash_function %d port %d\n", +		  ucmd.rx_hash_function, port); + +	mana_ind_table = kcalloc(ind_tbl_size, sizeof(mana_handle_t), +				 GFP_KERNEL); +	if (!mana_ind_table) { +		ret = -ENOMEM; +		goto fail; +	} + +	qp->port = port; + +	for (i = 0; i < ind_tbl_size; i++) { +		struct mana_obj_spec wq_spec = {}; +	
	struct mana_obj_spec cq_spec = {}; + +		ibwq = ind_tbl->ind_tbl[i]; +		wq = container_of(ibwq, struct mana_ib_wq, ibwq); + +		ibcq = ibwq->cq; +		cq = container_of(ibcq, struct mana_ib_cq, ibcq); + +		wq_spec.gdma_region = wq->gdma_region; +		wq_spec.queue_size = wq->wq_buf_size; + +		cq_spec.gdma_region = cq->gdma_region; +		cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE; +		cq_spec.modr_ctx_id = 0; +		cq_spec.attached_eq = GDMA_CQ_NO_EQ; + +		ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ, +					 &wq_spec, &cq_spec, &wq->rx_object); +		if (ret) +			goto fail; + +		/* The GDMA regions are now owned by the WQ object */ +		wq->gdma_region = GDMA_INVALID_DMA_REGION; +		cq->gdma_region = GDMA_INVALID_DMA_REGION; + +		wq->id = wq_spec.queue_index; +		cq->id = cq_spec.queue_index; + +		ibdev_dbg(&mdev->ib_dev, +			  "ret %d rx_object 0x%llx wq id %llu cq id %llu\n", +			  ret, wq->rx_object, wq->id, cq->id); + +		resp.entries[i].cqid = cq->id; +		resp.entries[i].wqid = wq->id; + +		mana_ind_table[i] = wq->rx_object; +	} +	resp.num_entries = i; + +	ret = mana_ib_cfg_vport_steering(mdev, ndev, wq->rx_object, +					 mana_ind_table, +					 ind_tbl->log_ind_tbl_size, +					 ucmd.rx_hash_key_len, +					 ucmd.rx_hash_key); +	if (ret) +		goto fail; + +	ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); +	if (ret) { +		ibdev_dbg(&mdev->ib_dev, +			  "Failed to copy to udata create rss-qp, %d\n", +			  ret); +		goto fail; +	} + +	kfree(mana_ind_table); + +	return 0; + +fail: +	while (i-- > 0) { +		ibwq = ind_tbl->ind_tbl[i]; +		wq = container_of(ibwq, struct mana_ib_wq, ibwq); +		mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object); +	} + +	kfree(mana_ind_table); + +	return ret; +} + +static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, +				 struct ib_qp_init_attr *attr, +				 struct ib_udata *udata) +{ +	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); +	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); +	struct 
mana_ib_dev *mdev = +		container_of(ibpd->device, struct mana_ib_dev, ib_dev); +	struct mana_ib_cq *send_cq = +		container_of(attr->send_cq, struct mana_ib_cq, ibcq); +	struct mana_ib_ucontext *mana_ucontext = +		rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, +					  ibucontext); +	struct mana_ib_create_qp_resp resp = {}; +	struct gdma_dev *gd = mdev->gdma_dev; +	struct mana_ib_create_qp ucmd = {}; +	struct mana_obj_spec wq_spec = {}; +	struct mana_obj_spec cq_spec = {}; +	struct mana_port_context *mpc; +	struct mana_context *mc; +	struct net_device *ndev; +	struct ib_umem *umem; +	int err; +	u32 port; + +	mc = gd->driver_data; + +	if (!mana_ucontext || udata->inlen < sizeof(ucmd)) +		return -EINVAL; + +	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); +	if (err) { +		ibdev_dbg(&mdev->ib_dev, +			  "Failed to copy from udata create qp-raw, %d\n", err); +		return err; +	} + +	/* IB ports start with 1, MANA Ethernet ports start with 0 */ +	port = ucmd.port; +	if (ucmd.port > mc->num_ports) +		return -EINVAL; + +	if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) { +		ibdev_dbg(&mdev->ib_dev, +			  "Requested max_send_wr %d exceeding limit\n", +			  attr->cap.max_send_wr); +		return -EINVAL; +	} + +	if (attr->cap.max_send_sge > MAX_TX_WQE_SGL_ENTRIES) { +		ibdev_dbg(&mdev->ib_dev, +			  "Requested max_send_sge %d exceeding limit\n", +			  attr->cap.max_send_sge); +		return -EINVAL; +	} + +	ndev = mc->ports[port - 1]; +	mpc = netdev_priv(ndev); +	ibdev_dbg(&mdev->ib_dev, "port %u ndev %p mpc %p\n", port, ndev, mpc); + +	err = mana_ib_cfg_vport(mdev, port - 1, pd, mana_ucontext->doorbell); +	if (err) +		return -ENODEV; + +	qp->port = port; + +	ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n", +		  ucmd.sq_buf_addr, ucmd.port); + +	umem = ib_umem_get(ibpd->device, ucmd.sq_buf_addr, ucmd.sq_buf_size, +			   IB_ACCESS_LOCAL_WRITE); +	if (IS_ERR(umem)) { +		err = PTR_ERR(umem); +		ibdev_dbg(&mdev->ib_dev, +			  "Failed to 
get umem for create qp-raw, err %d\n", +			  err); +		goto err_free_vport; +	} +	qp->sq_umem = umem; + +	err = mana_ib_gd_create_dma_region(mdev, qp->sq_umem, +					   &qp->sq_gdma_region); +	if (err) { +		ibdev_dbg(&mdev->ib_dev, +			  "Failed to create dma region for create qp-raw, %d\n", +			  err); +		goto err_release_umem; +	} + +	ibdev_dbg(&mdev->ib_dev, +		  "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n", +		  err, qp->sq_gdma_region); + +	/* Create a WQ on the same port handle used by the Ethernet */ +	wq_spec.gdma_region = qp->sq_gdma_region; +	wq_spec.queue_size = ucmd.sq_buf_size; + +	cq_spec.gdma_region = send_cq->gdma_region; +	cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE; +	cq_spec.modr_ctx_id = 0; +	cq_spec.attached_eq = GDMA_CQ_NO_EQ; + +	err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec, +				 &cq_spec, &qp->tx_object); +	if (err) { +		ibdev_dbg(&mdev->ib_dev, +			  "Failed to create wq for create raw-qp, err %d\n", +			  err); +		goto err_destroy_dma_region; +	} + +	/* The GDMA regions are now owned by the WQ object */ +	qp->sq_gdma_region = GDMA_INVALID_DMA_REGION; +	send_cq->gdma_region = GDMA_INVALID_DMA_REGION; + +	qp->sq_id = wq_spec.queue_index; +	send_cq->id = cq_spec.queue_index; + +	ibdev_dbg(&mdev->ib_dev, +		  "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err, +		  qp->tx_object, qp->sq_id, send_cq->id); + +	resp.sqid = qp->sq_id; +	resp.cqid = send_cq->id; +	resp.tx_vp_offset = pd->tx_vp_offset; + +	err = ib_copy_to_udata(udata, &resp, sizeof(resp)); +	if (err) { +		ibdev_dbg(&mdev->ib_dev, +			  "Failed copy udata for create qp-raw, %d\n", +			  err); +		goto err_destroy_wq_obj; +	} + +	return 0; + +err_destroy_wq_obj: +	mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object); + +err_destroy_dma_region: +	mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region); + +err_release_umem: +	ib_umem_release(umem); + +err_free_vport: +	mana_ib_uncfg_vport(mdev, pd, port - 1); + +	return err; +} + +int 
mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr, +		      struct ib_udata *udata) +{ +	switch (attr->qp_type) { +	case IB_QPT_RAW_PACKET: +		/* When rwq_ind_tbl is used, it's for creating WQs for RSS */ +		if (attr->rwq_ind_tbl) +			return mana_ib_create_qp_rss(ibqp, ibqp->pd, attr, +						     udata); + +		return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata); +	default: +		/* Creating QP other than IB_QPT_RAW_PACKET is not supported */ +		ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n", +			  attr->qp_type); +	} + +	return -EINVAL; +} + +int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, +		      int attr_mask, struct ib_udata *udata) +{ +	/* modify_qp is not supported by this version of the driver */ +	return -EOPNOTSUPP; +} + +static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp, +				  struct ib_rwq_ind_table *ind_tbl, +				  struct ib_udata *udata) +{ +	struct mana_ib_dev *mdev = +		container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); +	struct gdma_dev *gd = mdev->gdma_dev; +	struct mana_port_context *mpc; +	struct mana_context *mc; +	struct net_device *ndev; +	struct mana_ib_wq *wq; +	struct ib_wq *ibwq; +	int i; + +	mc = gd->driver_data; +	ndev = mc->ports[qp->port - 1]; +	mpc = netdev_priv(ndev); + +	for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) { +		ibwq = ind_tbl->ind_tbl[i]; +		wq = container_of(ibwq, struct mana_ib_wq, ibwq); +		ibdev_dbg(&mdev->ib_dev, "destroying wq->rx_object %llu\n", +			  wq->rx_object); +		mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object); +	} + +	return 0; +} + +static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata) +{ +	struct mana_ib_dev *mdev = +		container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); +	struct gdma_dev *gd = mdev->gdma_dev; +	struct ib_pd *ibpd = qp->ibqp.pd; +	struct mana_port_context *mpc; +	struct mana_context *mc; +	struct net_device *ndev; +	struct mana_ib_pd *pd; + +	mc = gd->driver_data; +	ndev = 
mc->ports[qp->port - 1]; +	mpc = netdev_priv(ndev); +	pd = container_of(ibpd, struct mana_ib_pd, ibpd); + +	mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object); + +	if (qp->sq_umem) { +		mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region); +		ib_umem_release(qp->sq_umem); +	} + +	mana_ib_uncfg_vport(mdev, pd, qp->port - 1); + +	return 0; +} + +int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) +{ +	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); + +	switch (ibqp->qp_type) { +	case IB_QPT_RAW_PACKET: +		if (ibqp->rwq_ind_tbl) +			return mana_ib_destroy_qp_rss(qp, ibqp->rwq_ind_tbl, +						      udata); + +		return mana_ib_destroy_qp_raw(qp, udata); + +	default: +		ibdev_dbg(ibqp->device, "Unexpected QP type %u\n", +			  ibqp->qp_type); +	} + +	return -ENOENT; +} diff --git a/drivers/infiniband/hw/mana/wq.c b/drivers/infiniband/hw/mana/wq.c new file mode 100644 index 000000000000..372d361510e0 --- /dev/null +++ b/drivers/infiniband/hw/mana/wq.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. 
+ */ + +#include "mana_ib.h" + +struct ib_wq *mana_ib_create_wq(struct ib_pd *pd, +				struct ib_wq_init_attr *init_attr, +				struct ib_udata *udata) +{ +	struct mana_ib_dev *mdev = +		container_of(pd->device, struct mana_ib_dev, ib_dev); +	struct mana_ib_create_wq ucmd = {}; +	struct mana_ib_wq *wq; +	struct ib_umem *umem; +	int err; + +	if (udata->inlen < sizeof(ucmd)) +		return ERR_PTR(-EINVAL); + +	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); +	if (err) { +		ibdev_dbg(&mdev->ib_dev, +			  "Failed to copy from udata for create wq, %d\n", err); +		return ERR_PTR(err); +	} + +	wq = kzalloc(sizeof(*wq), GFP_KERNEL); +	if (!wq) +		return ERR_PTR(-ENOMEM); + +	ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n", ucmd.wq_buf_addr); + +	umem = ib_umem_get(pd->device, ucmd.wq_buf_addr, ucmd.wq_buf_size, +			   IB_ACCESS_LOCAL_WRITE); +	if (IS_ERR(umem)) { +		err = PTR_ERR(umem); +		ibdev_dbg(&mdev->ib_dev, +			  "Failed to get umem for create wq, err %d\n", err); +		goto err_free_wq; +	} + +	wq->umem = umem; +	wq->wqe = init_attr->max_wr; +	wq->wq_buf_size = ucmd.wq_buf_size; +	wq->rx_object = INVALID_MANA_HANDLE; + +	err = mana_ib_gd_create_dma_region(mdev, wq->umem, &wq->gdma_region); +	if (err) { +		ibdev_dbg(&mdev->ib_dev, +			  "Failed to create dma region for create wq, %d\n", +			  err); +		goto err_release_umem; +	} + +	ibdev_dbg(&mdev->ib_dev, +		  "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n", +		  err, wq->gdma_region); + +	/* WQ ID is returned at wq_create time, doesn't know the value yet */ + +	return &wq->ibwq; + +err_release_umem: +	ib_umem_release(umem); + +err_free_wq: +	kfree(wq); + +	return ERR_PTR(err); +} + +int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, +		      u32 wq_attr_mask, struct ib_udata *udata) +{ +	/* modify_wq is not supported by this version of the driver */ +	return -EOPNOTSUPP; +} + +int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) +{ +	struct mana_ib_wq 
*wq = container_of(ibwq, struct mana_ib_wq, ibwq); +	struct ib_device *ib_dev = ibwq->device; +	struct mana_ib_dev *mdev; + +	mdev = container_of(ib_dev, struct mana_ib_dev, ib_dev); + +	mana_ib_gd_destroy_dma_region(mdev, wq->gdma_region); +	ib_umem_release(wq->umem); + +	kfree(wq); + +	return 0; +} + +int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table, +				 struct ib_rwq_ind_table_init_attr *init_attr, +				 struct ib_udata *udata) +{ +	/* +	 * There is no additional data in ind_table to be maintained by this +	 * driver, do nothing +	 */ +	return 0; +} + +int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) +{ +	/* +	 * There is no additional data in ind_table to be maintained by this +	 * driver, do nothing +	 */ +	return 0; +}  | 
