-rw-r--r--  drivers/gpu/drm/i915/gvt/kvmgt.c                     3
-rw-r--r--  drivers/iommu/iommufd/Kconfig                       20
-rw-r--r--  drivers/iommu/iommufd/main.c                        36
-rw-r--r--  drivers/s390/cio/vfio_ccw_ops.c                      3
-rw-r--r--  drivers/s390/crypto/vfio_ap_ops.c                    3
-rw-r--r--  drivers/vfio/Kconfig                                36
-rw-r--r--  drivers/vfio/Makefile                                5
-rw-r--r--  drivers/vfio/container.c                           141
-rw-r--r--  drivers/vfio/fsl-mc/vfio_fsl_mc.c                    3
-rw-r--r--  drivers/vfio/iommufd.c                             158
-rw-r--r--  drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c       6
-rw-r--r--  drivers/vfio/pci/mlx5/main.c                         3
-rw-r--r--  drivers/vfio/pci/vfio_pci.c                          3
-rw-r--r--  drivers/vfio/platform/vfio_amba.c                    3
-rw-r--r--  drivers/vfio/platform/vfio_platform.c                3
-rw-r--r--  drivers/vfio/vfio.h                                 98
-rw-r--r--  drivers/vfio/vfio_main.c                           347
-rw-r--r--  include/linux/vfio.h                                39
18 files changed, 714 insertions, 196 deletions
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index aaf0d9e8da95..f5164099c264 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1484,6 +1484,9 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = {
.mmap = intel_vgpu_mmap,
.ioctl = intel_vgpu_ioctl,
.dma_unmap = intel_vgpu_dma_unmap,
+ .bind_iommufd = vfio_iommufd_emulated_bind,
+ .unbind_iommufd = vfio_iommufd_emulated_unbind,
+ .attach_ioas = vfio_iommufd_emulated_attach_ioas,
};
static int intel_vgpu_probe(struct mdev_device *mdev)
diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
index 871244f2443f..8306616b6d81 100644
--- a/drivers/iommu/iommufd/Kconfig
+++ b/drivers/iommu/iommufd/Kconfig
@@ -12,6 +12,26 @@ config IOMMUFD
If you don't know what to do here, say N.
if IOMMUFD
+config IOMMUFD_VFIO_CONTAINER
+ bool "IOMMUFD provides the VFIO container /dev/vfio/vfio"
+ depends on VFIO && !VFIO_CONTAINER
+ default VFIO && !VFIO_CONTAINER
+ help
+ IOMMUFD will provide /dev/vfio/vfio instead of VFIO. This relies on
+ IOMMUFD providing compatibility emulation to give the same ioctls.
+ It provides an option to build a kernel with legacy VFIO components
+ removed.
+
+ IOMMUFD VFIO container emulation is known to lack certain features
+ of the native VFIO container, such as no-IOMMU support, peer-to-peer
+ DMA mapping, PPC IOMMU support, as well as other potentially
+ undiscovered gaps. This option is currently intended for the
+ purpose of testing IOMMUFD with unmodified userspace supporting VFIO
+ and making use of the Type1 VFIO IOMMU backend. General purpose
+ enabling of this option is currently discouraged.
+
+ Unless testing IOMMUFD, say N here.
+
config IOMMUFD_TEST
bool "IOMMU Userspace API Test support"
depends on DEBUG_KERNEL
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index bcb463e58100..083e6fcbe10a 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -18,6 +18,7 @@
#include <uapi/linux/iommufd.h>
#include <linux/iommufd.h>
+#include "io_pagetable.h"
#include "iommufd_private.h"
#include "iommufd_test.h"
@@ -25,6 +26,7 @@ struct iommufd_object_ops {
void (*destroy)(struct iommufd_object *obj);
};
static const struct iommufd_object_ops iommufd_object_ops[];
+static struct miscdevice vfio_misc_dev;
struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
size_t size,
@@ -170,6 +172,16 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
if (!ictx)
return -ENOMEM;
+ /*
+ * For compatibility with VFIO when /dev/vfio/vfio is opened we default
+ * to the same rlimit accounting as vfio uses.
+ */
+ if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER) &&
+ filp->private_data == &vfio_misc_dev) {
+ ictx->account_mode = IOPT_PAGES_ACCOUNT_MM;
+ pr_info_once("IOMMUFD is providing /dev/vfio/vfio, not VFIO.\n");
+ }
+
xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
ictx->file = filp;
filp->private_data = ictx;
@@ -400,6 +412,15 @@ static struct miscdevice iommu_misc_dev = {
.mode = 0660,
};
+
+static struct miscdevice vfio_misc_dev = {
+ .minor = VFIO_MINOR,
+ .name = "vfio",
+ .fops = &iommufd_fops,
+ .nodename = "vfio/vfio",
+ .mode = 0666,
+};
+
static int __init iommufd_init(void)
{
int ret;
@@ -407,18 +428,33 @@ static int __init iommufd_init(void)
ret = misc_register(&iommu_misc_dev);
if (ret)
return ret;
+
+ if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) {
+ ret = misc_register(&vfio_misc_dev);
+ if (ret)
+ goto err_misc;
+ }
iommufd_test_init();
return 0;
+err_misc:
+ misc_deregister(&iommu_misc_dev);
+ return ret;
}
static void __exit iommufd_exit(void)
{
iommufd_test_exit();
+ if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER))
+ misc_deregister(&vfio_misc_dev);
misc_deregister(&iommu_misc_dev);
}
module_init(iommufd_init);
module_exit(iommufd_exit);
+#if IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)
+MODULE_ALIAS_MISCDEV(VFIO_MINOR);
+MODULE_ALIAS("devname:vfio/vfio");
+#endif
MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
MODULE_LICENSE("GPL");
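With the compat misc device registered above, unmodified VFIO userspace can keep opening /dev/vfio/vfio without knowing which module answers it. A minimal userspace sketch (not part of this patch) that probes the node with the classic container ioctls:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

int main(void)
{
	int fd = open("/dev/vfio/vfio", O_RDWR);

	if (fd < 0) {
		perror("open /dev/vfio/vfio");
		return 1;
	}
	/* Both the legacy container and the iommufd compat node answer these. */
	printf("VFIO API version: %d\n", ioctl(fd, VFIO_GET_API_VERSION));
	printf("Type1v2 IOMMU: %s\n",
	       ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU) == 1 ?
		       "supported" : "unsupported");
	return 0;
}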
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index 6ae4d012d800..560453d99c24 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -588,6 +588,9 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = {
.ioctl = vfio_ccw_mdev_ioctl,
.request = vfio_ccw_mdev_request,
.dma_unmap = vfio_ccw_dma_unmap,
+ .bind_iommufd = vfio_iommufd_emulated_bind,
+ .unbind_iommufd = vfio_iommufd_emulated_unbind,
+ .attach_ioas = vfio_iommufd_emulated_attach_ioas,
};
struct mdev_driver vfio_ccw_mdev_driver = {
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 8bf353d46820..68eeb25fb661 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1805,6 +1805,9 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
.close_device = vfio_ap_mdev_close_device,
.ioctl = vfio_ap_mdev_ioctl,
.dma_unmap = vfio_ap_mdev_dma_unmap,
+ .bind_iommufd = vfio_iommufd_emulated_bind,
+ .unbind_iommufd = vfio_iommufd_emulated_unbind,
+ .attach_ioas = vfio_iommufd_emulated_attach_ioas,
};
static struct mdev_driver vfio_ap_matrix_driver = {
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 86c381ceb9a1..286c1663bd75 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -2,8 +2,9 @@
menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework"
select IOMMU_API
- select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
+ depends on IOMMUFD || !IOMMUFD
select INTERVAL_TREE
+ select VFIO_CONTAINER if IOMMUFD=n
help
VFIO provides a framework for secure userspace device drivers.
See Documentation/driver-api/vfio.rst for more details.
@@ -11,6 +12,18 @@ menuconfig VFIO
If you don't know what to do here, say N.
if VFIO
+config VFIO_CONTAINER
+ bool "Support for the VFIO container /dev/vfio/vfio"
+ select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
+ default y
+ help
+ The VFIO container is the classic interface to VFIO for establishing
+ IOMMU mappings. If N is selected here then IOMMUFD must be used to
+ manage the mappings.
+
+ Unless testing IOMMUFD, say Y here.
+
+if VFIO_CONTAINER
config VFIO_IOMMU_TYPE1
tristate
default n
@@ -20,16 +33,6 @@ config VFIO_IOMMU_SPAPR_TCE
depends on SPAPR_TCE_IOMMU
default VFIO
-config VFIO_SPAPR_EEH
- tristate
- depends on EEH && VFIO_IOMMU_SPAPR_TCE
- default VFIO
-
-config VFIO_VIRQFD
- tristate
- select EVENTFD
- default n
-
config VFIO_NOIOMMU
bool "VFIO No-IOMMU support"
help
@@ -43,6 +46,17 @@ config VFIO_NOIOMMU
this mode since there is no IOMMU to provide DMA translation.
If you don't know what to do here, say N.
+endif
+
+config VFIO_SPAPR_EEH
+ tristate
+ depends on EEH && VFIO_IOMMU_SPAPR_TCE
+ default VFIO
+
+config VFIO_VIRQFD
+ tristate
+ select EVENTFD
+ default n
source "drivers/vfio/pci/Kconfig"
source "drivers/vfio/platform/Kconfig"
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index b693a1169286..b953517dc70f 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -4,8 +4,9 @@ vfio_virqfd-y := virqfd.o
obj-$(CONFIG_VFIO) += vfio.o
vfio-y += vfio_main.o \
- iova_bitmap.o \
- container.o
+ iova_bitmap.o
+vfio-$(CONFIG_IOMMUFD) += iommufd.o
+vfio-$(CONFIG_VFIO_CONTAINER) += container.o
obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index d74164abbf40..6b362d97d682 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -188,8 +188,9 @@ void vfio_device_container_unregister(struct vfio_device *device)
device->group->container->iommu_data, device);
}
-long vfio_container_ioctl_check_extension(struct vfio_container *container,
- unsigned long arg)
+static long
+vfio_container_ioctl_check_extension(struct vfio_container *container,
+ unsigned long arg)
{
struct vfio_iommu_driver *driver;
long ret = 0;
@@ -511,14 +512,15 @@ void vfio_group_detach_container(struct vfio_group *group)
vfio_container_put(container);
}
-int vfio_device_assign_container(struct vfio_device *device)
+int vfio_group_use_container(struct vfio_group *group)
{
- struct vfio_group *group = device->group;
-
lockdep_assert_held(&group->group_lock);
- if (!group->container || !group->container->iommu_driver ||
- WARN_ON(!group->container_users))
+ /*
+ * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
+ * VFIO_SET_IOMMU hasn't been done yet.
+ */
+ if (!group->container->iommu_driver)
return -EINVAL;
if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
@@ -529,122 +531,50 @@ int vfio_device_assign_container(struct vfio_device *device)
return 0;
}
-void vfio_device_unassign_container(struct vfio_device *device)
+void vfio_group_unuse_container(struct vfio_group *group)
{
- mutex_lock(&device->group->group_lock);
- WARN_ON(device->group->container_users <= 1);
- device->group->container_users--;
- fput(device->group->opened_file);
- mutex_unlock(&device->group->group_lock);
+ lockdep_assert_held(&group->group_lock);
+
+ WARN_ON(group->container_users <= 1);
+ group->container_users--;
+ fput(group->opened_file);
}
-/*
- * Pin contiguous user pages and return their associated host pages for local
- * domain only.
- * @device [in] : device
- * @iova [in] : starting IOVA of user pages to be pinned.
- * @npage [in] : count of pages to be pinned. This count should not
- * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
- * @prot [in] : protection flags
- * @pages[out] : array of host pages
- * Return error or number of pages pinned.
- *
- * A driver may only call this function if the vfio_device was created
- * by vfio_register_emulated_iommu_dev().
- */
-int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
- int npage, int prot, struct page **pages)
+int vfio_container_pin_pages(struct vfio_container *container,
+ struct iommu_group *iommu_group, dma_addr_t iova,
+ int npage, int prot, struct page **pages)
{
- struct vfio_container *container;
- struct vfio_group *group = device->group;
- struct vfio_iommu_driver *driver;
- int ret;
-
- if (!pages || !npage || !vfio_assert_device_open(device))
- return -EINVAL;
+ struct vfio_iommu_driver *driver = container->iommu_driver;
if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
return -E2BIG;
- /* group->container cannot change while a vfio device is open */
- container = group->container;
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->pin_pages))
- ret = driver->ops->pin_pages(container->iommu_data,
- group->iommu_group, iova,
- npage, prot, pages);
- else
- ret = -ENOTTY;
-
- return ret;
+ if (unlikely(!driver || !driver->ops->pin_pages))
+ return -ENOTTY;
+ return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
+ npage, prot, pages);
}
-EXPORT_SYMBOL(vfio_pin_pages);
-/*
- * Unpin contiguous host pages for local domain only.
- * @device [in] : device
- * @iova [in] : starting address of user pages to be unpinned.
- * @npage [in] : count of pages to be unpinned. This count should not
- * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
- */
-void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
+void vfio_container_unpin_pages(struct vfio_container *container,
+ dma_addr_t iova, int npage)
{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
-
if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
return;
- if (WARN_ON(!vfio_assert_device_open(device)))
- return;
-
- /* group->container cannot change while a vfio device is open */
- container = device->group->container;
- driver = container->iommu_driver;
-
- driver->ops->unpin_pages(container->iommu_data, iova, npage);
+ container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
+ npage);
}
-EXPORT_SYMBOL(vfio_unpin_pages);
-/*
- * This interface allows the CPUs to perform some sort of virtual DMA on
- * behalf of the device.
- *
- * CPUs read/write from/into a range of IOVAs pointing to user space memory
- * into/from a kernel buffer.
- *
- * As the read/write of user space memory is conducted via the CPUs and is
- * not a real device DMA, it is not necessary to pin the user space memory.
- *
- * @device [in] : VFIO device
- * @iova [in] : base IOVA of a user space buffer
- * @data [in] : pointer to kernel buffer
- * @len [in] : kernel buffer length
- * @write : indicate read or write
- * Return error code on failure or 0 on success.
- */
-int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
- size_t len, bool write)
+int vfio_container_dma_rw(struct vfio_container *container, dma_addr_t iova,
+ void *data, size_t len, bool write)
{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
- int ret = 0;
-
- if (!data || len <= 0 || !vfio_assert_device_open(device))
- return -EINVAL;
-
- /* group->container cannot change while a vfio device is open */
- container = device->group->container;
- driver = container->iommu_driver;
+ struct vfio_iommu_driver *driver = container->iommu_driver;
- if (likely(driver && driver->ops->dma_rw))
- ret = driver->ops->dma_rw(container->iommu_data,
- iova, data, len, write);
- else
- ret = -ENOTTY;
- return ret;
+ if (unlikely(!driver || !driver->ops->dma_rw))
+ return -ENOTTY;
+ return driver->ops->dma_rw(container->iommu_data, iova, data, len,
+ write);
}
-EXPORT_SYMBOL(vfio_dma_rw);
int __init vfio_container_init(void)
{
@@ -678,3 +608,6 @@ void vfio_container_cleanup(void)
misc_deregister(&vfio_dev);
mutex_destroy(&vfio.iommu_drivers_lock);
}
+
+MODULE_ALIAS_MISCDEV(VFIO_MINOR);
+MODULE_ALIAS("devname:vfio/vfio");
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index b16874e913e4..5cd4bb476440 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -592,6 +592,9 @@ static const struct vfio_device_ops vfio_fsl_mc_ops = {
.read = vfio_fsl_mc_read,
.write = vfio_fsl_mc_write,
.mmap = vfio_fsl_mc_mmap,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
};
static struct fsl_mc_driver vfio_fsl_mc_driver = {
diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
new file mode 100644
index 000000000000..4f82a6fa7c6c
--- /dev/null
+++ b/drivers/vfio/iommufd.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+
+#include "vfio.h"
+
+MODULE_IMPORT_NS(IOMMUFD);
+MODULE_IMPORT_NS(IOMMUFD_VFIO);
+
+int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
+{
+ u32 ioas_id;
+ u32 device_id;
+ int ret;
+
+ lockdep_assert_held(&vdev->dev_set->lock);
+
+ /*
+ * If the driver doesn't provide this op then it means the device does
+ * not do DMA at all. So nothing to do.
+ */
+ if (!vdev->ops->bind_iommufd)
+ return 0;
+
+ ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id);
+ if (ret)
+ return ret;
+
+ ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id);
+ if (ret)
+ goto err_unbind;
+ ret = vdev->ops->attach_ioas(vdev, &ioas_id);
+ if (ret)
+ goto err_unbind;
+
+ /*
+ * The legacy path has no way to return the device id or the selected
+ * pt_id
+ */
+ return 0;
+
+err_unbind:
+ if (vdev->ops->unbind_iommufd)
+ vdev->ops->unbind_iommufd(vdev);
+ return ret;
+}
+
+void vfio_iommufd_unbind(struct vfio_device *vdev)
+{
+ lockdep_assert_held(&vdev->dev_set->lock);
+
+ if (vdev->ops->unbind_iommufd)
+ vdev->ops->unbind_iommufd(vdev);
+}
+
+/*
+ * The physical standard ops mean that the iommufd_device is bound to the
+ * physical device vdev->dev that was provided to vfio_init_group_dev(). Drivers
+ * using this ops set should call vfio_register_group_dev()
+ */
+int vfio_iommufd_physical_bind(struct vfio_device *vdev,
+ struct iommufd_ctx *ictx, u32 *out_device_id)
+{
+ struct iommufd_device *idev;
+
+ idev = iommufd_device_bind(ictx, vdev->dev, out_device_id);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+ vdev->iommufd_device = idev;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_bind);
+
+void vfio_iommufd_physical_unbind(struct vfio_device *vdev)
+{
+ lockdep_assert_held(&vdev->dev_set->lock);
+
+ if (vdev->iommufd_attached) {
+ iommufd_device_detach(vdev->iommufd_device);
+ vdev->iommufd_attached = false;
+ }
+ iommufd_device_unbind(vdev->iommufd_device);
+ vdev->iommufd_device = NULL;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_unbind);
+
+int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id)
+{
+ int rc;
+
+ rc = iommufd_device_attach(vdev->iommufd_device, pt_id);
+ if (rc)
+ return rc;
+ vdev->iommufd_attached = true;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas);
+
+/*
+ * The emulated standard ops mean that vfio_device is going to use the
+ * "mdev path" and will call vfio_pin_pages()/vfio_dma_rw(). Drivers using this
+ * ops set should call vfio_register_emulated_iommu_dev().
+ */
+
+static void vfio_emulated_unmap(void *data, unsigned long iova,
+ unsigned long length)
+{
+ struct vfio_device *vdev = data;
+
+ vdev->ops->dma_unmap(vdev, iova, length);
+}
+
+static const struct iommufd_access_ops vfio_user_ops = {
+ .needs_pin_pages = 1,
+ .unmap = vfio_emulated_unmap,
+};
+
+int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
+ struct iommufd_ctx *ictx, u32 *out_device_id)
+{
+ lockdep_assert_held(&vdev->dev_set->lock);
+
+ vdev->iommufd_ictx = ictx;
+ iommufd_ctx_get(ictx);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_bind);
+
+void vfio_iommufd_emulated_unbind(struct vfio_device *vdev)
+{
+ lockdep_assert_held(&vdev->dev_set->lock);
+
+ if (vdev->iommufd_access) {
+ iommufd_access_destroy(vdev->iommufd_access);
+ vdev->iommufd_access = NULL;
+ }
+ iommufd_ctx_put(vdev->iommufd_ictx);
+ vdev->iommufd_ictx = NULL;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_unbind);
+
+int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id)
+{
+ struct iommufd_access *user;
+
+ lockdep_assert_held(&vdev->dev_set->lock);
+
+ user = iommufd_access_create(vdev->iommufd_ictx, *pt_id, &vfio_user_ops,
+ vdev);
+ if (IS_ERR(user))
+ return PTR_ERR(user);
+ vdev->iommufd_access = user;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_attach_ioas);
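Note that vfio_emulated_unmap() above invokes vdev->ops->dma_unmap unconditionally, so a driver adopting the emulated ops set must also supply a dma_unmap callback that drops its vfio_pin_pages() pins. A hedged sketch of the minimal wiring (the foo_* names are hypothetical, not from this series):

static void foo_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
{
	/* Release any vfio_pin_pages() pins intersecting [iova, iova + length). */
}

static const struct vfio_device_ops foo_mdev_ops = {
	.name		= "foo-mdev",
	.dma_unmap	= foo_dma_unmap,
	.bind_iommufd	= vfio_iommufd_emulated_bind,
	.unbind_iommufd	= vfio_iommufd_emulated_unbind,
	.attach_ioas	= vfio_iommufd_emulated_attach_ioas,
};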
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 39eeca18a0f7..40019b11c5a9 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -1246,6 +1246,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = {
.mmap = hisi_acc_vfio_pci_mmap,
.request = vfio_pci_core_request,
.match = vfio_pci_core_match,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
};
static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
@@ -1261,6 +1264,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
.mmap = vfio_pci_core_mmap,
.request = vfio_pci_core_request,
.match = vfio_pci_core_match,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
};
static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index fd6ccb8454a2..32d1f38d351e 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -623,6 +623,9 @@ static const struct vfio_device_ops mlx5vf_pci_ops = {
.mmap = vfio_pci_core_mmap,
.request = vfio_pci_core_request,
.match = vfio_pci_core_match,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
};
static int mlx5vf_pci_probe(struct pci_dev *pdev,
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 1d4919edfbde..29091ee2e984 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -138,6 +138,9 @@ static const struct vfio_device_ops vfio_pci_ops = {
.mmap = vfio_pci_core_mmap,
.request = vfio_pci_core_request,
.match = vfio_pci_core_match,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
};
static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c
index eaea63e5294c..5a046098d0bd 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -117,6 +117,9 @@ static const struct vfio_device_ops vfio_amba_ops = {
.read = vfio_platform_read,
.write = vfio_platform_write,
.mmap = vfio_platform_mmap,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
};
static const struct amba_id pl330_ids[] = {
diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c
index 82cedcebfd90..b87c3b708783 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -106,6 +106,9 @@ static const struct vfio_device_ops vfio_platform_ops = {
.read = vfio_platform_read,
.write = vfio_platform_write,
.mmap = vfio_platform_mmap,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
};
static struct platform_driver vfio_platform_driver = {
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index bcad54bbab08..ce5fe3fc493b 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -10,6 +10,7 @@
#include <linux/cdev.h>
#include <linux/module.h>
+struct iommufd_ctx;
struct iommu_group;
struct vfio_device;
struct vfio_container;
@@ -54,14 +55,18 @@ struct vfio_group {
struct list_head device_list;
struct mutex device_lock;
struct list_head vfio_next;
+#if IS_ENABLED(CONFIG_VFIO_CONTAINER)
struct list_head container_next;
+#endif
enum vfio_group_type type;
struct mutex group_lock;
struct kvm *kvm;
struct file *opened_file;
struct blocking_notifier_head notifier;
+ struct iommufd_ctx *iommufd;
};
+#if IS_ENABLED(CONFIG_VFIO_CONTAINER)
/* events for the backend driver notify callback */
enum vfio_iommu_notify_type {
VFIO_IOMMU_CONTAINER_CLOSE = 0,
@@ -109,20 +114,101 @@ struct vfio_iommu_driver {
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops);
-bool vfio_assert_device_open(struct vfio_device *device);
-
struct vfio_container *vfio_container_from_file(struct file *filep);
-int vfio_device_assign_container(struct vfio_device *device);
-void vfio_device_unassign_container(struct vfio_device *device);
+int vfio_group_use_container(struct vfio_group *group);
+void vfio_group_unuse_container(struct vfio_group *group);
int vfio_container_attach_group(struct vfio_container *container,
struct vfio_group *group);
void vfio_group_detach_container(struct vfio_group *group);
void vfio_device_container_register(struct vfio_device *device);
void vfio_device_container_unregister(struct vfio_device *device);
-long vfio_container_ioctl_check_extension(struct vfio_container *container,
- unsigned long arg);
+int vfio_container_pin_pages(struct vfio_container *container,
+ struct iommu_group *iommu_group, dma_addr_t iova,
+ int npage, int prot, struct page **pages);
+void vfio_container_unpin_pages(struct vfio_container *container,
+ dma_addr_t iova, int npage);
+int vfio_container_dma_rw(struct vfio_container *container, dma_addr_t iova,
+ void *data, size_t len, bool write);
+
int __init vfio_container_init(void);
void vfio_container_cleanup(void);
+#else
+static inline struct vfio_container *
+vfio_container_from_file(struct file *filep)
+{
+ return NULL;
+}
+
+static inline int vfio_group_use_container(struct vfio_group *group)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void vfio_group_unuse_container(struct vfio_group *group)
+{
+}
+
+static inline int vfio_container_attach_group(struct vfio_container *container,
+ struct vfio_group *group)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void vfio_group_detach_container(struct vfio_group *group)
+{
+}
+
+static inline void vfio_device_container_register(struct vfio_device *device)
+{
+}
+
+static inline void vfio_device_container_unregister(struct vfio_device *device)
+{
+}
+
+static inline int vfio_container_pin_pages(struct vfio_container *container,
+ struct iommu_group *iommu_group,
+ dma_addr_t iova, int npage, int prot,
+ struct page **pages)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void vfio_container_unpin_pages(struct vfio_container *container,
+ dma_addr_t iova, int npage)
+{
+}
+
+static inline int vfio_container_dma_rw(struct vfio_container *container,
+ dma_addr_t iova, void *data, size_t len,
+ bool write)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int vfio_container_init(void)
+{
+ return 0;
+}
+static inline void vfio_container_cleanup(void)
+{
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IOMMUFD)
+int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx);
+void vfio_iommufd_unbind(struct vfio_device *device);
+#else
+static inline int vfio_iommufd_bind(struct vfio_device *device,
+ struct iommufd_ctx *ictx)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void vfio_iommufd_unbind(struct vfio_device *device)
+{
+}
+#endif
#ifdef CONFIG_VFIO_NOIOMMU
extern bool vfio_noiommu __read_mostly;
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 2d168793d4e1..ce6e6a560c70 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -35,6 +35,7 @@
#include <linux/pm_runtime.h>
#include <linux/interval_tree.h>
#include <linux/iova_bitmap.h>
+#include <linux/iommufd.h>
#include "vfio.h"
#define DRIVER_VERSION "0.3"
@@ -524,6 +525,11 @@ static int __vfio_register_dev(struct vfio_device *device,
if (IS_ERR(group))
return PTR_ERR(group);
+ if (WARN_ON(device->ops->bind_iommufd &&
+ (!device->ops->unbind_iommufd ||
+ !device->ops->attach_ioas)))
+ return -EINVAL;
+
/*
* If the driver doesn't specify a set then the device is added to a
* singleton set just for itself.
@@ -662,6 +668,18 @@ EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
/*
* VFIO Group fd, /dev/vfio/$GROUP
*/
+static bool vfio_group_has_iommu(struct vfio_group *group)
+{
+ lockdep_assert_held(&group->group_lock);
+ /*
+ * There can only be users if there is a container, and if there is a
+ * container there must be users.
+ */
+ WARN_ON(!group->container != !group->container_users);
+
+ return group->container || group->iommufd;
+}
+
/*
* VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
* if there was no container to unset. Since the ioctl is called on
@@ -673,15 +691,21 @@ static int vfio_group_ioctl_unset_container(struct vfio_group *group)
int ret = 0;
mutex_lock(&group->group_lock);
- if (!group->container) {
+ if (!vfio_group_has_iommu(group)) {
ret = -EINVAL;
goto out_unlock;
}
- if (group->container_users != 1) {
- ret = -EBUSY;
- goto out_unlock;
+ if (group->container) {
+ if (group->container_users != 1) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ vfio_group_detach_container(group);
+ }
+ if (group->iommufd) {
+ iommufd_ctx_put(group->iommufd);
+ group->iommufd = NULL;
}
- vfio_group_detach_container(group);
out_unlock:
mutex_unlock(&group->group_lock);
@@ -692,6 +716,7 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
int __user *arg)
{
struct vfio_container *container;
+ struct iommufd_ctx *iommufd;
struct fd f;
int ret;
int fd;
@@ -704,7 +729,7 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
return -EBADF;
mutex_lock(&group->group_lock);
- if (group->container || WARN_ON(group->container_users)) {
+ if (vfio_group_has_iommu(group)) {
ret = -EINVAL;
goto out_unlock;
}
@@ -714,12 +739,28 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
}
container = vfio_container_from_file(f.file);
- ret = -EINVAL;
if (container) {
ret = vfio_container_attach_group(container, group);
goto out_unlock;
}
+ iommufd = iommufd_ctx_from_file(f.file);
+ if (!IS_ERR(iommufd)) {
+ u32 ioas_id;
+
+ ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
+ if (ret) {
+ iommufd_ctx_put(iommufd);
+ goto out_unlock;
+ }
+
+ group->iommufd = iommufd;
+ goto out_unlock;
+ }
+
+ /* The FD passed is not recognized. */
+ ret = -EBADFD;
+
out_unlock:
mutex_unlock(&group->group_lock);
fdput(f);
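The hunk above is what lets the existing group flow consume an iommufd. A hedged userspace sketch (error handling omitted; same headers as the probe sketch earlier, and the group number and PCI name are illustrative): the unchanged VFIO_GROUP_SET_CONTAINER ioctl now also accepts an iommufd fd in place of a container fd.

int bind_through_iommufd(void)
{
	int iommufd = open("/dev/iommu", O_RDWR);
	int group = open("/dev/vfio/26", O_RDWR);
	int device;

	ioctl(group, VFIO_GROUP_SET_CONTAINER, &iommufd);
	/*
	 * No VFIO_SET_IOMMU step: the compat IOAS is attached when the
	 * device is first opened.
	 */
	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
	return device;
}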
@@ -729,45 +770,93 @@ out_unlock:
static const struct file_operations vfio_device_fops;
/* true if the vfio_device has open_device() called but not close_device() */
-bool vfio_assert_device_open(struct vfio_device *device)
+static bool vfio_assert_device_open(struct vfio_device *device)
{
return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}
-static struct file *vfio_device_open(struct vfio_device *device)
+static int vfio_device_first_open(struct vfio_device *device)
{
- struct file *filep;
int ret;
+ lockdep_assert_held(&device->dev_set->lock);
+
+ if (!try_module_get(device->dev->driver->owner))
+ return -ENODEV;
+
+ /*
+ * Here we pass the KVM pointer with the group under the lock. If the
+ * device driver will use it, it must obtain a reference and release it
+ * during close_device.
+ */
mutex_lock(&device->group->group_lock);
- ret = vfio_device_assign_container(device);
- mutex_unlock(&device->group->group_lock);
- if (ret)
- return ERR_PTR(ret);
+ if (!vfio_group_has_iommu(device->group)) {
+ ret = -EINVAL;
+ goto err_module_put;
+ }
- if (!try_module_get(device->dev->driver->owner)) {
- ret = -ENODEV;
- goto err_unassign_container;
+ if (device->group->container) {
+ ret = vfio_group_use_container(device->group);
+ if (ret)
+ goto err_module_put;
+ } else if (device->group->iommufd) {
+ ret = vfio_iommufd_bind(device, device->group->iommufd);
+ if (ret)
+ goto err_module_put;
}
+ device->kvm = device->group->kvm;
+ if (device->ops->open_device) {
+ ret = device->ops->open_device(device);
+ if (ret)
+ goto err_container;
+ }
+ if (device->group->container)
+ vfio_device_container_register(device);
+ mutex_unlock(&device->group->group_lock);
+ return 0;
+
+err_container:
+ device->kvm = NULL;
+ if (device->group->container)
+ vfio_group_unuse_container(device->group);
+ else if (device->group->iommufd)
+ vfio_iommufd_unbind(device);
+err_module_put:
+ mutex_unlock(&device->group->group_lock);
+ module_put(device->dev->driver->owner);
+ return ret;
+}
+
+static void vfio_device_last_close(struct vfio_device *device)
+{
+ lockdep_assert_held(&device->dev_set->lock);
+
+ mutex_lock(&device->group->group_lock);
+ if (device->group->container)
+ vfio_device_container_unregister(device);
+ if (device->ops->close_device)
+ device->ops->close_device(device);
+ device->kvm = NULL;
+ if (device->group->container)
+ vfio_group_unuse_container(device->group);
+ else if (device->group->iommufd)
+ vfio_iommufd_unbind(device);
+ mutex_unlock(&device->group->group_lock);
+ module_put(device->dev->driver->owner);
+}
+
+static struct file *vfio_device_open(struct vfio_device *device)
+{
+ struct file *filep;
+ int ret;
+
mutex_lock(&device->dev_set->lock);
device->open_count++;
if (device->open_count == 1) {
- /*
- * Here we pass the KVM pointer with the group under the read
- * lock. If the device driver will use it, it must obtain a
- * reference and release it during close_device.
- */
- mutex_lock(&device->group->group_lock);
- device->kvm = device->group->kvm;
-
- if (device->ops->open_device) {
- ret = device->ops->open_device(device);
- if (ret)
- goto err_undo_count;
- }
- vfio_device_container_register(device);
- mutex_unlock(&device->group->group_lock);
+ ret = vfio_device_first_open(device);
+ if (ret)
+ goto err_unlock;
}
mutex_unlock(&device->dev_set->lock);
@@ -800,21 +889,11 @@ static struct file *vfio_device_open(struct vfio_device *device)
err_close_device:
mutex_lock(&device->dev_set->lock);
- mutex_lock(&device->group->group_lock);
- if (device->open_count == 1 && device->ops->close_device) {
- device->ops->close_device(device);
-
- vfio_device_container_unregister(device);
- }
-err_undo_count:
- mutex_unlock(&device->group->group_lock);
+ if (device->open_count == 1)
+ vfio_device_last_close(device);
+err_unlock:
device->open_count--;
- if (device->open_count == 0 && device->kvm)
- device->kvm = NULL;
mutex_unlock(&device->dev_set->lock);
- module_put(device->dev->driver->owner);
-err_unassign_container:
- vfio_device_unassign_container(device);
return ERR_PTR(ret);
}
@@ -878,7 +957,14 @@ static int vfio_group_ioctl_get_status(struct vfio_group *group,
return -ENODEV;
}
- if (group->container)
+ /*
+ * With the container FD the iommu_group_claim_dma_owner() is done
+ * during SET_CONTAINER but for IOMMUFD this is done during
+ * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd
+ * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will still
+ * fail the viability check.
+ */
+ if (vfio_group_has_iommu(group))
status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
VFIO_GROUP_FLAGS_VIABLE;
else if (!iommu_group_dma_owner_claimed(group->iommu_group))
@@ -961,6 +1047,10 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep)
WARN_ON(group->notifier.head);
if (group->container)
vfio_group_detach_container(group);
+ if (group->iommufd) {
+ iommufd_ctx_put(group->iommufd);
+ group->iommufd = NULL;
+ }
group->opened_file = NULL;
mutex_unlock(&group->group_lock);
return 0;
@@ -1016,21 +1106,11 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
mutex_lock(&device->dev_set->lock);
vfio_assert_device_open(device);
- mutex_lock(&device->group->group_lock);
- if (device->open_count == 1 && device->ops->close_device)
- device->ops->close_device(device);
-
- vfio_device_container_unregister(device);
- mutex_unlock(&device->group->group_lock);
+ if (device->open_count == 1)
+ vfio_device_last_close(device);
device->open_count--;
- if (device->open_count == 0)
- device->kvm = NULL;
mutex_unlock(&device->dev_set->lock);
- module_put(device->dev->driver->owner);
-
- vfio_device_unassign_container(device);
-
vfio_device_put_registration(device);
return 0;
@@ -1613,24 +1693,27 @@ EXPORT_SYMBOL_GPL(vfio_file_is_group);
bool vfio_file_enforced_coherent(struct file *file)
{
struct vfio_group *group = file->private_data;
- bool ret;
+ struct vfio_device *device;
+ bool ret = true;
if (!vfio_file_is_group(file))
return true;
- mutex_lock(&group->group_lock);
- if (group->container) {
- ret = vfio_container_ioctl_check_extension(group->container,
- VFIO_DMA_CC_IOMMU);
- } else {
- /*
- * Since the coherency state is determined only once a container
- * is attached the user must do so before they can prove they
- * have permission.
- */
- ret = true;
+ /*
+ * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then
+ * any domain later attached to it will also not support it. If the cap
+ * is set then the iommu_domain eventually attached to the device/group
+ * must use a domain with enforce_cache_coherency().
+ */
+ mutex_lock(&group->device_lock);
+ list_for_each_entry(device, &group->device_list, group_next) {
+ if (!device_iommu_capable(device->dev,
+ IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) {
+ ret = false;
+ break;
+ }
}
- mutex_unlock(&group->group_lock);
+ mutex_unlock(&group->device_lock);
return ret;
}
EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
@@ -1794,6 +1877,126 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
/*
+ * Pin contiguous user pages and return their associated host pages for local
+ * domain only.
+ * @device [in] : device
+ * @iova [in] : starting IOVA of user pages to be pinned.
+ * @npage [in] : count of pages to be pinned. This count should not
+ * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
+ * @prot [in] : protection flags
+ * @pages[out] : array of host pages
+ * Return error or number of pages pinned.
+ *
+ * A driver may only call this function if the vfio_device was created
+ * by vfio_register_emulated_iommu_dev().
+ */
+int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
+ int npage, int prot, struct page **pages)
+{
+ /* group->container cannot change while a vfio device is open */
+ if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device)))
+ return -EINVAL;
+ if (device->group->container)
+ return vfio_container_pin_pages(device->group->container,
+ device->group->iommu_group,
+ iova, npage, prot, pages);
+ if (device->iommufd_access) {
+ int ret;
+
+ if (iova > ULONG_MAX)
+ return -EINVAL;
+ /*
+ * VFIO ignores the sub page offset, npages is from the start of
+ * a PAGE_SIZE chunk of IOVA. The caller is expected to recover
+ * the sub page offset by doing:
+ * pages[0] + (iova % PAGE_SIZE)
+ */
+ ret = iommufd_access_pin_pages(
+ device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE),
+ npage * PAGE_SIZE, pages,
+ (prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0);
+ if (ret)
+ return ret;
+ return npage;
+ }
+ return -EINVAL;
+}
+EXPORT_SYMBOL(vfio_pin_pages);
+
+/*
+ * Unpin contiguous host pages for local domain only.
+ * @device [in] : device
+ * @iova [in] : starting address of user pages to be unpinned.
+ * @npage [in] : count of pages to be unpinned. This count should not
+ * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
+ */
+void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
+{
+ if (WARN_ON(!vfio_assert_device_open(device)))
+ return;
+
+ if (device->group->container) {
+ vfio_container_unpin_pages(device->group->container, iova,
+ npage);
+ return;
+ }
+ if (device->iommufd_access) {
+ if (WARN_ON(iova > ULONG_MAX))
+ return;
+ iommufd_access_unpin_pages(device->iommufd_access,
+ ALIGN_DOWN(iova, PAGE_SIZE),
+ npage * PAGE_SIZE);
+ return;
+ }
+}
+EXPORT_SYMBOL(vfio_unpin_pages);
+
+/*
+ * This interface allows the CPUs to perform some sort of virtual DMA on
+ * behalf of the device.
+ *
+ * CPUs read/write from/into a range of IOVAs pointing to user space memory
+ * into/from a kernel buffer.
+ *
+ * As the read/write of user space memory is conducted via the CPUs and is
+ * not a real device DMA, it is not necessary to pin the user space memory.
+ *
+ * @device [in] : VFIO device
+ * @iova [in] : base IOVA of a user space buffer
+ * @data [in] : pointer to kernel buffer
+ * @len [in] : kernel buffer length
+ * @write : indicate read or write
+ * Return error code on failure or 0 on success.
+ */
+int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
+ size_t len, bool write)
+{
+ if (!data || len <= 0 || !vfio_assert_device_open(device))
+ return -EINVAL;
+
+ if (device->group->container)
+ return vfio_container_dma_rw(device->group->container, iova,
+ data, len, write);
+
+ if (device->iommufd_access) {
+ unsigned int flags = 0;
+
+ if (iova > ULONG_MAX)
+ return -EINVAL;
+
+ /* VFIO historically tries to auto-detect a kthread */
+ if (!current->mm)
+ flags |= IOMMUFD_ACCESS_RW_KTHREAD;
+ if (write)
+ flags |= IOMMUFD_ACCESS_RW_WRITE;
+ return iommufd_access_rw(device->iommufd_access, iova, data,
+ len, flags);
+ }
+ return -EINVAL;
+}
+EXPORT_SYMBOL(vfio_dma_rw);
+
+/*
* Module/class support
*/
static char *vfio_devnode(struct device *dev, umode_t *mode)
@@ -1870,6 +2073,4 @@ MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_ALIAS_MISCDEV(VFIO_MINOR);
-MODULE_ALIAS("devname:vfio/vfio");
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
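To make the dual-backend contract of vfio_pin_pages()/vfio_dma_rw() concrete, a hedged kernel-side sketch (assuming a device registered through vfio_register_emulated_iommu_dev(); foo_read_u32() is hypothetical and assumes an IOVA that does not cross a page boundary) of reading four bytes at a guest IOVA:

#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/vfio.h>

static int foo_read_u32(struct vfio_device *vdev, dma_addr_t iova, u32 *val)
{
	struct page *page;
	void *va;
	int ret;

	ret = vfio_pin_pages(vdev, iova, 1, IOMMU_READ, &page);
	if (ret != 1)
		return ret < 0 ? ret : -EFAULT;

	/* Recover the sub-page offset: pages[0] + (iova % PAGE_SIZE). */
	va = kmap_local_page(page);
	*val = *(u32 *)(va + offset_in_page(iova));
	kunmap_local(va);

	vfio_unpin_pages(vdev, iova, 1);
	return 0;
}

For a one-off copy the pin is unnecessary: vfio_dma_rw(vdev, iova, val, sizeof(*val), false) reaches the same memory through whichever backend (container or iommufd access) the group is attached to.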
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e7cebeb875dd..d5f84f98c0fa 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -17,6 +17,9 @@
#include <linux/iova_bitmap.h>
struct kvm;
+struct iommufd_ctx;
+struct iommufd_device;
+struct iommufd_access;
/*
* VFIO devices can be placed in a set, this allows all devices to share this
@@ -54,6 +57,12 @@ struct vfio_device {
struct completion comp;
struct list_head group_next;
struct list_head iommu_entry;
+ struct iommufd_access *iommufd_access;
+#if IS_ENABLED(CONFIG_IOMMUFD)
+ struct iommufd_device *iommufd_device;
+ struct iommufd_ctx *iommufd_ictx;
+ bool iommufd_attached;
+#endif
};
/**
@@ -80,6 +89,10 @@ struct vfio_device_ops {
char *name;
int (*init)(struct vfio_device *vdev);
void (*release)(struct vfio_device *vdev);
+ int (*bind_iommufd)(struct vfio_device *vdev,
+ struct iommufd_ctx *ictx, u32 *out_device_id);
+ void (*unbind_iommufd)(struct vfio_device *vdev);
+ int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id);
int (*open_device)(struct vfio_device *vdev);
void (*close_device)(struct vfio_device *vdev);
ssize_t (*read)(struct vfio_device *vdev, char __user *buf,
@@ -96,6 +109,32 @@ struct vfio_device_ops {
void __user *arg, size_t argsz);
};
+#if IS_ENABLED(CONFIG_IOMMUFD)
+int vfio_iommufd_physical_bind(struct vfio_device *vdev,
+ struct iommufd_ctx *ictx, u32 *out_device_id);
+void vfio_iommufd_physical_unbind(struct vfio_device *vdev);
+int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
+int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
+ struct iommufd_ctx *ictx, u32 *out_device_id);
+void vfio_iommufd_emulated_unbind(struct vfio_device *vdev);
+int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
+#else
+#define vfio_iommufd_physical_bind \
+ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \
+ u32 *out_device_id)) NULL)
+#define vfio_iommufd_physical_unbind \
+ ((void (*)(struct vfio_device *vdev)) NULL)
+#define vfio_iommufd_physical_attach_ioas \
+ ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
+#define vfio_iommufd_emulated_bind \
+ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \
+ u32 *out_device_id)) NULL)
+#define vfio_iommufd_emulated_unbind \
+ ((void (*)(struct vfio_device *vdev)) NULL)
+#define vfio_iommufd_emulated_attach_ioas \
+ ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
+#endif
+
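+One detail worth noting in the CONFIG_IOMMUFD=n stubs above: each macro
+expands to NULL cast to the exact callback type, so a driver's ops
+initializer compiles unchanged, and stays type-checked, whether or not
+iommufd is built in. A hypothetical initializer (not from this series)
+simply reduces to typed NULLs:
+
+static const struct vfio_device_ops foo_ops = {
+	.name		= "vfio-foo",
+	/* Each assigns a NULL of the matching pointer type when IOMMUFD=n: */
+	.bind_iommufd	= vfio_iommufd_physical_bind,
+	.unbind_iommufd	= vfio_iommufd_physical_unbind,
+	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
+};
+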
/**
* @migration_set_state: Optional callback to change the migration state for
* devices that support migration. It's mandatory for