author    | Jason Gunthorpe <jgg@nvidia.com> | 2022-11-29 16:39:33 -0400
committer | Jason Gunthorpe <jgg@nvidia.com> | 2022-12-02 11:52:04 -0400
commit    | 169dd5c987e60e62aa5785b30d22ded2ae000286 (patch)
tree      | 5e00d30219e9bf021b635c54184ab9381d5e00ac /drivers/vfio
parent    | 2a54e347d990574ceb047b71ea0b03979232b85e (diff)
parent    | 01f70cbb26eadb5959344598977cb7159948263a (diff)
Merge patch series "Connect VFIO to IOMMUFD"
Jason Gunthorpe <jgg@nvidia.com> says:
==================
This series provides an alternative container layer for VFIO implemented
using iommufd. This is optional; if CONFIG_IOMMUFD is not set then it will
not be compiled in.
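As an illustration, a kernel built to rely solely on iommufd could be
configured with a fragment like this (a sketch based on the Kconfig changes
in this series; CONFIG_VFIO_CONTAINER is the new option added below):

    CONFIG_IOMMUFD=y
    CONFIG_VFIO=y
    # CONFIG_VFIO_CONTAINER is not set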
At this point iommufd can be injected by passing an iommufd FD to
VFIO_GROUP_SET_CONTAINER, which will use the VFIO compat layer in iommufd
to obtain the compat IOAS and then connect up all the VFIO drivers as
appropriate.
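As a rough illustration, the compat flow from userspace looks like the
sketch below; the group number and device name are hypothetical and error
handling is omitted:

    /* Sketch: pass an iommufd FD where a container FD used to go. */
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/vfio.h>

    int main(void)
    {
            int group_fd = open("/dev/vfio/26", O_RDWR); /* hypothetical group */
            int iommu_fd = open("/dev/iommu", O_RDWR);   /* instead of /dev/vfio/vfio */

            /* iommufd's VFIO compat layer supplies the compat IOAS here */
            if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &iommu_fd) < 0)
                    return 1;

            /* Device FDs are then obtained exactly as before */
            int device_fd = ioctl(group_fd, VFIO_GROUP_GET_DEVICE_FD, "0000:04:00.0");
            return device_fd < 0;
    }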
This is a temporary stopping point; a following series will provide a way
to directly open a VFIO device FD and connect it to IOMMUFD using native
ioctls that can expose IOMMUFD features like hwpt, the future vPASID, and
dynamic attachment.
This series, in compat mode, has passed all the QEMU tests we have
available, including the test suites for the Intel GVT mdev. Aside from
the temporary limitation with P2P memory, this is believed to be fully
compatible with VFIO.
This is on github: https://github.com/jgunthorpe/linux/commits/vfio_iommufd
It requires the iommufd series:
https://lore.kernel.org/r/0-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
==================
Link: https://lore.kernel.org/r/0-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/vfio')
 drivers/vfio/Kconfig                           |  36
 drivers/vfio/Makefile                          |   5
 drivers/vfio/container.c                       | 141
 drivers/vfio/fsl-mc/vfio_fsl_mc.c              |   3
 drivers/vfio/iommufd.c                         | 158
 drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c |   6
 drivers/vfio/pci/mlx5/main.c                   |   3
 drivers/vfio/pci/vfio_pci.c                    |   3
 drivers/vfio/platform/vfio_amba.c              |   3
 drivers/vfio/platform/vfio_platform.c          |   3
 drivers/vfio/vfio.h                            |  98
 drivers/vfio/vfio_main.c                       | 347
 12 files changed, 610 insertions(+), 196 deletions(-)
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 86c381ceb9a1..286c1663bd75 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -2,8 +2,9 @@
 menuconfig VFIO
 	tristate "VFIO Non-Privileged userspace driver framework"
 	select IOMMU_API
-	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
+	depends on IOMMUFD || !IOMMUFD
 	select INTERVAL_TREE
+	select VFIO_CONTAINER if IOMMUFD=n
 	help
 	  VFIO provides a framework for secure userspace device drivers.
 	  See Documentation/driver-api/vfio.rst for more details.
@@ -11,6 +12,18 @@ menuconfig VFIO
 	  If you don't know what to do here, say N.
 
 if VFIO
+config VFIO_CONTAINER
+	bool "Support for the VFIO container /dev/vfio/vfio"
+	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
+	default y
+	help
+	  The VFIO container is the classic interface to VFIO for establishing
+	  IOMMU mappings. If N is selected here then IOMMUFD must be used to
+	  manage the mappings.
+
+	  Unless testing IOMMUFD say Y here.
+
+if VFIO_CONTAINER
 config VFIO_IOMMU_TYPE1
 	tristate
 	default n
@@ -20,16 +33,6 @@ config VFIO_IOMMU_SPAPR_TCE
 	depends on SPAPR_TCE_IOMMU
 	default VFIO
 
-config VFIO_SPAPR_EEH
-	tristate
-	depends on EEH && VFIO_IOMMU_SPAPR_TCE
-	default VFIO
-
-config VFIO_VIRQFD
-	tristate
-	select EVENTFD
-	default n
-
 config VFIO_NOIOMMU
 	bool "VFIO No-IOMMU support"
 	help
@@ -43,6 +46,17 @@ config VFIO_NOIOMMU
 	  this mode since there is no IOMMU to provide DMA translation.
 
 	  If you don't know what to do here, say N.
+endif
+
+config VFIO_SPAPR_EEH
+	tristate
+	depends on EEH && VFIO_IOMMU_SPAPR_TCE
+	default VFIO
+
+config VFIO_VIRQFD
+	tristate
+	select EVENTFD
+	default n
 
 source "drivers/vfio/pci/Kconfig"
 source "drivers/vfio/platform/Kconfig"
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index b693a1169286..b953517dc70f 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -4,8 +4,9 @@ vfio_virqfd-y := virqfd.o
 obj-$(CONFIG_VFIO) += vfio.o
 
 vfio-y += vfio_main.o \
-	  iova_bitmap.o \
-	  container.o
+	  iova_bitmap.o
+vfio-$(CONFIG_IOMMUFD) += iommufd.o
+vfio-$(CONFIG_VFIO_CONTAINER) += container.o
 
 obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index d74164abbf40..6b362d97d682 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -188,8 +188,9 @@ void vfio_device_container_unregister(struct vfio_device *device)
 			device->group->container->iommu_data, device);
 }
 
-long vfio_container_ioctl_check_extension(struct vfio_container *container,
-					  unsigned long arg)
+static long
+vfio_container_ioctl_check_extension(struct vfio_container *container,
+				     unsigned long arg)
 {
 	struct vfio_iommu_driver *driver;
 	long ret = 0;
@@ -511,14 +512,15 @@ void vfio_group_detach_container(struct vfio_group *group)
 	vfio_container_put(container);
 }
 
-int vfio_device_assign_container(struct vfio_device *device)
+int vfio_group_use_container(struct vfio_group *group)
 {
-	struct vfio_group *group = device->group;
-
 	lockdep_assert_held(&group->group_lock);
 
-	if (!group->container || !group->container->iommu_driver ||
-	    WARN_ON(!group->container_users))
+	/*
+	 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
+	 * VFIO_SET_IOMMU hasn't been done yet.
+	 */
+	if (!group->container->iommu_driver)
 		return -EINVAL;
 
 	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
@@ -529,122 +531,50 @@ int vfio_device_assign_container(struct vfio_device *device)
 	return 0;
 }
 
-void vfio_device_unassign_container(struct vfio_device *device)
+void vfio_group_unuse_container(struct vfio_group *group)
 {
-	mutex_lock(&device->group->group_lock);
-	WARN_ON(device->group->container_users <= 1);
-	device->group->container_users--;
-	fput(device->group->opened_file);
-	mutex_unlock(&device->group->group_lock);
+	lockdep_assert_held(&group->group_lock);
+
+	WARN_ON(group->container_users <= 1);
+	group->container_users--;
+	fput(group->opened_file);
 }
 
-/*
- * Pin contiguous user pages and return their associated host pages for local
- * domain only.
- * @device [in]  : device
- * @iova [in]    : starting IOVA of user pages to be pinned.
- * @npage [in]   : count of pages to be pinned.  This count should not
- *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
- * @prot [in]    : protection flags
- * @pages[out]   : array of host pages
- * Return error or number of pages pinned.
- *
- * A driver may only call this function if the vfio_device was created
- * by vfio_register_emulated_iommu_dev().
- */
-int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
-		   int npage, int prot, struct page **pages)
+int vfio_container_pin_pages(struct vfio_container *container,
+			     struct iommu_group *iommu_group, dma_addr_t iova,
+			     int npage, int prot, struct page **pages)
 {
-	struct vfio_container *container;
-	struct vfio_group *group = device->group;
-	struct vfio_iommu_driver *driver;
-	int ret;
-
-	if (!pages || !npage || !vfio_assert_device_open(device))
-		return -EINVAL;
+	struct vfio_iommu_driver *driver = container->iommu_driver;
 
 	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
 		return -E2BIG;
 
-	/* group->container cannot change while a vfio device is open */
-	container = group->container;
-	driver = container->iommu_driver;
-	if (likely(driver && driver->ops->pin_pages))
-		ret = driver->ops->pin_pages(container->iommu_data,
-					     group->iommu_group, iova,
-					     npage, prot, pages);
-	else
-		ret = -ENOTTY;
-
-	return ret;
+	if (unlikely(!driver || !driver->ops->pin_pages))
+		return -ENOTTY;
+	return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
+				      npage, prot, pages);
 }
-EXPORT_SYMBOL(vfio_pin_pages);
 
-/*
- * Unpin contiguous host pages for local domain only.
- * @device [in]  : device
- * @iova [in]    : starting address of user pages to be unpinned.
- * @npage [in]   : count of pages to be unpinned.  This count should not
- *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
- */
-void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
+void vfio_container_unpin_pages(struct vfio_container *container,
+				dma_addr_t iova, int npage)
 {
-	struct vfio_container *container;
-	struct vfio_iommu_driver *driver;
-
 	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
 		return;
 
-	if (WARN_ON(!vfio_assert_device_open(device)))
-		return;
-
-	/* group->container cannot change while a vfio device is open */
-	container = device->group->container;
-	driver = container->iommu_driver;
-
-	driver->ops->unpin_pages(container->iommu_data, iova, npage);
+	container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
+						  npage);
 }
-EXPORT_SYMBOL(vfio_unpin_pages);
 
-/*
- * This interface allows the CPUs to perform some sort of virtual DMA on
- * behalf of the device.
- *
- * CPUs read/write from/into a range of IOVAs pointing to user space memory
- * into/from a kernel buffer.
- *
- * As the read/write of user space memory is conducted via the CPUs and is
- * not a real device DMA, it is not necessary to pin the user space memory.
- *
- * @device [in]		: VFIO device
- * @iova [in]		: base IOVA of a user space buffer
- * @data [in]		: pointer to kernel buffer
- * @len [in]		: kernel buffer length
- * @write		: indicate read or write
- * Return error code on failure or 0 on success.
- */
-int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
-		size_t len, bool write)
+int vfio_container_dma_rw(struct vfio_container *container, dma_addr_t iova,
+			  void *data, size_t len, bool write)
 {
-	struct vfio_container *container;
-	struct vfio_iommu_driver *driver;
-	int ret = 0;
-
-	if (!data || len <= 0 || !vfio_assert_device_open(device))
-		return -EINVAL;
-
-	/* group->container cannot change while a vfio device is open */
-	container = device->group->container;
-	driver = container->iommu_driver;
+	struct vfio_iommu_driver *driver = container->iommu_driver;
 
-	if (likely(driver && driver->ops->dma_rw))
-		ret = driver->ops->dma_rw(container->iommu_data,
-					  iova, data, len, write);
-	else
-		ret = -ENOTTY;
-	return ret;
+	if (unlikely(!driver || !driver->ops->dma_rw))
+		return -ENOTTY;
+	return driver->ops->dma_rw(container->iommu_data, iova, data, len,
+				   write);
 }
-EXPORT_SYMBOL(vfio_dma_rw);
 
 int __init vfio_container_init(void)
 {
@@ -678,3 +608,6 @@ void vfio_container_cleanup(void)
 	misc_deregister(&vfio_dev);
 	mutex_destroy(&vfio.iommu_drivers_lock);
 }
+
+MODULE_ALIAS_MISCDEV(VFIO_MINOR);
+MODULE_ALIAS("devname:vfio/vfio");
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index b16874e913e4..5cd4bb476440 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -592,6 +592,9 @@ static const struct vfio_device_ops vfio_fsl_mc_ops = {
 	.read = vfio_fsl_mc_read,
 	.write = vfio_fsl_mc_write,
 	.mmap = vfio_fsl_mc_mmap,
+	.bind_iommufd = vfio_iommufd_physical_bind,
+	.unbind_iommufd = vfio_iommufd_physical_unbind,
+	.attach_ioas = vfio_iommufd_physical_attach_ioas,
 };
 
 static struct fsl_mc_driver vfio_fsl_mc_driver = {
diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
new file mode 100644
index 000000000000..4f82a6fa7c6c
--- /dev/null
+++ b/drivers/vfio/iommufd.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+
+#include "vfio.h"
+
+MODULE_IMPORT_NS(IOMMUFD);
+MODULE_IMPORT_NS(IOMMUFD_VFIO);
+
+int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
+{
+	u32 ioas_id;
+	u32 device_id;
+	int ret;
+
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	/*
+	 * If the driver doesn't provide this op then it means the device does
+	 * not do DMA at all. So nothing to do.
+	 */
+	if (!vdev->ops->bind_iommufd)
+		return 0;
+
+	ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id);
+	if (ret)
+		return ret;
+
+	ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id);
+	if (ret)
+		goto err_unbind;
+	ret = vdev->ops->attach_ioas(vdev, &ioas_id);
+	if (ret)
+		goto err_unbind;
+
+	/*
+	 * The legacy path has no way to return the device id or the selected
+	 * pt_id
+	 */
+	return 0;
+
+err_unbind:
+	if (vdev->ops->unbind_iommufd)
+		vdev->ops->unbind_iommufd(vdev);
+	return ret;
+}
+
+void vfio_iommufd_unbind(struct vfio_device *vdev)
+{
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	if (vdev->ops->unbind_iommufd)
+		vdev->ops->unbind_iommufd(vdev);
+}
+
+/*
+ * The physical standard ops mean that the iommufd_device is bound to the
+ * physical device vdev->dev that was provided to vfio_init_group_dev(). Drivers
+ * using this ops set should call vfio_register_group_dev()
+ */
+int vfio_iommufd_physical_bind(struct vfio_device *vdev,
+			       struct iommufd_ctx *ictx, u32 *out_device_id)
+{
+	struct iommufd_device *idev;
+
+	idev = iommufd_device_bind(ictx, vdev->dev, out_device_id);
+	if (IS_ERR(idev))
+		return PTR_ERR(idev);
+	vdev->iommufd_device = idev;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_bind);
+
+void vfio_iommufd_physical_unbind(struct vfio_device *vdev)
+{
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	if (vdev->iommufd_attached) {
+		iommufd_device_detach(vdev->iommufd_device);
+		vdev->iommufd_attached = false;
+	}
+	iommufd_device_unbind(vdev->iommufd_device);
+	vdev->iommufd_device = NULL;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_unbind);
+
+int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id)
+{
+	int rc;
+
+	rc = iommufd_device_attach(vdev->iommufd_device, pt_id);
+	if (rc)
+		return rc;
+	vdev->iommufd_attached = true;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas);
+
+/*
+ * The emulated standard ops mean that vfio_device is going to use the
+ * "mdev path" and will call vfio_pin_pages()/vfio_dma_rw(). Drivers using this
+ * ops set should call vfio_register_emulated_iommu_dev().
+ */
+
+static void vfio_emulated_unmap(void *data, unsigned long iova,
+				unsigned long length)
+{
+	struct vfio_device *vdev = data;
+
+	vdev->ops->dma_unmap(vdev, iova, length);
+}
+
+static const struct iommufd_access_ops vfio_user_ops = {
+	.needs_pin_pages = 1,
+	.unmap = vfio_emulated_unmap,
+};
+
+int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
+			       struct iommufd_ctx *ictx, u32 *out_device_id)
+{
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	vdev->iommufd_ictx = ictx;
+	iommufd_ctx_get(ictx);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_bind);
+
+void vfio_iommufd_emulated_unbind(struct vfio_device *vdev)
+{
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	if (vdev->iommufd_access) {
+		iommufd_access_destroy(vdev->iommufd_access);
+		vdev->iommufd_access = NULL;
+	}
+	iommufd_ctx_put(vdev->iommufd_ictx);
+	vdev->iommufd_ictx = NULL;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_unbind);
+
+int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id)
+{
+	struct iommufd_access *user;
+
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	user = iommufd_access_create(vdev->iommufd_ictx, *pt_id, &vfio_user_ops,
+				     vdev);
+	if (IS_ERR(user))
+		return PTR_ERR(user);
+	vdev->iommufd_access = user;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_attach_ioas);
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 39eeca18a0f7..40019b11c5a9 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -1246,6 +1246,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = {
 	.mmap = hisi_acc_vfio_pci_mmap,
 	.request = vfio_pci_core_request,
 	.match = vfio_pci_core_match,
+	.bind_iommufd = vfio_iommufd_physical_bind,
+	.unbind_iommufd = vfio_iommufd_physical_unbind,
+	.attach_ioas = vfio_iommufd_physical_attach_ioas,
 };
 
 static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
@@ -1261,6 +1264,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
 	.mmap = vfio_pci_core_mmap,
 	.request = vfio_pci_core_request,
 	.match = vfio_pci_core_match,
+	.bind_iommufd = vfio_iommufd_physical_bind,
+	.unbind_iommufd = vfio_iommufd_physical_unbind,
+	.attach_ioas = vfio_iommufd_physical_attach_ioas,
 };
 
 static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index fd6ccb8454a2..32d1f38d351e 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -623,6 +623,9 @@ static const struct vfio_device_ops mlx5vf_pci_ops = {
 	.mmap = vfio_pci_core_mmap,
 	.request = vfio_pci_core_request,
 	.match = vfio_pci_core_match,
+	.bind_iommufd = vfio_iommufd_physical_bind,
+	.unbind_iommufd = vfio_iommufd_physical_unbind,
+	.attach_ioas = vfio_iommufd_physical_attach_ioas,
 };
 
 static int mlx5vf_pci_probe(struct pci_dev *pdev,
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 1d4919edfbde..29091ee2e984 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -138,6 +138,9 @@ static const struct vfio_device_ops vfio_pci_ops = {
 	.mmap = vfio_pci_core_mmap,
 	.request = vfio_pci_core_request,
 	.match = vfio_pci_core_match,
+	.bind_iommufd = vfio_iommufd_physical_bind,
+	.unbind_iommufd = vfio_iommufd_physical_unbind,
+	.attach_ioas = vfio_iommufd_physical_attach_ioas,
 };
 
 static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c
index eaea63e5294c..5a046098d0bd 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -117,6 +117,9 @@ static const struct vfio_device_ops vfio_amba_ops = {
 	.read = vfio_platform_read,
 	.write = vfio_platform_write,
 	.mmap = vfio_platform_mmap,
+	.bind_iommufd = vfio_iommufd_physical_bind,
+	.unbind_iommufd = vfio_iommufd_physical_unbind,
+	.attach_ioas = vfio_iommufd_physical_attach_ioas,
 };
 
 static const struct amba_id pl330_ids[] = {
diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c
index 82cedcebfd90..b87c3b708783 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -106,6 +106,9 @@ static const struct vfio_device_ops vfio_platform_ops = {
 	.read = vfio_platform_read,
 	.write = vfio_platform_write,
 	.mmap = vfio_platform_mmap,
+	.bind_iommufd = vfio_iommufd_physical_bind,
+	.unbind_iommufd = vfio_iommufd_physical_unbind,
+	.attach_ioas = vfio_iommufd_physical_attach_ioas,
 };
 
 static struct platform_driver vfio_platform_driver = {
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index bcad54bbab08..ce5fe3fc493b 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -10,6 +10,7 @@
 #include <linux/cdev.h>
 #include <linux/module.h>
 
+struct iommufd_ctx;
 struct iommu_group;
 struct vfio_device;
 struct vfio_container;
@@ -54,14 +55,18 @@ struct vfio_group {
 	struct list_head	device_list;
 	struct mutex		device_lock;
 	struct list_head	vfio_next;
+#if IS_ENABLED(CONFIG_VFIO_CONTAINER)
 	struct list_head	container_next;
+#endif
 	enum vfio_group_type	type;
 	struct mutex		group_lock;
 	struct kvm		*kvm;
 	struct file		*opened_file;
 	struct blocking_notifier_head	notifier;
+	struct iommufd_ctx	*iommufd;
 };
 
+#if IS_ENABLED(CONFIG_VFIO_CONTAINER)
 /* events for the backend driver notify callback */
 enum vfio_iommu_notify_type {
 	VFIO_IOMMU_CONTAINER_CLOSE = 0,
@@ -109,20 +114,101 @@ struct vfio_iommu_driver {
 int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
 void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops);
 
-bool vfio_assert_device_open(struct vfio_device *device);
-
 struct vfio_container *vfio_container_from_file(struct file *filep);
-int vfio_device_assign_container(struct vfio_device *device);
-void vfio_device_unassign_container(struct vfio_device *device);
+int vfio_group_use_container(struct vfio_group *group);
+void vfio_group_unuse_container(struct vfio_group *group);
 int vfio_container_attach_group(struct vfio_container *container,
 				struct vfio_group *group);
 void vfio_group_detach_container(struct vfio_group *group);
 void vfio_device_container_register(struct vfio_device *device);
 void vfio_device_container_unregister(struct vfio_device *device);
-long vfio_container_ioctl_check_extension(struct vfio_container *container,
-					   unsigned long arg);
+int vfio_container_pin_pages(struct vfio_container *container,
+			     struct iommu_group *iommu_group, dma_addr_t iova,
+			     int npage, int prot, struct page **pages);
+void vfio_container_unpin_pages(struct vfio_container *container,
+				dma_addr_t iova, int npage);
+int vfio_container_dma_rw(struct vfio_container *container, dma_addr_t iova,
+			  void *data, size_t len, bool write);
+
 int __init vfio_container_init(void);
 void vfio_container_cleanup(void);
+#else
+static inline struct vfio_container *
+vfio_container_from_file(struct file *filep)
+{
+	return NULL;
+}
+
+static inline int vfio_group_use_container(struct vfio_group *group)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void vfio_group_unuse_container(struct vfio_group *group)
+{
+}
+
+static inline int vfio_container_attach_group(struct vfio_container *container,
+					      struct vfio_group *group)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void vfio_group_detach_container(struct vfio_group *group)
+{
+}
+
+static inline void vfio_device_container_register(struct vfio_device *device)
+{
+}
+
+static inline void vfio_device_container_unregister(struct vfio_device *device)
+{
+}
+
+static inline int vfio_container_pin_pages(struct vfio_container *container,
+					   struct iommu_group *iommu_group,
+					   dma_addr_t iova, int npage, int prot,
+					   struct page **pages)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void vfio_container_unpin_pages(struct vfio_container *container,
+					      dma_addr_t iova, int npage)
+{
+}
+
+static inline int vfio_container_dma_rw(struct vfio_container *container,
+					dma_addr_t iova, void *data, size_t len,
+					bool write)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int vfio_container_init(void)
+{
+	return 0;
+}
+static inline void vfio_container_cleanup(void)
+{
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IOMMUFD)
+int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx);
+void vfio_iommufd_unbind(struct vfio_device *device);
+#else
+static inline int vfio_iommufd_bind(struct vfio_device *device,
+				    struct iommufd_ctx *ictx)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void vfio_iommufd_unbind(struct vfio_device *device)
+{
+}
+#endif
 
 #ifdef CONFIG_VFIO_NOIOMMU
 extern bool vfio_noiommu __read_mostly;
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 2d168793d4e1..ce6e6a560c70 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -35,6 +35,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/interval_tree.h>
 #include <linux/iova_bitmap.h>
+#include <linux/iommufd.h>
 #include "vfio.h"
 
 #define DRIVER_VERSION	"0.3"
@@ -524,6 +525,11 @@ static int __vfio_register_dev(struct vfio_device *device,
 	if (IS_ERR(group))
 		return PTR_ERR(group);
 
+	if (WARN_ON(device->ops->bind_iommufd &&
+		    (!device->ops->unbind_iommufd ||
+		     !device->ops->attach_ioas)))
+		return -EINVAL;
+
 	/*
 	 * If the driver doesn't specify a set then the device is added to a
 	 * singleton set just for itself.
@@ -662,6 +668,18 @@ EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
 /*
  * VFIO Group fd, /dev/vfio/$GROUP
  */
+static bool vfio_group_has_iommu(struct vfio_group *group)
+{
+	lockdep_assert_held(&group->group_lock);
+	/*
+	 * There can only be users if there is a container, and if there is a
+	 * container there must be users.
+	 */
+	WARN_ON(!group->container != !group->container_users);
+
+	return group->container || group->iommufd;
+}
+
 /*
  * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
  * if there was no container to unset.  Since the ioctl is called on
@@ -673,15 +691,21 @@ static int vfio_group_ioctl_unset_container(struct vfio_group *group)
 	int ret = 0;
 
 	mutex_lock(&group->group_lock);
-	if (!group->container) {
+	if (!vfio_group_has_iommu(group)) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}
-	if (group->container_users != 1) {
-		ret = -EBUSY;
-		goto out_unlock;
+	if (group->container) {
+		if (group->container_users != 1) {
+			ret = -EBUSY;
+			goto out_unlock;
+		}
+		vfio_group_detach_container(group);
+	}
+	if (group->iommufd) {
+		iommufd_ctx_put(group->iommufd);
+		group->iommufd = NULL;
 	}
-	vfio_group_detach_container(group);
 
 out_unlock:
 	mutex_unlock(&group->group_lock);
@@ -692,6 +716,7 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
 					  int __user *arg)
 {
 	struct vfio_container *container;
+	struct iommufd_ctx *iommufd;
 	struct fd f;
 	int ret;
 	int fd;
@@ -704,7 +729,7 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
 		return -EBADF;
 
 	mutex_lock(&group->group_lock);
-	if (group->container || WARN_ON(group->container_users)) {
+	if (vfio_group_has_iommu(group)) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}
@@ -714,12 +739,28 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
 	}
 
 	container = vfio_container_from_file(f.file);
-	ret = -EINVAL;
 	if (container) {
 		ret = vfio_container_attach_group(container, group);
 		goto out_unlock;
 	}
 
+	iommufd = iommufd_ctx_from_file(f.file);
+	if (!IS_ERR(iommufd)) {
+		u32 ioas_id;
+
+		ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
+		if (ret) {
+			iommufd_ctx_put(group->iommufd);
+			goto out_unlock;
+		}
+
+		group->iommufd = iommufd;
+		goto out_unlock;
+	}
+
+	/* The FD passed is not recognized. */
+	ret = -EBADFD;
+
 out_unlock:
 	mutex_unlock(&group->group_lock);
 	fdput(f);
@@ -729,45 +770,93 @@ out_unlock:
 static const struct file_operations vfio_device_fops;
 
 /* true if the vfio_device has open_device() called but not close_device() */
-bool vfio_assert_device_open(struct vfio_device *device)
+static bool vfio_assert_device_open(struct vfio_device *device)
 {
 	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
 }
 
-static struct file *vfio_device_open(struct vfio_device *device)
+static int vfio_device_first_open(struct vfio_device *device)
 {
-	struct file *filep;
 	int ret;
 
+	lockdep_assert_held(&device->dev_set->lock);
+
+	if (!try_module_get(device->dev->driver->owner))
+		return -ENODEV;
+
+	/*
+	 * Here we pass the KVM pointer with the group under the lock.  If the
+	 * device driver will use it, it must obtain a reference and release it
+	 * during close_device.
+	 */
 	mutex_lock(&device->group->group_lock);
-	ret = vfio_device_assign_container(device);
-	mutex_unlock(&device->group->group_lock);
-	if (ret)
-		return ERR_PTR(ret);
+	if (!vfio_group_has_iommu(device->group)) {
+		ret = -EINVAL;
+		goto err_module_put;
+	}
 
-	if (!try_module_get(device->dev->driver->owner)) {
-		ret = -ENODEV;
-		goto err_unassign_container;
+	if (device->group->container) {
+		ret = vfio_group_use_container(device->group);
+		if (ret)
+			goto err_module_put;
+	} else if (device->group->iommufd) {
+		ret = vfio_iommufd_bind(device, device->group->iommufd);
+		if (ret)
+			goto err_module_put;
 	}
 
+	device->kvm = device->group->kvm;
+	if (device->ops->open_device) {
+		ret = device->ops->open_device(device);
+		if (ret)
+			goto err_container;
+	}
+	if (device->group->container)
+		vfio_device_container_register(device);
+	mutex_unlock(&device->group->group_lock);
+	return 0;
+
+err_container:
+	device->kvm = NULL;
+	if (device->group->container)
+		vfio_group_unuse_container(device->group);
+	else if (device->group->iommufd)
+		vfio_iommufd_unbind(device);
+err_module_put:
+	mutex_unlock(&device->group->group_lock);
+	module_put(device->dev->driver->owner);
+	return ret;
+}
+
+static void vfio_device_last_close(struct vfio_device *device)
+{
+	lockdep_assert_held(&device->dev_set->lock);
+
+	mutex_lock(&device->group->group_lock);
+	if (device->group->container)
+		vfio_device_container_unregister(device);
+	if (device->ops->close_device)
+		device->ops->close_device(device);
+	device->kvm = NULL;
+	if (device->group->container)
+		vfio_group_unuse_container(device->group);
+	else if (device->group->iommufd)
+		vfio_iommufd_unbind(device);
+	mutex_unlock(&device->group->group_lock);
+	module_put(device->dev->driver->owner);
+}
+
+static struct file *vfio_device_open(struct vfio_device *device)
+{
+	struct file *filep;
+	int ret;
+
 	mutex_lock(&device->dev_set->lock);
 	device->open_count++;
 	if (device->open_count == 1) {
-		/*
-		 * Here we pass the KVM pointer with the group under the read
-		 * lock.  If the device driver will use it, it must obtain a
-		 * reference and release it during close_device.
-		 */
-		mutex_lock(&device->group->group_lock);
-		device->kvm = device->group->kvm;
-
-		if (device->ops->open_device) {
-			ret = device->ops->open_device(device);
-			if (ret)
-				goto err_undo_count;
-		}
-		vfio_device_container_register(device);
-		mutex_unlock(&device->group->group_lock);
+		ret = vfio_device_first_open(device);
+		if (ret)
+			goto err_unlock;
 	}
 	mutex_unlock(&device->dev_set->lock);
@@ -800,21 +889,11 @@ static struct file *vfio_device_open(struct vfio_device *device)
 
 err_close_device:
 	mutex_lock(&device->dev_set->lock);
-	mutex_lock(&device->group->group_lock);
-	if (device->open_count == 1 && device->ops->close_device) {
-		device->ops->close_device(device);
-
-		vfio_device_container_unregister(device);
-	}
-err_undo_count:
-	mutex_unlock(&device->group->group_lock);
+	if (device->open_count == 1)
+		vfio_device_last_close(device);
+err_unlock:
 	device->open_count--;
-	if (device->open_count == 0 && device->kvm)
-		device->kvm = NULL;
 	mutex_unlock(&device->dev_set->lock);
-	module_put(device->dev->driver->owner);
-err_unassign_container:
-	vfio_device_unassign_container(device);
 	return ERR_PTR(ret);
 }
 
@@ -878,7 +957,14 @@ static int vfio_group_ioctl_get_status(struct vfio_group *group,
 		return -ENODEV;
 	}
 
-	if (group->container)
+	/*
+	 * With the container FD the iommu_group_claim_dma_owner() is done
+	 * during SET_CONTAINER but for IOMMFD this is done during
+	 * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd
+	 * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due
+	 * to viability.
+	 */
+	if (vfio_group_has_iommu(group))
 		status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
 				VFIO_GROUP_FLAGS_VIABLE;
 	else if (!iommu_group_dma_owner_claimed(group->iommu_group))
@@ -961,6 +1047,10 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep)
 	WARN_ON(group->notifier.head);
 	if (group->container)
 		vfio_group_detach_container(group);
+	if (group->iommufd) {
+		iommufd_ctx_put(group->iommufd);
+		group->iommufd = NULL;
+	}
 	group->opened_file = NULL;
 	mutex_unlock(&group->group_lock);
 	return 0;
@@ -1016,21 +1106,11 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 
 	mutex_lock(&device->dev_set->lock);
 	vfio_assert_device_open(device);
-	mutex_lock(&device->group->group_lock);
-	if (device->open_count == 1 && device->ops->close_device)
-		device->ops->close_device(device);
-
-	vfio_device_container_unregister(device);
-	mutex_unlock(&device->group->group_lock);
+	if (device->open_count == 1)
+		vfio_device_last_close(device);
 	device->open_count--;
-	if (device->open_count == 0)
-		device->kvm = NULL;
 	mutex_unlock(&device->dev_set->lock);
 
-	module_put(device->dev->driver->owner);
-
-	vfio_device_unassign_container(device);
-
 	vfio_device_put_registration(device);
 
 	return 0;
@@ -1613,24 +1693,27 @@ EXPORT_SYMBOL_GPL(vfio_file_is_group);
 bool vfio_file_enforced_coherent(struct file *file)
 {
 	struct vfio_group *group = file->private_data;
-	bool ret;
+	struct vfio_device *device;
+	bool ret = true;
 
 	if (!vfio_file_is_group(file))
 		return true;
 
-	mutex_lock(&group->group_lock);
-	if (group->container) {
-		ret = vfio_container_ioctl_check_extension(group->container,
-							   VFIO_DMA_CC_IOMMU);
-	} else {
-		/*
-		 * Since the coherency state is determined only once a container
-		 * is attached the user must do so before they can prove they
-		 * have permission.
-		 */
-		ret = true;
+	/*
+	 * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then
+	 * any domain later attached to it will also not support it. If the cap
+	 * is set then the iommu_domain eventually attached to the device/group
+	 * must use a domain with enforce_cache_coherency().
+	 */
+	mutex_lock(&group->device_lock);
+	list_for_each_entry(device, &group->device_list, group_next) {
+		if (!device_iommu_capable(device->dev,
+					  IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) {
+			ret = false;
+			break;
+		}
 	}
-	mutex_unlock(&group->group_lock);
+	mutex_unlock(&group->device_lock);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
@@ -1794,6 +1877,126 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
 EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
 
 /*
+ * Pin contiguous user pages and return their associated host pages for local
+ * domain only.
+ * @device [in]  : device
+ * @iova [in]    : starting IOVA of user pages to be pinned.
+ * @npage [in]   : count of pages to be pinned.  This count should not
+ *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
+ * @prot [in]    : protection flags
+ * @pages[out]   : array of host pages
+ * Return error or number of pages pinned.
+ *
+ * A driver may only call this function if the vfio_device was created
+ * by vfio_register_emulated_iommu_dev() due to vfio_container_pin_pages().
+ */
+int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
+		   int npage, int prot, struct page **pages)
+{
+	/* group->container cannot change while a vfio device is open */
+	if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device)))
+		return -EINVAL;
+	if (device->group->container)
+		return vfio_container_pin_pages(device->group->container,
+						device->group->iommu_group,
+						iova, npage, prot, pages);
+	if (device->iommufd_access) {
+		int ret;
+
+		if (iova > ULONG_MAX)
+			return -EINVAL;
+		/*
+		 * VFIO ignores the sub page offset, npages is from the start of
+		 * a PAGE_SIZE chunk of IOVA. The caller is expected to recover
+		 * the sub page offset by doing:
+		 *     pages[0] + (iova % PAGE_SIZE)
+		 */
+		ret = iommufd_access_pin_pages(
+			device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE),
+			npage * PAGE_SIZE, pages,
+			(prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0);
+		if (ret)
+			return ret;
+		return npage;
+	}
+	return -EINVAL;
+}
+EXPORT_SYMBOL(vfio_pin_pages);
+
+/*
+ * Unpin contiguous host pages for local domain only.
+ * @device [in]  : device
+ * @iova [in]    : starting address of user pages to be unpinned.
+ * @npage [in]   : count of pages to be unpinned.  This count should not
+ *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
+ */
+void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
+{
+	if (WARN_ON(!vfio_assert_device_open(device)))
+		return;
+
+	if (device->group->container) {
+		vfio_container_unpin_pages(device->group->container, iova,
+					   npage);
+		return;
+	}
+	if (device->iommufd_access) {
+		if (WARN_ON(iova > ULONG_MAX))
+			return;
+		iommufd_access_unpin_pages(device->iommufd_access,
+					   ALIGN_DOWN(iova, PAGE_SIZE),
+					   npage * PAGE_SIZE);
+		return;
+	}
+}
+EXPORT_SYMBOL(vfio_unpin_pages);
+
+/*
+ * This interface allows the CPUs to perform some sort of virtual DMA on
+ * behalf of the device.
+ *
+ * CPUs read/write from/into a range of IOVAs pointing to user space memory
+ * into/from a kernel buffer.
+ *
+ * As the read/write of user space memory is conducted via the CPUs and is
+ * not a real device DMA, it is not necessary to pin the user space memory.
+ *
+ * @device [in]		: VFIO device
+ * @iova [in]		: base IOVA of a user space buffer
+ * @data [in]		: pointer to kernel buffer
+ * @len [in]		: kernel buffer length
+ * @write		: indicate read or write
+ * Return error code on failure or 0 on success.
+ */
+int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
+		size_t len, bool write)
+{
+	if (!data || len <= 0 || !vfio_assert_device_open(device))
+		return -EINVAL;
+
+	if (device->group->container)
+		return vfio_container_dma_rw(device->group->container, iova,
+					     data, len, write);
+
+	if (device->iommufd_access) {
+		unsigned int flags = 0;
+
+		if (iova > ULONG_MAX)
+			return -EINVAL;
+
+		/* VFIO historically tries to auto-detect a kthread */
+		if (!current->mm)
+			flags |= IOMMUFD_ACCESS_RW_KTHREAD;
+		if (write)
+			flags |= IOMMUFD_ACCESS_RW_WRITE;
+		return iommufd_access_rw(device->iommufd_access, iova, data,
+					 len, flags);
+	}
+	return -EINVAL;
+}
+EXPORT_SYMBOL(vfio_dma_rw);
+
+/*
  * Module/class support
  */
 static char *vfio_devnode(struct device *dev, umode_t *mode)
@@ -1870,6 +2073,4 @@ MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_ALIAS_MISCDEV(VFIO_MINOR);
-MODULE_ALIAS("devname:vfio/vfio");
 MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");