diff options
author | Shuo Liu <shuo.a.liu@intel.com> | 2021-02-07 11:10:37 +0800 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2021-02-09 10:58:19 +0100 |
commit | d8ad515156b66e7e79a6e4c814f997ee54eb47c7 (patch) | |
tree | 858cf13d5d32b929b3f9136ece5f729a229395bf /drivers | |
parent | 5a0c9f176f232513d4114a518cbff835d232f500 (diff) |
virt: acrn: Introduce ioeventfd
ioeventfd is a mechanism to register PIO/MMIO regions to trigger an
eventfd signal when written to by a User VM. ACRN userspace can register
any arbitrary I/O address with a corresponding eventfd and then pass the
eventfd to a specific end-point of interest for handling.
Vhost is a kernel-level virtio server which uses eventfd for signalling.
To support vhost on ACRN, ioeventfd is introduced in HSM.
A new I/O client dedicated to ioeventfd is associated with a User VM
during VM creation. HSM provides ioctls to associate an I/O region with
a eventfd. The I/O client signals a eventfd once its corresponding I/O
region is matched with an I/O request.
Cc: Zhi Wang <zhi.a.wang@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Yu Wang <yu1.wang@intel.com>
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Zhi Wang <zhi.a.wang@intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Shuo Liu <shuo.a.liu@intel.com>
Link: https://lore.kernel.org/r/20210207031040.49576-16-shuo.a.liu@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/virt/acrn/Kconfig | 1 | ||||
-rw-r--r-- | drivers/virt/acrn/Makefile | 2 | ||||
-rw-r--r-- | drivers/virt/acrn/acrn_drv.h | 10 | ||||
-rw-r--r-- | drivers/virt/acrn/hsm.c | 11 | ||||
-rw-r--r-- | drivers/virt/acrn/ioeventfd.c | 273 | ||||
-rw-r--r-- | drivers/virt/acrn/vm.c | 2 |
6 files changed, 298 insertions, 1 deletions
diff --git a/drivers/virt/acrn/Kconfig b/drivers/virt/acrn/Kconfig index 36c80378c30c..3e1a61c9d8d8 100644 --- a/drivers/virt/acrn/Kconfig +++ b/drivers/virt/acrn/Kconfig @@ -2,6 +2,7 @@ config ACRN_HSM tristate "ACRN Hypervisor Service Module" depends on ACRN_GUEST + select EVENTFD help ACRN Hypervisor Service Module (HSM) is a kernel module which communicates with ACRN userspace through ioctls and talks to diff --git a/drivers/virt/acrn/Makefile b/drivers/virt/acrn/Makefile index 21721cbf6a80..755b583b32ca 100644 --- a/drivers/virt/acrn/Makefile +++ b/drivers/virt/acrn/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ACRN_HSM) := acrn.o -acrn-y := hsm.o vm.o mm.o ioreq.o +acrn-y := hsm.o vm.o mm.o ioreq.o ioeventfd.o diff --git a/drivers/virt/acrn/acrn_drv.h b/drivers/virt/acrn/acrn_drv.h index 8a7d7721f505..e3f8190bd972 100644 --- a/drivers/virt/acrn/acrn_drv.h +++ b/drivers/virt/acrn/acrn_drv.h @@ -156,6 +156,9 @@ extern rwlock_t acrn_vm_list_lock; * @ioreq_page: The page of the I/O request shared buffer * @pci_conf_addr: Address of a PCI configuration access emulation * @monitor_page: Page of interrupt statistics of User VM + * @ioeventfds_lock: Lock to protect ioeventfds list + * @ioeventfds: List to link all hsm_ioeventfd + * @ioeventfd_client: I/O client for ioeventfds of the VM */ struct acrn_vm { struct list_head list; @@ -172,6 +175,9 @@ struct acrn_vm { struct page *ioreq_page; u32 pci_conf_addr; struct page *monitor_page; + struct mutex ioeventfds_lock; + struct list_head ioeventfds; + struct acrn_ioreq_client *ioeventfd_client; }; struct acrn_vm *acrn_vm_create(struct acrn_vm *vm, @@ -204,4 +210,8 @@ void acrn_ioreq_range_del(struct acrn_ioreq_client *client, int acrn_msi_inject(struct acrn_vm *vm, u64 msi_addr, u64 msi_data); +int acrn_ioeventfd_init(struct acrn_vm *vm); +int acrn_ioeventfd_config(struct acrn_vm *vm, struct acrn_ioeventfd *args); +void acrn_ioeventfd_deinit(struct acrn_vm *vm); + #endif /* __ACRN_HSM_DRV_H */ diff --git a/drivers/virt/acrn/hsm.c b/drivers/virt/acrn/hsm.c index b7f2deddc3e7..0b4e88b0b6bc 100644 --- a/drivers/virt/acrn/hsm.c +++ b/drivers/virt/acrn/hsm.c @@ -111,6 +111,7 @@ static long acrn_dev_ioctl(struct file *filp, unsigned int cmd, struct acrn_vcpu_regs *cpu_regs; struct acrn_ioreq_notify notify; struct acrn_ptdev_irq *irq_info; + struct acrn_ioeventfd ioeventfd; struct acrn_vm_memmap memmap; struct acrn_msi_entry *msi; struct acrn_pcidev *pcidev; @@ -336,6 +337,16 @@ static long acrn_dev_ioctl(struct file *filp, unsigned int cmd, ret = pmcmd_ioctl(cstate_cmd, (void __user *)ioctl_param); break; + case ACRN_IOCTL_IOEVENTFD: + if (copy_from_user(&ioeventfd, (void __user *)ioctl_param, + sizeof(ioeventfd))) + return -EFAULT; + + if (ioeventfd.reserved != 0) + return -EINVAL; + + ret = acrn_ioeventfd_config(vm, &ioeventfd); + break; default: dev_dbg(acrn_dev.this_device, "Unknown IOCTL 0x%x!\n", cmd); ret = -ENOTTY; diff --git a/drivers/virt/acrn/ioeventfd.c b/drivers/virt/acrn/ioeventfd.c new file mode 100644 index 000000000000..ac4037e9f947 --- /dev/null +++ b/drivers/virt/acrn/ioeventfd.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ACRN HSM eventfd - use eventfd objects to signal expected I/O requests + * + * Copyright (C) 2020 Intel Corporation. All rights reserved. + * + * Authors: + * Shuo Liu <shuo.a.liu@intel.com> + * Yakui Zhao <yakui.zhao@intel.com> + */ + +#include <linux/eventfd.h> +#include <linux/slab.h> + +#include "acrn_drv.h" + +/** + * struct hsm_ioeventfd - Properties of HSM ioeventfd + * @list: Entry within &acrn_vm.ioeventfds of ioeventfds of a VM + * @eventfd: Eventfd of the HSM ioeventfd + * @addr: Address of I/O range + * @data: Data for matching + * @length: Length of I/O range + * @type: Type of I/O range (ACRN_IOREQ_TYPE_MMIO/ACRN_IOREQ_TYPE_PORTIO) + * @wildcard: Data matching or not + */ +struct hsm_ioeventfd { + struct list_head list; + struct eventfd_ctx *eventfd; + u64 addr; + u64 data; + int length; + int type; + bool wildcard; +}; + +static inline int ioreq_type_from_flags(int flags) +{ + return flags & ACRN_IOEVENTFD_FLAG_PIO ? + ACRN_IOREQ_TYPE_PORTIO : ACRN_IOREQ_TYPE_MMIO; +} + +static void acrn_ioeventfd_shutdown(struct acrn_vm *vm, struct hsm_ioeventfd *p) +{ + lockdep_assert_held(&vm->ioeventfds_lock); + + eventfd_ctx_put(p->eventfd); + list_del(&p->list); + kfree(p); +} + +static bool hsm_ioeventfd_is_conflict(struct acrn_vm *vm, + struct hsm_ioeventfd *ioeventfd) +{ + struct hsm_ioeventfd *p; + + lockdep_assert_held(&vm->ioeventfds_lock); + + /* Either one is wildcard, the data matching will be skipped. */ + list_for_each_entry(p, &vm->ioeventfds, list) + if (p->eventfd == ioeventfd->eventfd && + p->addr == ioeventfd->addr && + p->type == ioeventfd->type && + (p->wildcard || ioeventfd->wildcard || + p->data == ioeventfd->data)) + return true; + + return false; +} + +/* + * Assign an eventfd to a VM and create a HSM ioeventfd associated with the + * eventfd. The properties of the HSM ioeventfd are built from a &struct + * acrn_ioeventfd. + */ +static int acrn_ioeventfd_assign(struct acrn_vm *vm, + struct acrn_ioeventfd *args) +{ + struct eventfd_ctx *eventfd; + struct hsm_ioeventfd *p; + int ret; + + /* Check for range overflow */ + if (args->addr + args->len < args->addr) + return -EINVAL; + + /* + * Currently, acrn_ioeventfd is used to support vhost. 1,2,4,8 width + * accesses can cover vhost's requirements. + */ + if (!(args->len == 1 || args->len == 2 || + args->len == 4 || args->len == 8)) + return -EINVAL; + + eventfd = eventfd_ctx_fdget(args->fd); + if (IS_ERR(eventfd)) + return PTR_ERR(eventfd); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + ret = -ENOMEM; + goto fail; + } + + INIT_LIST_HEAD(&p->list); + p->addr = args->addr; + p->length = args->len; + p->eventfd = eventfd; + p->type = ioreq_type_from_flags(args->flags); + + /* + * ACRN_IOEVENTFD_FLAG_DATAMATCH flag is set in virtio 1.0 support, the + * writing of notification register of each virtqueue may trigger the + * notification. There is no data matching requirement. + */ + if (args->flags & ACRN_IOEVENTFD_FLAG_DATAMATCH) + p->data = args->data; + else + p->wildcard = true; + + mutex_lock(&vm->ioeventfds_lock); + + if (hsm_ioeventfd_is_conflict(vm, p)) { + ret = -EEXIST; + goto unlock_fail; + } + + /* register the I/O range into ioreq client */ + ret = acrn_ioreq_range_add(vm->ioeventfd_client, p->type, + p->addr, p->addr + p->length - 1); + if (ret < 0) + goto unlock_fail; + + list_add_tail(&p->list, &vm->ioeventfds); + mutex_unlock(&vm->ioeventfds_lock); + + return 0; + +unlock_fail: + mutex_unlock(&vm->ioeventfds_lock); + kfree(p); +fail: + eventfd_ctx_put(eventfd); + return ret; +} + +static int acrn_ioeventfd_deassign(struct acrn_vm *vm, + struct acrn_ioeventfd *args) +{ + struct hsm_ioeventfd *p; + struct eventfd_ctx *eventfd; + + eventfd = eventfd_ctx_fdget(args->fd); + if (IS_ERR(eventfd)) + return PTR_ERR(eventfd); + + mutex_lock(&vm->ioeventfds_lock); + list_for_each_entry(p, &vm->ioeventfds, list) { + if (p->eventfd != eventfd) + continue; + + acrn_ioreq_range_del(vm->ioeventfd_client, p->type, + p->addr, p->addr + p->length - 1); + acrn_ioeventfd_shutdown(vm, p); + break; + } + mutex_unlock(&vm->ioeventfds_lock); + + eventfd_ctx_put(eventfd); + return 0; +} + +static struct hsm_ioeventfd *hsm_ioeventfd_match(struct acrn_vm *vm, u64 addr, + u64 data, int len, int type) +{ + struct hsm_ioeventfd *p = NULL; + + lockdep_assert_held(&vm->ioeventfds_lock); + + list_for_each_entry(p, &vm->ioeventfds, list) { + if (p->type == type && p->addr == addr && p->length >= len && + (p->wildcard || p->data == data)) + return p; + } + + return NULL; +} + +static int acrn_ioeventfd_handler(struct acrn_ioreq_client *client, + struct acrn_io_request *req) +{ + struct hsm_ioeventfd *p; + u64 addr, val; + int size; + + if (req->type == ACRN_IOREQ_TYPE_MMIO) { + /* + * I/O requests are dispatched by range check only, so a + * acrn_ioreq_client need process both READ and WRITE accesses + * of same range. READ accesses are safe to be ignored here + * because virtio PCI devices write the notify registers for + * notification. + */ + if (req->reqs.mmio_request.direction == ACRN_IOREQ_DIR_READ) { + /* reading does nothing and return 0 */ + req->reqs.mmio_request.value = 0; + return 0; + } + addr = req->reqs.mmio_request.address; + size = req->reqs.mmio_request.size; + val = req->reqs.mmio_request.value; + } else { + if (req->reqs.pio_request.direction == ACRN_IOREQ_DIR_READ) { + /* reading does nothing and return 0 */ + req->reqs.pio_request.value = 0; + return 0; + } + addr = req->reqs.pio_request.address; + size = req->reqs.pio_request.size; + val = req->reqs.pio_request.value; + } + + mutex_lock(&client->vm->ioeventfds_lock); + p = hsm_ioeventfd_match(client->vm, addr, val, size, req->type); + if (p) + eventfd_signal(p->eventfd, 1); + mutex_unlock(&client->vm->ioeventfds_lock); + + return 0; +} + +int acrn_ioeventfd_config(struct acrn_vm *vm, struct acrn_ioeventfd *args) +{ + int ret; + + if (args->flags & ACRN_IOEVENTFD_FLAG_DEASSIGN) + ret = acrn_ioeventfd_deassign(vm, args); + else + ret = acrn_ioeventfd_assign(vm, args); + + return ret; +} + +int acrn_ioeventfd_init(struct acrn_vm *vm) +{ + char name[ACRN_NAME_LEN]; + + mutex_init(&vm->ioeventfds_lock); + INIT_LIST_HEAD(&vm->ioeventfds); + snprintf(name, sizeof(name), "ioeventfd-%u", vm->vmid); + vm->ioeventfd_client = acrn_ioreq_client_create(vm, + acrn_ioeventfd_handler, + NULL, false, name); + if (!vm->ioeventfd_client) { + dev_err(acrn_dev.this_device, "Failed to create ioeventfd ioreq client!\n"); + return -EINVAL; + } + + dev_dbg(acrn_dev.this_device, "VM %u ioeventfd init.\n", vm->vmid); + return 0; +} + +void acrn_ioeventfd_deinit(struct acrn_vm *vm) +{ + struct hsm_ioeventfd *p, *next; + + dev_dbg(acrn_dev.this_device, "VM %u ioeventfd deinit.\n", vm->vmid); + acrn_ioreq_client_destroy(vm->ioeventfd_client); + mutex_lock(&vm->ioeventfds_lock); + list_for_each_entry_safe(p, next, &vm->ioeventfds, list) + acrn_ioeventfd_shutdown(vm, p); + mutex_unlock(&vm->ioeventfds_lock); +} diff --git a/drivers/virt/acrn/vm.c b/drivers/virt/acrn/vm.c index db597f27690a..2b322d956a8c 100644 --- a/drivers/virt/acrn/vm.c +++ b/drivers/virt/acrn/vm.c @@ -50,6 +50,7 @@ struct acrn_vm *acrn_vm_create(struct acrn_vm *vm, list_add(&vm->list, &acrn_vm_list); write_unlock_bh(&acrn_vm_list_lock); + acrn_ioeventfd_init(vm); dev_dbg(acrn_dev.this_device, "VM %u created.\n", vm->vmid); return vm; } @@ -67,6 +68,7 @@ int acrn_vm_destroy(struct acrn_vm *vm) list_del_init(&vm->list); write_unlock_bh(&acrn_vm_list_lock); + acrn_ioeventfd_deinit(vm); acrn_ioreq_deinit(vm); if (vm->monitor_page) { put_page(vm->monitor_page); |