diff options
Diffstat (limited to 'drivers/vhost')
-rw-r--r-- | drivers/vhost/Kconfig | 4 | ||||
-rw-r--r-- | drivers/vhost/net.c | 31 | ||||
-rw-r--r-- | drivers/vhost/scsi.c | 17 | ||||
-rw-r--r-- | drivers/vhost/test.c | 14 | ||||
-rw-r--r-- | drivers/vhost/vdpa.c | 121 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 107 | ||||
-rw-r--r-- | drivers/vhost/vhost.h | 35 | ||||
-rw-r--r-- | drivers/vhost/vringh.c | 11 | ||||
-rw-r--r-- | drivers/vhost/vsock.c | 37 |
9 files changed, 285 insertions, 92 deletions
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 362b832f5338..2c75d164b827 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -3,6 +3,8 @@ config VHOST_IOTLB tristate help Generic IOTLB implementation for vhost and vringh. + This option is selected by any driver which needs to support + an IOMMU in software. config VHOST_RING tristate @@ -63,7 +65,7 @@ config VHOST_VDPA tristate "Vhost driver for vDPA-based backend" depends on EVENTFD select VHOST - select VDPA + depends on VDPA help This kernel module can be loaded in host kernel to accelerate guest virtio devices with the vDPA-based backends. diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 87469d67ede8..e992decfec53 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -424,7 +424,7 @@ static void vhost_net_disable_vq(struct vhost_net *n, struct vhost_net_virtqueue *nvq = container_of(vq, struct vhost_net_virtqueue, vq); struct vhost_poll *poll = n->poll + (nvq - n->vqs); - if (!vq->private_data) + if (!vhost_vq_get_backend(vq)) return; vhost_poll_stop(poll); } @@ -437,7 +437,7 @@ static int vhost_net_enable_vq(struct vhost_net *n, struct vhost_poll *poll = n->poll + (nvq - n->vqs); struct socket *sock; - sock = vq->private_data; + sock = vhost_vq_get_backend(vq); if (!sock) return 0; @@ -524,7 +524,7 @@ static void vhost_net_busy_poll(struct vhost_net *net, return; vhost_disable_notify(&net->dev, vq); - sock = rvq->private_data; + sock = vhost_vq_get_backend(rvq); busyloop_timeout = poll_rx ? rvq->busyloop_timeout: tvq->busyloop_timeout; @@ -570,8 +570,10 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net, if (r == tvq->num && tvq->busyloop_timeout) { /* Flush batched packets first */ - if (!vhost_sock_zcopy(tvq->private_data)) - vhost_tx_batch(net, tnvq, tvq->private_data, msghdr); + if (!vhost_sock_zcopy(vhost_vq_get_backend(tvq))) + vhost_tx_batch(net, tnvq, + vhost_vq_get_backend(tvq), + msghdr); vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false); @@ -685,7 +687,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, struct vhost_virtqueue *vq = &nvq->vq; struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); - struct socket *sock = vq->private_data; + struct socket *sock = vhost_vq_get_backend(vq); struct page_frag *alloc_frag = &net->page_frag; struct virtio_net_hdr *gso; struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp]; @@ -745,6 +747,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, xdp->data = buf + pad; xdp->data_end = xdp->data + len; hdr->buflen = buflen; + xdp->frame_sz = buflen; --net->refcnt_bias; alloc_frag->offset += buflen; @@ -952,7 +955,7 @@ static void handle_tx(struct vhost_net *net) struct socket *sock; mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX); - sock = vq->private_data; + sock = vhost_vq_get_backend(vq); if (!sock) goto out; @@ -1121,7 +1124,7 @@ static void handle_rx(struct vhost_net *net) int recv_pkts = 0; mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX); - sock = vq->private_data; + sock = vhost_vq_get_backend(vq); if (!sock) goto out; @@ -1324,7 +1327,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) } vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, UIO_MAXIOV + VHOST_NET_BATCH, - VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, + VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true, NULL); vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); @@ -1345,9 +1348,9 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n, container_of(vq, struct vhost_net_virtqueue, vq); mutex_lock(&vq->mutex); - sock = vq->private_data; + sock = vhost_vq_get_backend(vq); vhost_net_disable_vq(n, vq); - vq->private_data = NULL; + vhost_vq_set_backend(vq, NULL); vhost_net_buf_unproduce(nvq); nvq->rx_ring = NULL; mutex_unlock(&vq->mutex); @@ -1521,7 +1524,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) } /* start polling new socket */ - oldsock = vq->private_data; + oldsock = vhost_vq_get_backend(vq); if (sock != oldsock) { ubufs = vhost_net_ubuf_alloc(vq, sock && vhost_sock_zcopy(sock)); @@ -1531,7 +1534,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) } vhost_net_disable_vq(n, vq); - vq->private_data = sock; + vhost_vq_set_backend(vq, sock); vhost_net_buf_unproduce(nvq); r = vhost_vq_init_access(vq); if (r) @@ -1568,7 +1571,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) return 0; err_used: - vq->private_data = oldsock; + vhost_vq_set_backend(vq, oldsock); vhost_net_enable_vq(n, vq); if (ubufs) vhost_net_ubuf_put_wait_and_free(ubufs); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 7653667a8cdc..6fb4d7ecfa19 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -452,7 +452,7 @@ vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt) unsigned out, in; int head, ret; - if (!vq->private_data) { + if (!vhost_vq_get_backend(vq)) { vs->vs_events_missed = true; return; } @@ -892,7 +892,7 @@ vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc, } else { struct vhost_scsi_tpg **vs_tpg, *tpg; - vs_tpg = vq->private_data; /* validated at handler entry */ + vs_tpg = vhost_vq_get_backend(vq); /* validated at handler entry */ tpg = READ_ONCE(vs_tpg[*vc->target]); if (unlikely(!tpg)) { @@ -929,7 +929,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) * We can handle the vq only after the endpoint is setup by calling the * VHOST_SCSI_SET_ENDPOINT ioctl. */ - vs_tpg = vq->private_data; + vs_tpg = vhost_vq_get_backend(vq); if (!vs_tpg) goto out; @@ -1184,7 +1184,7 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) * We can handle the vq only after the endpoint is setup by calling the * VHOST_SCSI_SET_ENDPOINT ioctl. */ - if (!vq->private_data) + if (!vhost_vq_get_backend(vq)) goto out; memset(&vc, 0, sizeof(vc)); @@ -1322,7 +1322,7 @@ static void vhost_scsi_evt_handle_kick(struct vhost_work *work) struct vhost_scsi *vs = container_of(vq->dev, struct vhost_scsi, dev); mutex_lock(&vq->mutex); - if (!vq->private_data) + if (!vhost_vq_get_backend(vq)) goto out; if (vs->vs_events_missed) @@ -1460,7 +1460,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); - vq->private_data = vs_tpg; + vhost_vq_set_backend(vq, vs_tpg); vhost_vq_init_access(vq); mutex_unlock(&vq->mutex); } @@ -1547,7 +1547,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); - vq->private_data = NULL; + vhost_vq_set_backend(vq, NULL); mutex_unlock(&vq->mutex); } } @@ -1628,7 +1628,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; } vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV, - VHOST_SCSI_WEIGHT, 0, NULL); + VHOST_SCSI_WEIGHT, 0, true, NULL); vhost_scsi_init_inflight(vs, NULL); @@ -2280,6 +2280,7 @@ static struct configfs_attribute *vhost_scsi_wwn_attrs[] = { static const struct target_core_fabric_ops vhost_scsi_ops = { .module = THIS_MODULE, .fabric_name = "vhost", + .max_data_sg_nents = VHOST_SCSI_PREALLOC_SGLS, .tpg_get_wwn = vhost_scsi_get_fabric_wwn, .tpg_get_tag = vhost_scsi_get_tpgt, .tpg_check_demo_mode = vhost_scsi_check_true, diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index e37c92d4d7ad..0466921f4772 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -49,7 +49,7 @@ static void handle_vq(struct vhost_test *n) void *private; mutex_lock(&vq->mutex); - private = vq->private_data; + private = vhost_vq_get_backend(vq); if (!private) { mutex_unlock(&vq->mutex); return; @@ -120,7 +120,7 @@ static int vhost_test_open(struct inode *inode, struct file *f) vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ]; n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV, - VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT); + VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, true, NULL); f->private_data = n; @@ -133,8 +133,8 @@ static void *vhost_test_stop_vq(struct vhost_test *n, void *private; mutex_lock(&vq->mutex); - private = vq->private_data; - vq->private_data = NULL; + private = vhost_vq_get_backend(vq); + vhost_vq_set_backend(vq, NULL); mutex_unlock(&vq->mutex); return private; } @@ -198,8 +198,8 @@ static long vhost_test_run(struct vhost_test *n, int test) priv = test ? n : NULL; /* start polling new socket */ - oldpriv = vq->private_data; - vq->private_data = priv; + oldpriv = vhost_vq_get_backend(vq); + vhost_vq_set_backend(vq, priv); r = vhost_vq_init_access(&n->vqs[index]); @@ -225,7 +225,7 @@ static long vhost_test_reset_owner(struct vhost_test *n) { void *priv = NULL; long err; - struct vhost_umem *umem; + struct vhost_iotlb *umem; mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 421f02a8530a..7580e34f76c1 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -15,12 +15,14 @@ #include <linux/module.h> #include <linux/cdev.h> #include <linux/device.h> +#include <linux/mm.h> #include <linux/iommu.h> #include <linux/uuid.h> #include <linux/vdpa.h> #include <linux/nospec.h> #include <linux/vhost.h> #include <linux/virtio_net.h> +#include <linux/kernel.h> #include "vhost.h" @@ -70,6 +72,7 @@ struct vhost_vdpa { int nvqs; int virtio_id; int minor; + struct eventfd_ctx *config_ctx; }; static DEFINE_IDA(vhost_vdpa_ida); @@ -101,6 +104,17 @@ static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) return IRQ_HANDLED; } +static irqreturn_t vhost_vdpa_config_cb(void *private) +{ + struct vhost_vdpa *v = private; + struct eventfd_ctx *config_ctx = v->config_ctx; + + if (config_ctx) + eventfd_signal(config_ctx, 1); + + return IRQ_HANDLED; +} + static void vhost_vdpa_reset(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; @@ -288,6 +302,36 @@ static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp) return 0; } +static void vhost_vdpa_config_put(struct vhost_vdpa *v) +{ + if (v->config_ctx) + eventfd_ctx_put(v->config_ctx); +} + +static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) +{ + struct vdpa_callback cb; + int fd; + struct eventfd_ctx *ctx; + + cb.callback = vhost_vdpa_config_cb; + cb.private = v->vdpa; + if (copy_from_user(&fd, argp, sizeof(fd))) + return -EFAULT; + + ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd); + swap(ctx, v->config_ctx); + + if (!IS_ERR_OR_NULL(ctx)) + eventfd_ctx_put(ctx); + + if (IS_ERR(v->config_ctx)) + return PTR_ERR(v->config_ctx); + + v->vdpa->config->set_config_cb(v->vdpa, &cb); + + return 0; +} static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -296,7 +340,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, struct vdpa_callback cb; struct vhost_virtqueue *vq; struct vhost_vring_state s; - u8 status; u32 idx; long r; @@ -310,8 +353,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, idx = array_index_nospec(idx, v->nvqs); vq = &v->vqs[idx]; - status = ops->get_status(vdpa); - if (cmd == VHOST_VDPA_SET_VRING_ENABLE) { if (copy_from_user(&s, argp, sizeof(s))) return -EFAULT; @@ -398,6 +439,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_SET_LOG_FD: r = -ENOIOCTLCMD; break; + case VHOST_VDPA_SET_CONFIG_CALL: + r = vhost_vdpa_set_config_call(v, argp); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) @@ -530,7 +574,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, if (!npages) return -EINVAL; - down_read(&dev->mm->mmap_sem); + mmap_read_lock(dev->mm); locked = atomic64_add_return(npages, &dev->mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; @@ -583,7 +627,7 @@ out: vhost_vdpa_unmap(v, msg->iova, msg->size); atomic64_sub(npages, &dev->mm->pinned_vm); } - up_read(&dev->mm->mmap_sem); + mmap_read_unlock(dev->mm); free_page((unsigned long)page_list); return ret; } @@ -678,8 +722,6 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) int nvqs, i, r, opened; v = container_of(inode->i_cdev, struct vhost_vdpa, cdev); - if (!v) - return -ENODEV; opened = atomic_cmpxchg(&v->opened, 0, 1); if (opened) @@ -699,7 +741,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) vqs[i] = &v->vqs[i]; vqs[i]->handle_kick = handle_vq_kick; } - vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, + vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false, vhost_vdpa_process_iotlb_msg); dev->iotlb = vhost_iotlb_alloc(0, 0); @@ -734,6 +776,7 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) vhost_dev_stop(&v->vdev); vhost_vdpa_iotlb_free(v); vhost_vdpa_free_domain(v); + vhost_vdpa_config_put(v); vhost_dev_cleanup(&v->vdev); kfree(v->vdev.vqs); mutex_unlock(&d->mutex); @@ -744,12 +787,74 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) return 0; } +#ifdef CONFIG_MMU +static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) +{ + struct vhost_vdpa *v = vmf->vma->vm_file->private_data; + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + struct vdpa_notification_area notify; + struct vm_area_struct *vma = vmf->vma; + u16 index = vma->vm_pgoff; + + notify = ops->get_vq_notification(vdpa, index); + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (remap_pfn_range(vma, vmf->address & PAGE_MASK, + notify.addr >> PAGE_SHIFT, PAGE_SIZE, + vma->vm_page_prot)) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} + +static const struct vm_operations_struct vhost_vdpa_vm_ops = { + .fault = vhost_vdpa_fault, +}; + +static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct vhost_vdpa *v = vma->vm_file->private_data; + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + struct vdpa_notification_area notify; + int index = vma->vm_pgoff; + + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + if ((vma->vm_flags & VM_SHARED) == 0) + return -EINVAL; + if (vma->vm_flags & VM_READ) + return -EINVAL; + if (index > 65535) + return -EINVAL; + if (!ops->get_vq_notification) + return -ENOTSUPP; + + /* To be safe and easily modelled by userspace, We only + * support the doorbell which sits on the page boundary and + * does not share the page with other registers. + */ + notify = ops->get_vq_notification(vdpa, index); + if (notify.addr & (PAGE_SIZE - 1)) + return -EINVAL; + if (vma->vm_end - vma->vm_start != notify.size) + return -ENOTSUPP; + + vma->vm_ops = &vhost_vdpa_vm_ops; + return 0; +} +#endif /* CONFIG_MMU */ + static const struct file_operations vhost_vdpa_fops = { .owner = THIS_MODULE, .open = vhost_vdpa_open, .release = vhost_vdpa_release, .write_iter = vhost_vdpa_chr_write_iter, .unlocked_ioctl = vhost_vdpa_unlocked_ioctl, +#ifdef CONFIG_MMU + .mmap = vhost_vdpa_mmap, +#endif /* CONFIG_MMU */ .compat_ioctl = compat_ptr_ioctl, }; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index d450e16c5c25..062595ee1f83 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -166,11 +166,16 @@ static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait); + struct vhost_work *work = &poll->work; if (!(key_to_poll(key) & poll->mask)) return 0; - vhost_poll_queue(poll); + if (!poll->dev->use_worker) + work->fn(work); + else + vhost_poll_queue(poll); + return 0; } @@ -454,6 +459,7 @@ static size_t vhost_get_desc_size(struct vhost_virtqueue *vq, void vhost_dev_init(struct vhost_dev *dev, struct vhost_virtqueue **vqs, int nvqs, int iov_limit, int weight, int byte_weight, + bool use_worker, int (*msg_handler)(struct vhost_dev *dev, struct vhost_iotlb_msg *msg)) { @@ -471,6 +477,7 @@ void vhost_dev_init(struct vhost_dev *dev, dev->iov_limit = iov_limit; dev->weight = weight; dev->byte_weight = byte_weight; + dev->use_worker = use_worker; dev->msg_handler = msg_handler; init_llist_head(&dev->work_list); init_waitqueue_head(&dev->wait); @@ -534,6 +541,36 @@ bool vhost_dev_has_owner(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_dev_has_owner); +static void vhost_attach_mm(struct vhost_dev *dev) +{ + /* No owner, become one */ + if (dev->use_worker) { + dev->mm = get_task_mm(current); + } else { + /* vDPA device does not use worker thead, so there's + * no need to hold the address space for mm. This help + * to avoid deadlock in the case of mmap() which may + * held the refcnt of the file and depends on release + * method to remove vma. + */ + dev->mm = current->mm; + mmgrab(dev->mm); + } +} + +static void vhost_detach_mm(struct vhost_dev *dev) +{ + if (!dev->mm) + return; + + if (dev->use_worker) + mmput(dev->mm); + else + mmdrop(dev->mm); + + dev->mm = NULL; +} + /* Caller should have device mutex */ long vhost_dev_set_owner(struct vhost_dev *dev) { @@ -546,21 +583,24 @@ long vhost_dev_set_owner(struct vhost_dev *dev) goto err_mm; } - /* No owner, become one */ - dev->mm = get_task_mm(current); + vhost_attach_mm(dev); + dev->kcov_handle = kcov_common_handle(); - worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid); - if (IS_ERR(worker)) { - err = PTR_ERR(worker); - goto err_worker; - } + if (dev->use_worker) { + worker = kthread_create(vhost_worker, dev, + "vhost-%d", current->pid); + if (IS_ERR(worker)) { + err = PTR_ERR(worker); + goto err_worker; + } - dev->worker = worker; - wake_up_process(worker); /* avoid contributing to loadavg */ + dev->worker = worker; + wake_up_process(worker); /* avoid contributing to loadavg */ - err = vhost_attach_cgroups(dev); - if (err) - goto err_cgroup; + err = vhost_attach_cgroups(dev); + if (err) + goto err_cgroup; + } err = vhost_dev_alloc_iovecs(dev); if (err) @@ -568,12 +608,12 @@ long vhost_dev_set_owner(struct vhost_dev *dev) return 0; err_cgroup: - kthread_stop(worker); - dev->worker = NULL; + if (dev->worker) { + kthread_stop(dev->worker); + dev->worker = NULL; + } err_worker: - if (dev->mm) - mmput(dev->mm); - dev->mm = NULL; + vhost_detach_mm(dev); dev->kcov_handle = 0; err_mm: return err; @@ -670,9 +710,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) dev->worker = NULL; dev->kcov_handle = 0; } - if (dev->mm) - mmput(dev->mm); - dev->mm = NULL; + vhost_detach_mm(dev); } EXPORT_SYMBOL_GPL(vhost_dev_cleanup); @@ -730,7 +768,7 @@ static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq, if (!map) return NULL; - return (void *)(uintptr_t)(map->addr + addr - map->start); + return (void __user *)(uintptr_t)(map->addr + addr - map->start); } /* Can we switch to this memory table? */ @@ -869,7 +907,7 @@ static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq, * not happen in this case. */ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, - void *addr, unsigned int size, + void __user *addr, unsigned int size, int type) { void __user *uaddr = vhost_vq_meta_fetch(vq, @@ -882,7 +920,7 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, #define vhost_put_user(vq, x, ptr) \ ({ \ - int ret = -EFAULT; \ + int ret; \ if (!vq->iotlb) { \ ret = __put_user(x, ptr); \ } else { \ @@ -1244,9 +1282,9 @@ static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access) } static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, - struct vring_desc __user *desc, - struct vring_avail __user *avail, - struct vring_used __user *used) + vring_desc_t __user *desc, + vring_avail_t __user *avail, + vring_used_t __user *used) { return access_ok(desc, vhost_get_desc_size(vq, num)) && @@ -1574,7 +1612,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = -EFAULT; break; } - eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd); if (IS_ERR(eventfp)) { r = PTR_ERR(eventfp); break; @@ -1590,7 +1628,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = -EFAULT; break; } - ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd); + ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; @@ -1602,7 +1640,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = -EFAULT; break; } - ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd); + ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; @@ -1727,7 +1765,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) r = get_user(fd, (int __user *)argp); if (r < 0) break; - ctx = fd == -1 ? NULL : eventfd_ctx_fdget(fd); + ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; @@ -1762,15 +1800,14 @@ static int set_bit_to_user(int nr, void __user *addr) int bit = nr + (log % PAGE_SIZE) * 8; int r; - r = get_user_pages_fast(log, 1, FOLL_WRITE, &page); + r = pin_user_pages_fast(log, 1, FOLL_WRITE, &page); if (r < 0) return r; BUG_ON(r != 1); base = kmap_atomic(page); set_bit(bit, base); kunmap_atomic(base); - set_page_dirty_lock(page); - put_page(page); + unpin_user_pages_dirty_lock(&page, 1, true); return 0; } @@ -2301,7 +2338,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, unsigned count) { - struct vring_used_elem __user *used; + vring_used_elem_t __user *used; u16 old, new; int start; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 181382185bbc..c8e96a095d3b 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -67,9 +67,9 @@ struct vhost_virtqueue { /* The actual ring of buffers. */ struct mutex mutex; unsigned int num; - struct vring_desc __user *desc; - struct vring_avail __user *avail; - struct vring_used __user *used; + vring_desc_t __user *desc; + vring_avail_t __user *avail; + vring_used_t __user *used; const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS]; struct file *kick; struct eventfd_ctx *call_ctx; @@ -154,6 +154,7 @@ struct vhost_dev { int weight; int byte_weight; u64 kcov_handle; + bool use_worker; int (*msg_handler)(struct vhost_dev *dev, struct vhost_iotlb_msg *msg); }; @@ -161,6 +162,7 @@ struct vhost_dev { bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len); void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs, int iov_limit, int weight, int byte_weight, + bool use_worker, int (*msg_handler)(struct vhost_dev *dev, struct vhost_iotlb_msg *msg)); long vhost_dev_set_owner(struct vhost_dev *dev); @@ -231,6 +233,33 @@ enum { (1ULL << VIRTIO_F_VERSION_1) }; +/** + * vhost_vq_set_backend - Set backend. + * + * @vq Virtqueue. + * @private_data The private data. + * + * Context: Need to call with vq->mutex acquired. + */ +static inline void vhost_vq_set_backend(struct vhost_virtqueue *vq, + void *private_data) +{ + vq->private_data = private_data; +} + +/** + * vhost_vq_get_backend - Get backend. + * + * @vq Virtqueue. + * + * Context: Need to call with vq->mutex acquired. + * Return: Private data previously set with vhost_vq_set_backend. + */ +static inline void *vhost_vq_get_backend(struct vhost_virtqueue *vq) +{ + return vq->private_data; +} + static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) { return vq->acked_features & (1ULL << bit); diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index ee0491f579ac..e059a9a47cdf 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -13,9 +13,11 @@ #include <linux/uaccess.h> #include <linux/slab.h> #include <linux/export.h> +#if IS_REACHABLE(CONFIG_VHOST_IOTLB) #include <linux/bvec.h> #include <linux/highmem.h> #include <linux/vhost_iotlb.h> +#endif #include <uapi/linux/virtio_config.h> static __printf(1,2) __cold void vringh_bad(const char *fmt, ...) @@ -618,9 +620,9 @@ static inline int xfer_to_user(const struct vringh *vrh, */ int vringh_init_user(struct vringh *vrh, u64 features, unsigned int num, bool weak_barriers, - struct vring_desc __user *desc, - struct vring_avail __user *avail, - struct vring_used __user *used) + vring_desc_t __user *desc, + vring_avail_t __user *avail, + vring_used_t __user *used) { /* Sane power of 2 please! */ if (!num || num > 0xffff || (num & (num - 1))) { @@ -1059,6 +1061,8 @@ int vringh_need_notify_kern(struct vringh *vrh) } EXPORT_SYMBOL(vringh_need_notify_kern); +#if IS_REACHABLE(CONFIG_VHOST_IOTLB) + static int iotlb_translate(const struct vringh *vrh, u64 addr, u64 len, struct bio_vec iov[], int iov_size, u32 perm) @@ -1416,5 +1420,6 @@ int vringh_need_notify_iotlb(struct vringh *vrh) } EXPORT_SYMBOL(vringh_need_notify_iotlb); +#endif MODULE_LICENSE("GPL"); diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 97669484a3f6..a483cec31d5c 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -91,7 +91,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, mutex_lock(&vq->mutex); - if (!vq->private_data) + if (!vhost_vq_get_backend(vq)) goto out; /* Avoid further vmexits, we're already processing the virtqueue */ @@ -181,14 +181,14 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, break; } - vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len); - added = true; - - /* Deliver to monitoring devices all correctly transmitted - * packets. + /* Deliver to monitoring devices all packets that we + * will transmit. */ virtio_transport_deliver_tap_pkt(pkt); + vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len); + added = true; + pkt->off += payload_len; total_len += payload_len; @@ -196,6 +196,12 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, * to send it with the next available buffer. */ if (pkt->off < pkt->len) { + /* We are queueing the same virtio_vsock_pkt to handle + * the remaining bytes, and we want to deliver it + * to monitoring devices in the next iteration. + */ + pkt->tap_delivered = false; + spin_lock_bh(&vsock->send_pkt_list_lock); list_add(&pkt->list, &vsock->send_pkt_list); spin_unlock_bh(&vsock->send_pkt_list_lock); @@ -440,7 +446,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) mutex_lock(&vq->mutex); - if (!vq->private_data) + if (!vhost_vq_get_backend(vq)) goto out; vhost_disable_notify(&vsock->dev, vq); @@ -533,8 +539,8 @@ static int vhost_vsock_start(struct vhost_vsock *vsock) goto err_vq; } - if (!vq->private_data) { - vq->private_data = vsock; + if (!vhost_vq_get_backend(vq)) { + vhost_vq_set_backend(vq, vsock); ret = vhost_vq_init_access(vq); if (ret) goto err_vq; @@ -543,18 +549,23 @@ static int vhost_vsock_start(struct vhost_vsock *vsock) mutex_unlock(&vq->mutex); } + /* Some packets may have been queued before the device was started, + * let's kick the send worker to send them. + */ + vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); + mutex_unlock(&vsock->dev.mutex); return 0; err_vq: - vq->private_data = NULL; + vhost_vq_set_backend(vq, NULL); mutex_unlock(&vq->mutex); for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { vq = &vsock->vqs[i]; mutex_lock(&vq->mutex); - vq->private_data = NULL; + vhost_vq_set_backend(vq, NULL); mutex_unlock(&vq->mutex); } err: @@ -577,7 +588,7 @@ static int vhost_vsock_stop(struct vhost_vsock *vsock) struct vhost_virtqueue *vq = &vsock->vqs[i]; mutex_lock(&vq->mutex); - vq->private_data = NULL; + vhost_vq_set_backend(vq, NULL); mutex_unlock(&vq->mutex); } @@ -621,7 +632,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT, - VHOST_VSOCK_WEIGHT, NULL); + VHOST_VSOCK_WEIGHT, true, NULL); file->private_data = vsock; spin_lock_init(&vsock->send_pkt_list_lock); |