diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-11 14:48:42 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-11 14:48:42 -0700 |
commit | 78e709522d2c012cb0daad2e668506637bffb7c2 (patch) | |
tree | 899d892238891f4f2ca1dee3657cb69694c0ca34 /drivers/vhost | |
parent | b79bd0d5102b4a3ea908018fda6b84a4c8fd6235 (diff) | |
parent | 7bc7f61897b66bef78bb5952e3d1e9f3aaf9ccca (diff) |
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
- vduse driver ("vDPA Device in Userspace") supporting emulated virtio
block devices
- virtio-vsock support for end of record with SEQPACKET
- vdpa: mac and mq support for ifcvf and mlx5
- vdpa: management netlink for ifcvf
- virtio-i2c, gpio dt bindings
- misc fixes and cleanups
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (39 commits)
Documentation: Add documentation for VDUSE
vduse: Introduce VDUSE - vDPA Device in Userspace
vduse: Implement an MMU-based software IOTLB
vdpa: Support transferring virtual addressing during DMA mapping
vdpa: factor out vhost_vdpa_pa_map() and vhost_vdpa_pa_unmap()
vdpa: Add an opaque pointer for vdpa_config_ops.dma_map()
vhost-iotlb: Add an opaque pointer for vhost IOTLB
vhost-vdpa: Handle the failure of vdpa_reset()
vdpa: Add reset callback in vdpa_config_ops
vdpa: Fix some coding style issues
file: Export receive_fd() to modules
eventfd: Export eventfd_wake_count to modules
iova: Export alloc_iova_fast() and free_iova_fast()
virtio-blk: remove unneeded "likely" statements
virtio-balloon: Use virtio_find_vqs() helper
vdpa: Make use of PFN_PHYS/PFN_UP/PFN_DOWN helper macro
vsock_test: update message bounds test for MSG_EOR
af_vsock: rename variables in receive loop
virtio/vsock: support MSG_EOR bit processing
vhost/vsock: support MSG_EOR bit processing
...
Diffstat (limited to 'drivers/vhost')
-rw-r--r-- | drivers/vhost/iotlb.c | 20 | ||||
-rw-r--r-- | drivers/vhost/scsi.c | 14 | ||||
-rw-r--r-- | drivers/vhost/vdpa.c | 188 | ||||
-rw-r--r-- | drivers/vhost/vsock.c | 28 |
4 files changed, 177 insertions, 73 deletions
diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c index 0582079e4bcc..670d56c879e5 100644 --- a/drivers/vhost/iotlb.c +++ b/drivers/vhost/iotlb.c @@ -36,19 +36,21 @@ void vhost_iotlb_map_free(struct vhost_iotlb *iotlb, EXPORT_SYMBOL_GPL(vhost_iotlb_map_free); /** - * vhost_iotlb_add_range - add a new range to vhost IOTLB + * vhost_iotlb_add_range_ctx - add a new range to vhost IOTLB * @iotlb: the IOTLB * @start: start of the IOVA range * @last: last of IOVA range * @addr: the address that is mapped to @start * @perm: access permission of this range + * @opaque: the opaque pointer for the new mapping * * Returns an error last is smaller than start or memory allocation * fails */ -int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, - u64 start, u64 last, - u64 addr, unsigned int perm) +int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb, + u64 start, u64 last, + u64 addr, unsigned int perm, + void *opaque) { struct vhost_iotlb_map *map; @@ -71,6 +73,7 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, map->last = last; map->addr = addr; map->perm = perm; + map->opaque = opaque; iotlb->nmaps++; vhost_iotlb_itree_insert(map, &iotlb->root); @@ -80,6 +83,15 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, return 0; } +EXPORT_SYMBOL_GPL(vhost_iotlb_add_range_ctx); + +int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, + u64 start, u64 last, + u64 addr, unsigned int perm) +{ + return vhost_iotlb_add_range_ctx(iotlb, start, last, + addr, perm, NULL); +} EXPORT_SYMBOL_GPL(vhost_iotlb_add_range); /** diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 46f897e41217..532e204f2b1b 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1,24 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0+ /******************************************************************************* * Vhost kernel TCM fabric driver for virtio SCSI initiators * * (C) Copyright 2010-2013 Datera, Inc. * (C) Copyright 2010-2012 IBM Corp. * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. - * * Authors: Nicholas A. Bellinger <nab@daterainc.com> * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * ****************************************************************************/ #include <linux/module.h> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 9479f7f79217..f41d081777f5 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -116,12 +116,13 @@ static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) irq_bypass_unregister_producer(&vq->call_ctx.producer); } -static void vhost_vdpa_reset(struct vhost_vdpa *v) +static int vhost_vdpa_reset(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; - vdpa_reset(vdpa); v->in_batch = 0; + + return vdpa_reset(vdpa); } static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) @@ -157,7 +158,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; u8 status, status_old; - int nvqs = v->nvqs; + int ret, nvqs = v->nvqs; u16 i; if (copy_from_user(&status, statusp, sizeof(status))) @@ -172,7 +173,12 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) if (status != 0 && (ops->get_status(vdpa) & ~status) != 0) return -EINVAL; - ops->set_status(vdpa, status); + if (status == 0) { + ret = ops->reset(vdpa); + if (ret) + return ret; + } else + ops->set_status(vdpa, status); if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) for (i = 0; i < nvqs; i++) @@ -498,7 +504,7 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, return r; } -static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) +static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last) { struct vhost_dev *dev = &v->vdev; struct vhost_iotlb *iotlb = dev->iotlb; @@ -507,19 +513,44 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) unsigned long pfn, pinned; while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) { - pinned = map->size >> PAGE_SHIFT; - for (pfn = map->addr >> PAGE_SHIFT; + pinned = PFN_DOWN(map->size); + for (pfn = PFN_DOWN(map->addr); pinned > 0; pfn++, pinned--) { page = pfn_to_page(pfn); if (map->perm & VHOST_ACCESS_WO) set_page_dirty_lock(page); unpin_user_page(page); } - atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm); + atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm); vhost_iotlb_map_free(iotlb, map); } } +static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last) +{ + struct vhost_dev *dev = &v->vdev; + struct vhost_iotlb *iotlb = dev->iotlb; + struct vhost_iotlb_map *map; + struct vdpa_map_file *map_file; + + while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) { + map_file = (struct vdpa_map_file *)map->opaque; + fput(map_file->file); + kfree(map_file); + vhost_iotlb_map_free(iotlb, map); + } +} + +static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) +{ + struct vdpa_device *vdpa = v->vdpa; + + if (vdpa->use_va) + return vhost_vdpa_va_unmap(v, start, last); + + return vhost_vdpa_pa_unmap(v, start, last); +} + static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v) { struct vhost_dev *dev = &v->vdev; @@ -551,21 +582,21 @@ static int perm_to_iommu_flags(u32 perm) return flags | IOMMU_CACHE; } -static int vhost_vdpa_map(struct vhost_vdpa *v, - u64 iova, u64 size, u64 pa, u32 perm) +static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, + u64 size, u64 pa, u32 perm, void *opaque) { struct vhost_dev *dev = &v->vdev; struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; int r = 0; - r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1, - pa, perm); + r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1, + pa, perm, opaque); if (r) return r; if (ops->dma_map) { - r = ops->dma_map(vdpa, iova, size, pa, perm); + r = ops->dma_map(vdpa, iova, size, pa, perm, opaque); } else if (ops->set_map) { if (!v->in_batch) r = ops->set_map(vdpa, dev->iotlb); @@ -573,13 +604,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, r = iommu_map(v->domain, iova, pa, size, perm_to_iommu_flags(perm)); } - - if (r) + if (r) { vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); - else - atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); + return r; + } - return r; + if (!vdpa->use_va) + atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm); + + return 0; } static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) @@ -600,38 +633,78 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) } } -static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, - struct vhost_iotlb_msg *msg) +static int vhost_vdpa_va_map(struct vhost_vdpa *v, + u64 iova, u64 size, u64 uaddr, u32 perm) +{ + struct vhost_dev *dev = &v->vdev; + u64 offset, map_size, map_iova = iova; + struct vdpa_map_file *map_file; + struct vm_area_struct *vma; + int ret; + + mmap_read_lock(dev->mm); + + while (size) { + vma = find_vma(dev->mm, uaddr); + if (!vma) { + ret = -EINVAL; + break; + } + map_size = min(size, vma->vm_end - uaddr); + if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) && + !(vma->vm_flags & (VM_IO | VM_PFNMAP)))) + goto next; + + map_file = kzalloc(sizeof(*map_file), GFP_KERNEL); + if (!map_file) { + ret = -ENOMEM; + break; + } + offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; + map_file->offset = offset; + map_file->file = get_file(vma->vm_file); + ret = vhost_vdpa_map(v, map_iova, map_size, uaddr, + perm, map_file); + if (ret) { + fput(map_file->file); + kfree(map_file); + break; + } +next: + size -= map_size; + uaddr += map_size; + map_iova += map_size; + } + if (ret) + vhost_vdpa_unmap(v, iova, map_iova - iova); + + mmap_read_unlock(dev->mm); + + return ret; +} + +static int vhost_vdpa_pa_map(struct vhost_vdpa *v, + u64 iova, u64 size, u64 uaddr, u32 perm) { struct vhost_dev *dev = &v->vdev; - struct vhost_iotlb *iotlb = dev->iotlb; struct page **page_list; unsigned long list_size = PAGE_SIZE / sizeof(struct page *); unsigned int gup_flags = FOLL_LONGTERM; unsigned long npages, cur_base, map_pfn, last_pfn = 0; unsigned long lock_limit, sz2pin, nchunks, i; - u64 iova = msg->iova; + u64 start = iova; long pinned; int ret = 0; - if (msg->iova < v->range.first || !msg->size || - msg->iova > U64_MAX - msg->size + 1 || - msg->iova + msg->size - 1 > v->range.last) - return -EINVAL; - - if (vhost_iotlb_itree_first(iotlb, msg->iova, - msg->iova + msg->size - 1)) - return -EEXIST; - /* Limit the use of memory for bookkeeping */ page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) return -ENOMEM; - if (msg->perm & VHOST_ACCESS_WO) + if (perm & VHOST_ACCESS_WO) gup_flags |= FOLL_WRITE; - npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; + npages = PFN_UP(size + (iova & ~PAGE_MASK)); if (!npages) { ret = -EINVAL; goto free; @@ -639,13 +712,13 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, mmap_read_lock(dev->mm); - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK)); if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { ret = -ENOMEM; goto unlock; } - cur_base = msg->uaddr & PAGE_MASK; + cur_base = uaddr & PAGE_MASK; iova &= PAGE_MASK; nchunks = 0; @@ -673,10 +746,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, if (last_pfn && (this_pfn != last_pfn + 1)) { /* Pin a contiguous chunk of memory */ - csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; + csize = PFN_PHYS(last_pfn - map_pfn + 1); ret = vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm); + PFN_PHYS(map_pfn), + perm, NULL); if (ret) { /* * Unpin the pages that are left unmapped @@ -699,13 +772,13 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, last_pfn = this_pfn; } - cur_base += pinned << PAGE_SHIFT; + cur_base += PFN_PHYS(pinned); npages -= pinned; } /* Pin the rest chunk */ - ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, - map_pfn << PAGE_SHIFT, msg->perm); + ret = vhost_vdpa_map(v, iova, PFN_PHYS(last_pfn - map_pfn + 1), + PFN_PHYS(map_pfn), perm, NULL); out: if (ret) { if (nchunks) { @@ -724,13 +797,38 @@ out: for (pfn = map_pfn; pfn <= last_pfn; pfn++) unpin_user_page(pfn_to_page(pfn)); } - vhost_vdpa_unmap(v, msg->iova, msg->size); + vhost_vdpa_unmap(v, start, size); } unlock: mmap_read_unlock(dev->mm); free: free_page((unsigned long)page_list); return ret; + +} + +static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, + struct vhost_iotlb_msg *msg) +{ + struct vhost_dev *dev = &v->vdev; + struct vdpa_device *vdpa = v->vdpa; + struct vhost_iotlb *iotlb = dev->iotlb; + + if (msg->iova < v->range.first || !msg->size || + msg->iova > U64_MAX - msg->size + 1 || + msg->iova + msg->size - 1 > v->range.last) + return -EINVAL; + + if (vhost_iotlb_itree_first(iotlb, msg->iova, + msg->iova + msg->size - 1)) + return -EEXIST; + + if (vdpa->use_va) + return vhost_vdpa_va_map(v, msg->iova, msg->size, + msg->uaddr, msg->perm); + + return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr, + msg->perm); } static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, @@ -860,7 +958,9 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) return -EBUSY; nvqs = v->nvqs; - vhost_vdpa_reset(v); + r = vhost_vdpa_reset(v); + if (r) + goto err; vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL); if (!vqs) { @@ -945,7 +1045,7 @@ static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); if (remap_pfn_range(vma, vmf->address & PAGE_MASK, - notify.addr >> PAGE_SHIFT, PAGE_SIZE, + PFN_DOWN(notify.addr), PAGE_SIZE, vma->vm_page_prot)) return VM_FAULT_SIGBUS; diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index f249622ef11b..938aefbc75ec 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -114,7 +114,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, size_t nbytes; size_t iov_len, payload_len; int head; - bool restore_flag = false; + u32 flags_to_restore = 0; spin_lock_bh(&vsock->send_pkt_list_lock); if (list_empty(&vsock->send_pkt_list)) { @@ -178,16 +178,21 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, * small rx buffers, headers of packets in rx queue are * created dynamically and are initialized with header * of current packet(except length). But in case of - * SOCK_SEQPACKET, we also must clear record delimeter - * bit(VIRTIO_VSOCK_SEQ_EOR). Otherwise, instead of one - * packet with delimeter(which marks end of record), - * there will be sequence of packets with delimeter - * bit set. After initialized header will be copied to - * rx buffer, this bit will be restored. + * SOCK_SEQPACKET, we also must clear message delimeter + * bit (VIRTIO_VSOCK_SEQ_EOM) and MSG_EOR bit + * (VIRTIO_VSOCK_SEQ_EOR) if set. Otherwise, + * there will be sequence of packets with these + * bits set. After initialized header will be copied to + * rx buffer, these required bits will be restored. */ - if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) { - pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); - restore_flag = true; + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) { + pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); + flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM; + + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) { + pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + flags_to_restore |= VIRTIO_VSOCK_SEQ_EOR; + } } } @@ -224,8 +229,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, * to send it with the next available buffer. */ if (pkt->off < pkt->len) { - if (restore_flag) - pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + pkt->hdr.flags |= cpu_to_le32(flags_to_restore); /* We are queueing the same virtio_vsock_pkt to handle * the remaining bytes, and we want to deliver it |