author	Linus Torvalds <torvalds@linux-foundation.org>	2021-09-11 14:48:42 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2021-09-11 14:48:42 -0700
commit	78e709522d2c012cb0daad2e668506637bffb7c2 (patch)
tree	899d892238891f4f2ca1dee3657cb69694c0ca34 /drivers/vhost
parent	b79bd0d5102b4a3ea908018fda6b84a4c8fd6235 (diff)
parent	7bc7f61897b66bef78bb5952e3d1e9f3aaf9ccca (diff)
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:

 - vduse driver ("vDPA Device in Userspace") supporting emulated virtio
   block devices

 - virtio-vsock support for end of record with SEQPACKET

 - vdpa: mac and mq support for ifcvf and mlx5

 - vdpa: management netlink for ifcvf

 - virtio-i2c, gpio dt bindings

 - misc fixes and cleanups

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (39 commits)
  Documentation: Add documentation for VDUSE
  vduse: Introduce VDUSE - vDPA Device in Userspace
  vduse: Implement an MMU-based software IOTLB
  vdpa: Support transferring virtual addressing during DMA mapping
  vdpa: factor out vhost_vdpa_pa_map() and vhost_vdpa_pa_unmap()
  vdpa: Add an opaque pointer for vdpa_config_ops.dma_map()
  vhost-iotlb: Add an opaque pointer for vhost IOTLB
  vhost-vdpa: Handle the failure of vdpa_reset()
  vdpa: Add reset callback in vdpa_config_ops
  vdpa: Fix some coding style issues
  file: Export receive_fd() to modules
  eventfd: Export eventfd_wake_count to modules
  iova: Export alloc_iova_fast() and free_iova_fast()
  virtio-blk: remove unneeded "likely" statements
  virtio-balloon: Use virtio_find_vqs() helper
  vdpa: Make use of PFN_PHYS/PFN_UP/PFN_DOWN helper macro
  vsock_test: update message bounds test for MSG_EOR
  af_vsock: rename variables in receive loop
  virtio/vsock: support MSG_EOR bit processing
  vhost/vsock: support MSG_EOR bit processing
  ...
Diffstat (limited to 'drivers/vhost')
-rw-r--r--	drivers/vhost/iotlb.c	20
-rw-r--r--	drivers/vhost/scsi.c	14
-rw-r--r--	drivers/vhost/vdpa.c	188
-rw-r--r--	drivers/vhost/vsock.c	28
4 files changed, 177 insertions, 73 deletions
diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c
index 0582079e4bcc..670d56c879e5 100644
--- a/drivers/vhost/iotlb.c
+++ b/drivers/vhost/iotlb.c
@@ -36,19 +36,21 @@ void vhost_iotlb_map_free(struct vhost_iotlb *iotlb,
EXPORT_SYMBOL_GPL(vhost_iotlb_map_free);
/**
- * vhost_iotlb_add_range - add a new range to vhost IOTLB
+ * vhost_iotlb_add_range_ctx - add a new range to vhost IOTLB
* @iotlb: the IOTLB
* @start: start of the IOVA range
* @last: last of IOVA range
* @addr: the address that is mapped to @start
* @perm: access permission of this range
+ * @opaque: the opaque pointer for the new mapping
*
* Returns an error last is smaller than start or memory allocation
* fails
*/
-int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
- u64 start, u64 last,
- u64 addr, unsigned int perm)
+int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb,
+ u64 start, u64 last,
+ u64 addr, unsigned int perm,
+ void *opaque)
{
struct vhost_iotlb_map *map;
@@ -71,6 +73,7 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
map->last = last;
map->addr = addr;
map->perm = perm;
+ map->opaque = opaque;
iotlb->nmaps++;
vhost_iotlb_itree_insert(map, &iotlb->root);
@@ -80,6 +83,15 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
return 0;
}
+EXPORT_SYMBOL_GPL(vhost_iotlb_add_range_ctx);
+
+int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
+ u64 start, u64 last,
+ u64 addr, unsigned int perm)
+{
+ return vhost_iotlb_add_range_ctx(iotlb, start, last,
+ addr, perm, NULL);
+}
EXPORT_SYMBOL_GPL(vhost_iotlb_add_range);
/**
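Illustrative aside, not part of the commit: the iotlb.c hunk above turns vhost_iotlb_add_range() into a thin wrapper around the new _ctx variant, which stores the extra argument in map->opaque. A caller that wants per-mapping context (the way vhost-vdpa attaches a struct vdpa_map_file later in this series) might use it roughly as sketched below; "my_ctx" and add_mapping() are hypothetical names invented for the example.

/* Hypothetical usage sketch of vhost_iotlb_add_range_ctx(); whatever is
 * passed as the last argument ends up in map->opaque and can be read back
 * when the range is looked up or torn down.
 */
struct my_ctx {
	struct file *file;
	u64 offset;
};

static int add_mapping(struct vhost_iotlb *iotlb, u64 iova, u64 size,
		       u64 addr, unsigned int perm, struct my_ctx *ctx)
{
	return vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
					 addr, perm, ctx);
}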
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 46f897e41217..532e204f2b1b 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1,24 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0+
/*******************************************************************************
* Vhost kernel TCM fabric driver for virtio SCSI initiators
*
* (C) Copyright 2010-2013 Datera, Inc.
* (C) Copyright 2010-2012 IBM Corp.
*
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
- *
* Authors: Nicholas A. Bellinger <nab@daterainc.com>
* Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
****************************************************************************/
#include <linux/module.h>
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 9479f7f79217..f41d081777f5 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -116,12 +116,13 @@ static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
irq_bypass_unregister_producer(&vq->call_ctx.producer);
}
-static void vhost_vdpa_reset(struct vhost_vdpa *v)
+static int vhost_vdpa_reset(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
- vdpa_reset(vdpa);
v->in_batch = 0;
+
+ return vdpa_reset(vdpa);
}
static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
@@ -157,7 +158,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
u8 status, status_old;
- int nvqs = v->nvqs;
+ int ret, nvqs = v->nvqs;
u16 i;
if (copy_from_user(&status, statusp, sizeof(status)))
@@ -172,7 +173,12 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
if (status != 0 && (ops->get_status(vdpa) & ~status) != 0)
return -EINVAL;
- ops->set_status(vdpa, status);
+ if (status == 0) {
+ ret = ops->reset(vdpa);
+ if (ret)
+ return ret;
+ } else
+ ops->set_status(vdpa, status);
if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
for (i = 0; i < nvqs; i++)
@@ -498,7 +504,7 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
return r;
}
-static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
+static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
struct vhost_dev *dev = &v->vdev;
struct vhost_iotlb *iotlb = dev->iotlb;
@@ -507,19 +513,44 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
unsigned long pfn, pinned;
while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
- pinned = map->size >> PAGE_SHIFT;
- for (pfn = map->addr >> PAGE_SHIFT;
+ pinned = PFN_DOWN(map->size);
+ for (pfn = PFN_DOWN(map->addr);
pinned > 0; pfn++, pinned--) {
page = pfn_to_page(pfn);
if (map->perm & VHOST_ACCESS_WO)
set_page_dirty_lock(page);
unpin_user_page(page);
}
- atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
+ atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
vhost_iotlb_map_free(iotlb, map);
}
}
+static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last)
+{
+ struct vhost_dev *dev = &v->vdev;
+ struct vhost_iotlb *iotlb = dev->iotlb;
+ struct vhost_iotlb_map *map;
+ struct vdpa_map_file *map_file;
+
+ while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
+ map_file = (struct vdpa_map_file *)map->opaque;
+ fput(map_file->file);
+ kfree(map_file);
+ vhost_iotlb_map_free(iotlb, map);
+ }
+}
+
+static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+
+ if (vdpa->use_va)
+ return vhost_vdpa_va_unmap(v, start, last);
+
+ return vhost_vdpa_pa_unmap(v, start, last);
+}
+
static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
{
struct vhost_dev *dev = &v->vdev;
@@ -551,21 +582,21 @@ static int perm_to_iommu_flags(u32 perm)
return flags | IOMMU_CACHE;
}
-static int vhost_vdpa_map(struct vhost_vdpa *v,
- u64 iova, u64 size, u64 pa, u32 perm)
+static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
+ u64 size, u64 pa, u32 perm, void *opaque)
{
struct vhost_dev *dev = &v->vdev;
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
int r = 0;
- r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
- pa, perm);
+ r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
+ pa, perm, opaque);
if (r)
return r;
if (ops->dma_map) {
- r = ops->dma_map(vdpa, iova, size, pa, perm);
+ r = ops->dma_map(vdpa, iova, size, pa, perm, opaque);
} else if (ops->set_map) {
if (!v->in_batch)
r = ops->set_map(vdpa, dev->iotlb);
@@ -573,13 +604,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
r = iommu_map(v->domain, iova, pa, size,
perm_to_iommu_flags(perm));
}
-
- if (r)
+ if (r) {
vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
- else
- atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
+ return r;
+ }
- return r;
+ if (!vdpa->use_va)
+ atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
+
+ return 0;
}
static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
@@ -600,38 +633,78 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
}
}
-static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
- struct vhost_iotlb_msg *msg)
+static int vhost_vdpa_va_map(struct vhost_vdpa *v,
+ u64 iova, u64 size, u64 uaddr, u32 perm)
+{
+ struct vhost_dev *dev = &v->vdev;
+ u64 offset, map_size, map_iova = iova;
+ struct vdpa_map_file *map_file;
+ struct vm_area_struct *vma;
+ int ret;
+
+ mmap_read_lock(dev->mm);
+
+ while (size) {
+ vma = find_vma(dev->mm, uaddr);
+ if (!vma) {
+ ret = -EINVAL;
+ break;
+ }
+ map_size = min(size, vma->vm_end - uaddr);
+ if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
+ !(vma->vm_flags & (VM_IO | VM_PFNMAP))))
+ goto next;
+
+ map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
+ if (!map_file) {
+ ret = -ENOMEM;
+ break;
+ }
+ offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
+ map_file->offset = offset;
+ map_file->file = get_file(vma->vm_file);
+ ret = vhost_vdpa_map(v, map_iova, map_size, uaddr,
+ perm, map_file);
+ if (ret) {
+ fput(map_file->file);
+ kfree(map_file);
+ break;
+ }
+next:
+ size -= map_size;
+ uaddr += map_size;
+ map_iova += map_size;
+ }
+ if (ret)
+ vhost_vdpa_unmap(v, iova, map_iova - iova);
+
+ mmap_read_unlock(dev->mm);
+
+ return ret;
+}
+
+static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
+ u64 iova, u64 size, u64 uaddr, u32 perm)
{
struct vhost_dev *dev = &v->vdev;
- struct vhost_iotlb *iotlb = dev->iotlb;
struct page **page_list;
unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
unsigned int gup_flags = FOLL_LONGTERM;
unsigned long npages, cur_base, map_pfn, last_pfn = 0;
unsigned long lock_limit, sz2pin, nchunks, i;
- u64 iova = msg->iova;
+ u64 start = iova;
long pinned;
int ret = 0;
- if (msg->iova < v->range.first || !msg->size ||
- msg->iova > U64_MAX - msg->size + 1 ||
- msg->iova + msg->size - 1 > v->range.last)
- return -EINVAL;
-
- if (vhost_iotlb_itree_first(iotlb, msg->iova,
- msg->iova + msg->size - 1))
- return -EEXIST;
-
/* Limit the use of memory for bookkeeping */
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list)
return -ENOMEM;
- if (msg->perm & VHOST_ACCESS_WO)
+ if (perm & VHOST_ACCESS_WO)
gup_flags |= FOLL_WRITE;
- npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
+ npages = PFN_UP(size + (iova & ~PAGE_MASK));
if (!npages) {
ret = -EINVAL;
goto free;
@@ -639,13 +712,13 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
mmap_read_lock(dev->mm);
- lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
ret = -ENOMEM;
goto unlock;
}
- cur_base = msg->uaddr & PAGE_MASK;
+ cur_base = uaddr & PAGE_MASK;
iova &= PAGE_MASK;
nchunks = 0;
@@ -673,10 +746,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
if (last_pfn && (this_pfn != last_pfn + 1)) {
/* Pin a contiguous chunk of memory */
- csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
+ csize = PFN_PHYS(last_pfn - map_pfn + 1);
ret = vhost_vdpa_map(v, iova, csize,
- map_pfn << PAGE_SHIFT,
- msg->perm);
+ PFN_PHYS(map_pfn),
+ perm, NULL);
if (ret) {
/*
* Unpin the pages that are left unmapped
@@ -699,13 +772,13 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
last_pfn = this_pfn;
}
- cur_base += pinned << PAGE_SHIFT;
+ cur_base += PFN_PHYS(pinned);
npages -= pinned;
}
/* Pin the rest chunk */
- ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
- map_pfn << PAGE_SHIFT, msg->perm);
+ ret = vhost_vdpa_map(v, iova, PFN_PHYS(last_pfn - map_pfn + 1),
+ PFN_PHYS(map_pfn), perm, NULL);
out:
if (ret) {
if (nchunks) {
@@ -724,13 +797,38 @@ out:
for (pfn = map_pfn; pfn <= last_pfn; pfn++)
unpin_user_page(pfn_to_page(pfn));
}
- vhost_vdpa_unmap(v, msg->iova, msg->size);
+ vhost_vdpa_unmap(v, start, size);
}
unlock:
mmap_read_unlock(dev->mm);
free:
free_page((unsigned long)page_list);
return ret;
+
+}
+
+static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
+ struct vhost_iotlb_msg *msg)
+{
+ struct vhost_dev *dev = &v->vdev;
+ struct vdpa_device *vdpa = v->vdpa;
+ struct vhost_iotlb *iotlb = dev->iotlb;
+
+ if (msg->iova < v->range.first || !msg->size ||
+ msg->iova > U64_MAX - msg->size + 1 ||
+ msg->iova + msg->size - 1 > v->range.last)
+ return -EINVAL;
+
+ if (vhost_iotlb_itree_first(iotlb, msg->iova,
+ msg->iova + msg->size - 1))
+ return -EEXIST;
+
+ if (vdpa->use_va)
+ return vhost_vdpa_va_map(v, msg->iova, msg->size,
+ msg->uaddr, msg->perm);
+
+ return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr,
+ msg->perm);
}
static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
@@ -860,7 +958,9 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
return -EBUSY;
nvqs = v->nvqs;
- vhost_vdpa_reset(v);
+ r = vhost_vdpa_reset(v);
+ if (r)
+ goto err;
vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
if (!vqs) {
@@ -945,7 +1045,7 @@ static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
- notify.addr >> PAGE_SHIFT, PAGE_SIZE,
+ PFN_DOWN(notify.addr), PAGE_SIZE,
vma->vm_page_prot))
return VM_FAULT_SIGBUS;
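Aside, not part of the commit: several of the vdpa.c hunks above only swap open-coded page-shift arithmetic for the PFN_* helpers, so the conversions are mechanical and behaviour-preserving. Simplified from include/linux/pfn.h, the helpers expand to roughly:

/* Simplified equivalences behind the PFN_* conversions used above. */
#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)	/* bytes -> pages, rounded up   */
#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)			/* bytes -> pages, rounded down */
#define PFN_PHYS(x)	((phys_addr_t)(x) << PAGE_SHIFT)	/* pfn   -> physical address    */

For example, the old "map->size >> PAGE_SHIFT" becomes PFN_DOWN(map->size), and "map_pfn << PAGE_SHIFT" becomes PFN_PHYS(map_pfn).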
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index f249622ef11b..938aefbc75ec 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -114,7 +114,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
size_t nbytes;
size_t iov_len, payload_len;
int head;
- bool restore_flag = false;
+ u32 flags_to_restore = 0;
spin_lock_bh(&vsock->send_pkt_list_lock);
if (list_empty(&vsock->send_pkt_list)) {
@@ -178,16 +178,21 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
* small rx buffers, headers of packets in rx queue are
* created dynamically and are initialized with header
* of current packet(except length). But in case of
- * SOCK_SEQPACKET, we also must clear record delimeter
- * bit(VIRTIO_VSOCK_SEQ_EOR). Otherwise, instead of one
- * packet with delimeter(which marks end of record),
- * there will be sequence of packets with delimeter
- * bit set. After initialized header will be copied to
- * rx buffer, this bit will be restored.
+ * SOCK_SEQPACKET, we also must clear message delimeter
+ * bit (VIRTIO_VSOCK_SEQ_EOM) and MSG_EOR bit
+ * (VIRTIO_VSOCK_SEQ_EOR) if set. Otherwise,
+ * there will be sequence of packets with these
+ * bits set. After initialized header will be copied to
+ * rx buffer, these required bits will be restored.
*/
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
- pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
- restore_flag = true;
+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) {
+ pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+ flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM;
+
+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
+ pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+ flags_to_restore |= VIRTIO_VSOCK_SEQ_EOR;
+ }
}
}
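Aside, not from the commit: on the userspace side, the record boundary that VIRTIO_VSOCK_SEQ_EOR carries in the hunk above corresponds to the plain MSG_EOR flag on an AF_VSOCK SOCK_SEQPACKET socket. A minimal, hypothetical sender sketch follows; the CID/port values and error handling are placeholders.

#include <unistd.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

/* Send one record; MSG_EOR marks its end, which the transport encodes as
 * VIRTIO_VSOCK_SEQ_EOR in the packet header handled by the code above.
 */
static int send_record(unsigned int cid, unsigned int port,
		       const void *buf, size_t len)
{
	struct sockaddr_vm addr = {
		.svm_family = AF_VSOCK,
		.svm_cid = cid,
		.svm_port = port,
	};
	int fd = socket(AF_VSOCK, SOCK_SEQPACKET, 0);
	ssize_t n;

	if (fd < 0)
		return -1;
	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		close(fd);
		return -1;
	}
	n = send(fd, buf, len, MSG_EOR);
	close(fd);
	return n == (ssize_t)len ? 0 : -1;
}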
@@ -224,8 +229,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
* to send it with the next available buffer.
*/
if (pkt->off < pkt->len) {
- if (restore_flag)
- pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+ pkt->hdr.flags |= cpu_to_le32(flags_to_restore);
/* We are queueing the same virtio_vsock_pkt to handle
* the remaining bytes, and we want to deliver it