From 7db0d6027e69f47431800b510192436563ba415b Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Wed, 18 Oct 2023 20:14:42 +0300 Subject: vhost-vdpa: introduce descriptor group backend feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace knows if the device has dedicated descriptor group or not by checking this feature bit. It's only exposed if the vdpa driver backend implements the .get_vq_desc_group() operation callback. Userspace trying to negotiate this feature when it or the dependent _F_IOTLB_ASID feature hasn't been exposed will result in an error. Signed-off-by: Si-Wei Liu Acked-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231018171456.1624030-5-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Si-Wei Liu Tested-by: Si-Wei Liu Tested-by: Lei Yang --- drivers/vhost/vdpa.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 78379ffd2336..2f21798a37ee 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -389,6 +389,14 @@ static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v) return ops->resume; } +static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->get_vq_desc_group; +} + static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; @@ -690,6 +698,7 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; if (features & ~(VHOST_VDPA_BACKEND_FEATURES | + BIT_ULL(VHOST_BACKEND_F_DESC_ASID) | BIT_ULL(VHOST_BACKEND_F_SUSPEND) | BIT_ULL(VHOST_BACKEND_F_RESUME) | BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK))) @@ -700,6 +709,12 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) && !vhost_vdpa_can_resume(v)) return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) && + !(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) + return -EINVAL; + if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) && + !vhost_vdpa_has_desc_group(v)) + return -EOPNOTSUPP; vhost_set_backend_features(&v->vdev, features); return 0; } @@ -753,6 +768,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND); if (vhost_vdpa_can_resume(v)) features |= BIT_ULL(VHOST_BACKEND_F_RESUME); + if (vhost_vdpa_has_desc_group(v)) + features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID); features |= vhost_vdpa_get_backend_features(v); if (copy_to_user(featurep, &features, sizeof(features))) r = -EFAULT; -- cgit v1.3.1 From c8068d9bae0c78e8880451b94668253449b2d71d Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Wed, 18 Oct 2023 20:14:43 +0300 Subject: vhost-vdpa: uAPI to get dedicated descriptor group id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With _F_DESC_ASID backend feature, the device can now support the VHOST_VDPA_GET_VRING_DESC_GROUP ioctl, and it may expose the descriptor table (including avail and used ring) in a different group than the buffers it contains. This new uAPI will fetch the group ID of the descriptor table. Signed-off-by: Si-Wei Liu Acked-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231018171456.1624030-6-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Si-Wei Liu Tested-by: Si-Wei Liu Tested-by: Lei Yang --- drivers/vhost/vdpa.c | 10 ++++++++++ include/uapi/linux/vhost.h | 8 ++++++++ 2 files changed, 18 insertions(+) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 2f21798a37ee..851535f57b95 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -613,6 +613,16 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, else if (copy_to_user(argp, &s, sizeof(s))) return -EFAULT; return 0; + case VHOST_VDPA_GET_VRING_DESC_GROUP: + if (!vhost_vdpa_has_desc_group(v)) + return -EOPNOTSUPP; + s.index = idx; + s.num = ops->get_vq_desc_group(vdpa, idx); + if (s.num >= vdpa->ngroups) + return -EIO; + else if (copy_to_user(argp, &s, sizeof(s))) + return -EFAULT; + return 0; case VHOST_VDPA_SET_GROUP_ASID: if (copy_from_user(&s, argp, sizeof(s))) return -EFAULT; diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index f5c48b61ab62..649560c685f1 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -219,4 +219,12 @@ */ #define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E) +/* Get the group for the descriptor table including driver & device areas + * of a virtqueue: read index, write group in num. + * The virtqueue index is stored in the index field of vhost_vring_state. + * The group ID of the descriptor table for this specific virtqueue + * is returned via num field of vhost_vring_state. + */ +#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) #endif -- cgit v1.3.1 From 5ff1f51eefb1cd2c20c3f49538a64c09a1cc98be Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 28 Sep 2023 14:18:33 +0200 Subject: vhost-scsi: Spelling s/preceeding/preceding/g Fix a misspelling of "preceding". Signed-off-by: Geert Uytterhoeven Message-Id: Signed-off-by: Michael S. Tsirkin Reviewed-by: Simon Horman Reviewed-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index abef0619c790..2d689181bafe 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1158,7 +1158,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) /* * Set prot_iter to data_iter and truncate it to * prot_bytes, and advance data_iter past any - * preceeding prot_bytes that may be present. + * preceding prot_bytes that may be present. * * Also fix up the exp_data_len to reflect only the * actual data payload length. -- cgit v1.3.1 From 1d0f874bfe7871837fb215999979284d579fc373 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Sat, 21 Oct 2023 02:25:14 -0700 Subject: vhost-vdpa: reset vendor specific mapping to initial state in .release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Devices with on-chip IOMMU or vendor specific IOTLB implementation may need to restore iotlb mapping to the initial or default state using the .reset_map op, as it's desirable for some parent devices to not work with DMA ops and maintain a simple IOMMU model with .reset_map. In particular, device reset should not cause mapping to go away on such IOTLB model, so persistent mapping is implied across reset. Before the userspace process using vhost-vdpa is gone, give it a chance to reset iotlb back to the initial state in vhost_vdpa_cleanup(). Signed-off-by: Si-Wei Liu Acked-by: Eugenio Pérez Message-Id: <1697880319-4937-3-git-send-email-si-wei.liu@oracle.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang --- drivers/vhost/vdpa.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 851535f57b95..c6bfe9bdde42 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -131,6 +131,15 @@ static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v, return vhost_vdpa_alloc_as(v, asid); } +static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (ops->reset_map) + ops->reset_map(vdpa, asid); +} + static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) { struct vhost_vdpa_as *as = asid_to_as(v, asid); @@ -140,6 +149,14 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) hlist_del(&as->hash_link); vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid); + /* + * Devices with vendor specific IOMMU may need to restore + * iotlb to the initial or default state, which cannot be + * cleaned up in the all range unmap call above. Give them + * a chance to clean up or reset the map to the desired + * state. + */ + vhost_vdpa_reset_map(v, asid); kfree(as); return 0; -- cgit v1.3.1 From 4398776f7a6d532c466f9e41f601c9a291fac5ef Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Sat, 21 Oct 2023 02:25:15 -0700 Subject: vhost-vdpa: introduce IOTLB_PERSIST backend feature bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace needs this feature flag to distinguish if vhost-vdpa iotlb in the kernel can be trusted to persist IOTLB mapping across vDPA reset. Without it, userspace has no way to tell apart if it's running on an older kernel, which could silently drop all iotlb mapping across vDPA reset, especially with broken parent driver implementation for the .reset driver op. The broken driver may incorrectly drop all mappings of its own as part of .reset, which inadvertently ends up with corrupted mapping state between vhost-vdpa userspace and the kernel. As a workaround, to make the mapping behaviour predictable across reset, userspace has to pro-actively remove all mappings before vDPA reset, and then restore all the mappings afterwards. This workaround is done unconditionally on top of all parent drivers today, due to the parent driver implementation issue and no means to differentiate. This workaround had been utilized in QEMU since day one when the corresponding vhost-vdpa userspace backend came to the world. There are 3 cases that backend may claim this feature bit on for: - parent device that has to work with platform IOMMU - parent device with on-chip IOMMU that has the expected .reset_map support in driver - parent device with vendor specific IOMMU implementation with persistent IOTLB mapping already that has to specifically declare this backend feature The reason why .reset_map is being one of the pre-condition for persistent iotlb is because without it, vhost-vdpa can't switch back iotlb to the initial state later on, especially for the on-chip IOMMU case which starts with identity mapping at device creation. virtio-vdpa requires on-chip IOMMU to perform 1:1 passthrough translation from PA to IOVA as-is to begin with, and .reset_map is the only means to turn back iotlb to the identity mapping mode after vhost-vdpa is gone. The difference in behavior did not matter as QEMU unmaps all the memory unregistering the memory listener at vhost_vdpa_dev_start( started = false), but the backend acknowledging this feature flag allows QEMU to make sure it is safe to skip this unmap & map in the case of vhost stop & start cycle. In that sense, this feature flag is actually a signal for userspace to know that the driver bug has been solved. Not offering it indicates that userspace cannot trust the kernel will retain the maps. Signed-off-by: Si-Wei Liu Acked-by: Eugenio Pérez Message-Id: <1697880319-4937-4-git-send-email-si-wei.liu@oracle.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang --- drivers/vhost/vdpa.c | 15 +++++++++++++++ include/uapi/linux/vhost_types.h | 2 ++ 2 files changed, 17 insertions(+) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index c6bfe9bdde42..acc7c74ba7d6 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -439,6 +439,15 @@ static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v) return ops->get_backend_features(vdpa); } +static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return (!ops->set_map && !ops->dma_map) || ops->reset_map || + vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST); +} + static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; @@ -726,6 +735,7 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, return -EFAULT; if (features & ~(VHOST_VDPA_BACKEND_FEATURES | BIT_ULL(VHOST_BACKEND_F_DESC_ASID) | + BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) | BIT_ULL(VHOST_BACKEND_F_SUSPEND) | BIT_ULL(VHOST_BACKEND_F_RESUME) | BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK))) @@ -742,6 +752,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) && !vhost_vdpa_has_desc_group(v)) return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) && + !vhost_vdpa_has_persistent_map(v)) + return -EOPNOTSUPP; vhost_set_backend_features(&v->vdev, features); return 0; } @@ -797,6 +810,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, features |= BIT_ULL(VHOST_BACKEND_F_RESUME); if (vhost_vdpa_has_desc_group(v)) features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID); + if (vhost_vdpa_has_persistent_map(v)) + features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST); features |= vhost_vdpa_get_backend_features(v); if (copy_to_user(featurep, &features, sizeof(features))) r = -EFAULT; diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 18ad6ae7ab5c..d7656908f730 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -190,5 +190,7 @@ struct vhost_vdpa_iova_range { * buffers may reside. Requires VHOST_BACKEND_F_IOTLB_ASID. */ #define VHOST_BACKEND_F_DESC_ASID 0x7 +/* IOTLB don't flush memory mapping across device reset */ +#define VHOST_BACKEND_F_IOTLB_PERSIST 0x8 #endif -- cgit v1.3.1 From bc91df5c70ac720eca18bd1f4a288f2582713d3e Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Sat, 21 Oct 2023 02:25:17 -0700 Subject: vhost-vdpa: clean iotlb map during reset for older userspace Using .compat_reset op from the previous patch, the buggy .reset behaviour can be kept as-is on older userspace apps, which don't ack the IOTLB_PERSIST backend feature. As this compatibility quirk is limited to those drivers that used to be buggy in the past, it won't affect change the behaviour or affect ABI on the setups with API compliant driver. The separation of .compat_reset from the regular .reset allows vhost-vdpa able to know which driver had broken behaviour before, so it can apply the corresponding compatibility quirk to the individual driver whenever needed. Compared to overloading the existing .reset with flags, .compat_reset won't cause any extra burden to the implementation of every compliant driver. [mst: squashed in two fixup commits] Message-Id: <1697880319-4937-6-git-send-email-si-wei.liu@oracle.com> Message-Id: <1698102863-21122-1-git-send-email-si-wei.liu@oracle.com> Reported-by: Dragos Tatulea Tested-by: Dragos Tatulea Message-Id: <1698275594-19204-1-git-send-email-si-wei.liu@oracle.com> Reported-by: Lei Yang Signed-off-by: Si-Wei Liu Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang --- drivers/vhost/vdpa.c | 20 ++++++++++++++++---- drivers/virtio/virtio_vdpa.c | 2 +- include/linux/vdpa.h | 7 +++++-- 3 files changed, 22 insertions(+), 7 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index acc7c74ba7d6..30df5c58db73 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -227,13 +227,24 @@ static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) irq_bypass_unregister_producer(&vq->call_ctx.producer); } -static int vhost_vdpa_reset(struct vhost_vdpa *v) +static int _compat_vdpa_reset(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; + u32 flags = 0; - v->in_batch = 0; + if (v->vdev.vqs) { + flags |= !vhost_backend_has_feature(v->vdev.vqs[0], + VHOST_BACKEND_F_IOTLB_PERSIST) ? + VDPA_RESET_F_CLEAN_MAP : 0; + } + + return vdpa_reset(vdpa, flags); +} - return vdpa_reset(vdpa); +static int vhost_vdpa_reset(struct vhost_vdpa *v) +{ + v->in_batch = 0; + return _compat_vdpa_reset(v); } static long vhost_vdpa_bind_mm(struct vhost_vdpa *v) @@ -312,7 +323,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) vhost_vdpa_unsetup_vq_irq(v, i); if (status == 0) { - ret = vdpa_reset(vdpa); + ret = _compat_vdpa_reset(v); if (ret) return ret; } else @@ -1344,6 +1355,7 @@ static void vhost_vdpa_cleanup(struct vhost_vdpa *v) vhost_vdpa_free_domain(v); vhost_dev_cleanup(&v->vdev); kfree(v->vdev.vqs); + v->vdev.vqs = NULL; } static int vhost_vdpa_open(struct inode *inode, struct file *filep) diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 06ce6d8c2e00..8d63e5923d24 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -100,7 +100,7 @@ static void virtio_vdpa_reset(struct virtio_device *vdev) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); - vdpa_reset(vdpa); + vdpa_reset(vdpa, 0); } static bool virtio_vdpa_notify(struct virtqueue *vq) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 6b8cbf75712d..db15ac07f8a6 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -519,14 +519,17 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) return vdev->dma_dev; } -static inline int vdpa_reset(struct vdpa_device *vdev) +static inline int vdpa_reset(struct vdpa_device *vdev, u32 flags) { const struct vdpa_config_ops *ops = vdev->config; int ret; down_write(&vdev->cf_lock); vdev->features_valid = false; - ret = ops->reset(vdev); + if (ops->compat_reset && flags) + ret = ops->compat_reset(vdev, flags); + else + ret = ops->reset(vdev); up_write(&vdev->cf_lock); return ret; } -- cgit v1.3.1