diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-09 11:04:08 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-09 11:04:08 -0700 |
commit | dbfa18c5d7695766f24c0c140204e1f8c921fb95 (patch) | |
tree | 233ae288135ab9de0420499704ac45f02e214859 | |
parent | 7e8c948b3f09b19e3883517e10a3d5ffa9899f84 (diff) | |
parent | 07496eeab577eef1d4912b3e1b502a2b52002ac3 (diff) |
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio bug fixes from Michael Tsirkin:
"A bunch of fixes all over the place"
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
tools/virtio: use canonical ftrace path
vhost_vdpa: support PACKED when setting-getting vring_base
vhost: support PACKED when setting-getting vring_base
vhost: Fix worker hangs due to missed wake up calls
vhost: Fix crash during early vhost_transport_send_pkt calls
vhost_net: revert upend_idx only on retriable error
vhost_vdpa: tell vqs about the negotiated
vdpa/mlx5: Fix hang when cvq commands are triggered during device unregister
tools/virtio: Add .gitignore for ringtest
tools/virtio: Fix arm64 ringtest compilation error
vduse: avoid empty string for dev name
vhost: use kzalloc() instead of kmalloc() followed by memset()
-rw-r--r-- | drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 | ||||
-rw-r--r-- | drivers/vdpa/vdpa_user/vduse_dev.c | 3 | ||||
-rw-r--r-- | drivers/vhost/net.c | 11 | ||||
-rw-r--r-- | drivers/vhost/vdpa.c | 34 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 75 | ||||
-rw-r--r-- | drivers/vhost/vhost.h | 10 | ||||
-rw-r--r-- | kernel/vhost_task.c | 16 | ||||
-rw-r--r-- | tools/virtio/ringtest/.gitignore | 7 | ||||
-rw-r--r-- | tools/virtio/ringtest/main.h | 11 | ||||
-rw-r--r-- | tools/virtio/virtio-trace/README | 2 | ||||
-rw-r--r-- | tools/virtio/virtio-trace/trace-agent.c | 12 |
11 files changed, 119 insertions, 64 deletions
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index e29e32b306ad..279ac6a558d2 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -3349,10 +3349,10 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * mlx5_vdpa_remove_debugfs(ndev->debugfs); ndev->debugfs = NULL; unregister_link_notifier(ndev); + _vdpa_unregister_device(dev); wq = mvdev->wq; mvdev->wq = NULL; destroy_workqueue(wq); - _vdpa_unregister_device(dev); mgtdev->ndev = NULL; } diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index de97e38c3b82..5f5c21674fdc 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1685,6 +1685,9 @@ static bool vduse_validate_config(struct vduse_dev_config *config) if (config->vq_num > 0xffff) return false; + if (!config->name[0]) + return false; + if (!device_is_allowed(config->device_id)) return false; diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 07181cd8d52e..ae2273196b0c 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -935,13 +935,18 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) err = sock->ops->sendmsg(sock, &msg, len); if (unlikely(err < 0)) { + bool retry = err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS; + if (zcopy_used) { if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS) vhost_net_ubuf_put(ubufs); - nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) - % UIO_MAXIOV; + if (retry) + nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) + % UIO_MAXIOV; + else + vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; } - if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) { + if (retry) { vhost_discard_vq_desc(vq, 1); vhost_net_enable_vq(net, vq); break; diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 8c1aefc865f0..bf77924d5b60 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -407,7 +407,10 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + struct vhost_dev *d = &v->vdev; + u64 actual_features; u64 features; + int i; /* * It's not allowed to change the features after they have @@ -422,6 +425,16 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) if (vdpa_set_features(vdpa, features)) return -EINVAL; + /* let the vqs know what has been configured */ + actual_features = ops->get_driver_features(vdpa); + for (i = 0; i < d->nvqs; ++i) { + struct vhost_virtqueue *vq = d->vqs[i]; + + mutex_lock(&vq->mutex); + vq->acked_features = actual_features; + mutex_unlock(&vq->mutex); + } + return 0; } @@ -594,7 +607,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, if (r) return r; - vq->last_avail_idx = vq_state.split.avail_index; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { + vq->last_avail_idx = vq_state.packed.last_avail_idx | + (vq_state.packed.last_avail_counter << 15); + vq->last_used_idx = vq_state.packed.last_used_idx | + (vq_state.packed.last_used_counter << 15); + } else { + vq->last_avail_idx = vq_state.split.avail_index; + } break; } @@ -612,9 +632,15 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, break; case VHOST_SET_VRING_BASE: - vq_state.split.avail_index = vq->last_avail_idx; - if (ops->set_vq_state(vdpa, idx, &vq_state)) - r = -EINVAL; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { + vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff; + vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000); + vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff; + vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000); + } else { + vq_state.split.avail_index = vq->last_avail_idx; + } + r = ops->set_vq_state(vdpa, idx, &vq_state); break; case VHOST_SET_VRING_CALL: diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 074273020849..60c9ebd629dd 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -235,7 +235,7 @@ void vhost_dev_flush(struct vhost_dev *dev) { struct vhost_flush_struct flush; - if (dev->worker) { + if (dev->worker.vtsk) { init_completion(&flush.wait_event); vhost_work_init(&flush.work, vhost_flush_work); @@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(vhost_dev_flush); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) { - if (!dev->worker) + if (!dev->worker.vtsk) return; if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { @@ -255,8 +255,8 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) * sure it was not in the list. * test_and_set_bit() implies a memory barrier. */ - llist_add(&work->node, &dev->worker->work_list); - vhost_task_wake(dev->worker->vtsk); + llist_add(&work->node, &dev->worker.work_list); + vhost_task_wake(dev->worker.vtsk); } } EXPORT_SYMBOL_GPL(vhost_work_queue); @@ -264,7 +264,7 @@ EXPORT_SYMBOL_GPL(vhost_work_queue); /* A lockless hint for busy polling code to exit the loop */ bool vhost_has_work(struct vhost_dev *dev) { - return dev->worker && !llist_empty(&dev->worker->work_list); + return !llist_empty(&dev->worker.work_list); } EXPORT_SYMBOL_GPL(vhost_has_work); @@ -341,6 +341,8 @@ static bool vhost_worker(void *data) node = llist_del_all(&worker->work_list); if (node) { + __set_current_state(TASK_RUNNING); + node = llist_reverse_order(node); /* make sure flag is seen after deletion */ smp_wmb(); @@ -456,7 +458,8 @@ void vhost_dev_init(struct vhost_dev *dev, dev->umem = NULL; dev->iotlb = NULL; dev->mm = NULL; - dev->worker = NULL; + memset(&dev->worker, 0, sizeof(dev->worker)); + init_llist_head(&dev->worker.work_list); dev->iov_limit = iov_limit; dev->weight = weight; dev->byte_weight = byte_weight; @@ -530,47 +533,30 @@ static void vhost_detach_mm(struct vhost_dev *dev) static void vhost_worker_free(struct vhost_dev *dev) { - struct vhost_worker *worker = dev->worker; - - if (!worker) + if (!dev->worker.vtsk) return; - dev->worker = NULL; - WARN_ON(!llist_empty(&worker->work_list)); - vhost_task_stop(worker->vtsk); - kfree(worker); + WARN_ON(!llist_empty(&dev->worker.work_list)); + vhost_task_stop(dev->worker.vtsk); + dev->worker.kcov_handle = 0; + dev->worker.vtsk = NULL; } static int vhost_worker_create(struct vhost_dev *dev) { - struct vhost_worker *worker; struct vhost_task *vtsk; char name[TASK_COMM_LEN]; - int ret; - - worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT); - if (!worker) - return -ENOMEM; - dev->worker = worker; - worker->kcov_handle = kcov_common_handle(); - init_llist_head(&worker->work_list); snprintf(name, sizeof(name), "vhost-%d", current->pid); - vtsk = vhost_task_create(vhost_worker, worker, name); - if (!vtsk) { - ret = -ENOMEM; - goto free_worker; - } + vtsk = vhost_task_create(vhost_worker, &dev->worker, name); + if (!vtsk) + return -ENOMEM; - worker->vtsk = vtsk; + dev->worker.kcov_handle = kcov_common_handle(); + dev->worker.vtsk = vtsk; vhost_task_start(vtsk); return 0; - -free_worker: - kfree(worker); - dev->worker = NULL; - return ret; } /* Caller should have device mutex */ @@ -1614,17 +1600,25 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = -EFAULT; break; } - if (s.num > 0xffff) { - r = -EINVAL; - break; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { + vq->last_avail_idx = s.num & 0xffff; + vq->last_used_idx = (s.num >> 16) & 0xffff; + } else { + if (s.num > 0xffff) { + r = -EINVAL; + break; + } + vq->last_avail_idx = s.num; } - vq->last_avail_idx = s.num; /* Forget the cached index value. */ vq->avail_idx = vq->last_avail_idx; break; case VHOST_GET_VRING_BASE: s.index = idx; - s.num = vq->last_avail_idx; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) + s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16); + else + s.num = vq->last_avail_idx; if (copy_to_user(argp, &s, sizeof s)) r = -EFAULT; break; @@ -2563,12 +2557,11 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify); /* Create a new message. */ struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) { - struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL); + /* Make sure all padding within the structure is initialized. */ + struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return NULL; - /* Make sure all padding within the structure is initialized. */ - memset(&node->msg, 0, sizeof node->msg); node->vq = vq; node->msg.type = type; return node; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 0308638cdeee..fc900be504b3 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -92,13 +92,17 @@ struct vhost_virtqueue { /* The routine to call when the Guest pings us, or timeout. */ vhost_work_fn_t handle_kick; - /* Last available index we saw. */ + /* Last available index we saw. + * Values are limited to 0x7fff, and the high bit is used as + * a wrap counter when using VIRTIO_F_RING_PACKED. */ u16 last_avail_idx; /* Caches available index value from user. */ u16 avail_idx; - /* Last index we used. */ + /* Last index we used. + * Values are limited to 0x7fff, and the high bit is used as + * a wrap counter when using VIRTIO_F_RING_PACKED. */ u16 last_used_idx; /* Used flags */ @@ -154,7 +158,7 @@ struct vhost_dev { struct vhost_virtqueue **vqs; int nvqs; struct eventfd_ctx *log_ctx; - struct vhost_worker *worker; + struct vhost_worker worker; struct vhost_iotlb *umem; struct vhost_iotlb *iotlb; spinlock_t iotlb_lock; diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index f80d5c51ae67..da35e5b7f047 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -28,10 +28,6 @@ static int vhost_task_fn(void *data) for (;;) { bool did_work; - /* mb paired w/ vhost_task_stop */ - if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) - break; - if (!dead && signal_pending(current)) { struct ksignal ksig; /* @@ -48,11 +44,17 @@ static int vhost_task_fn(void *data) clear_thread_flag(TIF_SIGPENDING); } + /* mb paired w/ vhost_task_stop */ + set_current_state(TASK_INTERRUPTIBLE); + + if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) { + __set_current_state(TASK_RUNNING); + break; + } + did_work = vtsk->fn(vtsk->data); - if (!did_work) { - set_current_state(TASK_INTERRUPTIBLE); + if (!did_work) schedule(); - } } complete(&vtsk->exited); diff --git a/tools/virtio/ringtest/.gitignore b/tools/virtio/ringtest/.gitignore new file mode 100644 index 000000000000..100b9e30c0f4 --- /dev/null +++ b/tools/virtio/ringtest/.gitignore @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +/noring +/ptr_ring +/ring +/virtio_ring_0_9 +/virtio_ring_inorder +/virtio_ring_poll diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index b68920d52750..d18dd317e27f 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -8,6 +8,7 @@ #ifndef MAIN_H #define MAIN_H +#include <assert.h> #include <stdbool.h> extern int param; @@ -95,6 +96,8 @@ extern unsigned ring_size; #define cpu_relax() asm ("rep; nop" ::: "memory") #elif defined(__s390x__) #define cpu_relax() barrier() +#elif defined(__aarch64__) +#define cpu_relax() asm ("yield" ::: "memory") #else #define cpu_relax() assert(0) #endif @@ -112,6 +115,8 @@ static inline void busy_wait(void) #if defined(__x86_64__) || defined(__i386__) #define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") +#elif defined(__aarch64__) +#define smp_mb() asm volatile("dmb ish" ::: "memory") #else /* * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized @@ -136,10 +141,16 @@ static inline void busy_wait(void) #if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) #define smp_wmb() barrier() +#elif defined(__aarch64__) +#define smp_wmb() asm volatile("dmb ishst" ::: "memory") #else #define smp_wmb() smp_release() #endif +#ifndef __always_inline +#define __always_inline inline __attribute__((always_inline)) +#endif + static __always_inline void __read_once_size(const volatile void *p, void *res, int size) { diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README index 4fb9368bf751..0127ff0c54b0 100644 --- a/tools/virtio/virtio-trace/README +++ b/tools/virtio/virtio-trace/README @@ -95,7 +95,7 @@ Run 1) Enable ftrace in the guest <Example> - # echo 1 > /sys/kernel/debug/tracing/events/sched/enable + # echo 1 > /sys/kernel/tracing/events/sched/enable 2) Run trace agent in the guest This agent must be operated as root. diff --git a/tools/virtio/virtio-trace/trace-agent.c b/tools/virtio/virtio-trace/trace-agent.c index cdfe77c2b4c8..7e2d9bbf0b84 100644 --- a/tools/virtio/virtio-trace/trace-agent.c +++ b/tools/virtio/virtio-trace/trace-agent.c @@ -18,8 +18,9 @@ #define PIPE_DEF_BUFS 16 #define PIPE_MIN_SIZE (PAGE_SIZE*PIPE_DEF_BUFS) #define PIPE_MAX_SIZE (1024*1024) -#define READ_PATH_FMT \ - "/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw" +#define TRACEFS "/sys/kernel/tracing" +#define DEBUGFS "/sys/kernel/debug/tracing" +#define READ_PATH_FMT "%s/per_cpu/cpu%d/trace_pipe_raw" #define WRITE_PATH_FMT "/dev/virtio-ports/trace-path-cpu%d" #define CTL_PATH "/dev/virtio-ports/agent-ctl-path" @@ -120,9 +121,12 @@ static const char *make_path(int cpu_num, bool this_is_write_path) if (this_is_write_path) /* write(output) path */ ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num); - else + else { /* read(input) path */ - ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num); + ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, TRACEFS, cpu_num); + if (ret > 0 && access(buf, F_OK) != 0) + ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, DEBUGFS, cpu_num); + } if (ret <= 0) { pr_err("Failed to generate %s path(CPU#%d):%d\n", |