diff options
author | Dragos Tatulea <dtatulea@nvidia.com> | 2024-08-16 12:01:56 +0300 |
---|---|---|
committer | Michael S. Tsirkin <mst@redhat.com> | 2024-09-25 07:07:42 -0400 |
commit | 5eb8c7eb1ec74ac6b9e7337674cb7a33e82a1e68 (patch) | |
tree | 2e109fced0668b194871554e8526a25d12ef3354 /drivers/vdpa/mlx5 | |
parent | dcf3eac01f063df0a60ea779399331d2ac535784 (diff) |
vdpa/mlx5: Parallelize device resume
Currently device resume works on vqs serially. Building up on previous
changes that converted vq operations to the async api, this patch
parallelizes the device resume.
For 1 vDPA device x 32 VQs (16 VQPs) attached to a large VM (256 GB RAM,
32 CPUs x 2 threads per core), the device resume time is reduced from
~16 ms to ~4.5 ms.
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Acked-by: Eugenio PĂ©rez <eperezma@redhat.com>
Message-Id: <20240816090159.1967650-8-dtatulea@nvidia.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Lei Yang <leiyang@redhat.com>
Diffstat (limited to 'drivers/vdpa/mlx5')
-rw-r--r-- | drivers/vdpa/mlx5/net/mlx5_vnet.c | 40 |
1 files changed, 14 insertions, 26 deletions
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 17f74b1f0644..fc9fb8d9ac4f 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1675,10 +1675,15 @@ static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mv return suspend_vqs(ndev, mvq->index, 1); } -static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +static int resume_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs) { + struct mlx5_vdpa_virtqueue *mvq; int err; + if (start_vq >= ndev->mvdev.max_vqs) + return -EINVAL; + + mvq = &ndev->vqs[start_vq]; if (!mvq->initialized) return 0; @@ -1690,13 +1695,9 @@ static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq /* Due to a FW quirk we need to modify the VQ fields first then change state. * This should be fixed soon. After that, a single command can be used. */ - err = modify_virtqueues(ndev, mvq->index, 1, mvq->fw_state); - if (err) { - mlx5_vdpa_err(&ndev->mvdev, - "modify vq properties failed for vq %u, err: %d\n", - mvq->index, err); + err = modify_virtqueues(ndev, start_vq, num_vqs, mvq->fw_state); + if (err) return err; - } break; case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: if (!is_resumable(ndev)) { @@ -1712,25 +1713,12 @@ static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq return -EINVAL; } - err = modify_virtqueues(ndev, mvq->index, 1, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); - if (err) - mlx5_vdpa_err(&ndev->mvdev, "modify to resume failed for vq %u, err: %d\n", - mvq->index, err); - - return err; + return modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); } -static int resume_vqs(struct mlx5_vdpa_net *ndev) +static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { - int err = 0; - - for (int i = 0; i < ndev->cur_num_vqs; i++) { - int local_err = resume_vq(ndev, &ndev->vqs[i]); - - err = local_err ? local_err : err; - } - - return err; + return resume_vqs(ndev, mvq->index, 1); } static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) @@ -3080,7 +3068,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, return err; } - resume_vqs(ndev); + resume_vqs(ndev, 0, ndev->cur_num_vqs); return 0; } @@ -3204,7 +3192,7 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) teardown_vq_resources(ndev); if (ndev->setup) { - err = resume_vqs(ndev); + err = resume_vqs(ndev, 0, ndev->cur_num_vqs); if (err) { mlx5_vdpa_warn(mvdev, "failed to resume VQs\n"); goto err_driver; @@ -3628,7 +3616,7 @@ static int mlx5_vdpa_resume(struct vdpa_device *vdev) down_write(&ndev->reslock); mvdev->suspended = false; - err = resume_vqs(ndev); + err = resume_vqs(ndev, 0, ndev->cur_num_vqs); register_link_notifier(ndev); up_write(&ndev->reslock); |