diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-11 14:34:17 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-11 14:34:17 -0700 |
commit | 57b077939287835b9396a1c3b40d35609cf2fcb8 (patch) | |
tree | 32948b4a9b5cf26c7326d23367ee19f7573928a3 | |
parent | ce13266d97b198934e86166491bfa4938e96508f (diff) | |
parent | 8a7c3213db068135e816a6a517157de6443290d6 (diff) |
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
- IRQ bypass support for vdpa and IFC
- MLX5 vdpa driver
- Endianness fixes for virtio drivers
- Misc other fixes
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (71 commits)
vdpa/mlx5: fix up endian-ness for mtu
vdpa: Fix pointer math bug in vdpasim_get_config()
vdpa/mlx5: Fix pointer math in mlx5_vdpa_get_config()
vdpa/mlx5: fix memory allocation failure checks
vdpa/mlx5: Fix uninitialised variable in core/mr.c
vdpa_sim: init iommu lock
virtio_config: fix up warnings on parisc
vdpa/mlx5: Add VDPA driver for supported mlx5 devices
vdpa/mlx5: Add shared memory registration code
vdpa/mlx5: Add support library for mlx5 VDPA implementation
vdpa/mlx5: Add hardware descriptive header file
vdpa: Modify get_vq_state() to return error code
net/vdpa: Use struct for set/get vq state
vdpa: remove hard coded virtq num
vdpasim: support batch updating
vhost-vdpa: support IOTLB batching hints
vhost-vdpa: support get/set backend features
vhost: generialize backend features setting/getting
vhost-vdpa: refine ioctl pre-processing
vDPA: dont change vq irq after DRIVER_OK
...
60 files changed, 3886 insertions, 408 deletions
diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 351aee52aca6..a6c4bb6c2c01 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -385,7 +385,7 @@ static irqreturn_t vu_req_interrupt(int irq, void *data) } break; case VHOST_USER_SLAVE_IOTLB_MSG: - /* not supported - VIRTIO_F_IOMMU_PLATFORM */ + /* not supported - VIRTIO_F_ACCESS_PLATFORM */ case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */ default: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 12ea77f99ff3..232dd2f9a081 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10667,11 +10667,17 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, { struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + int ret; irqfd->producer = prod; + kvm_arch_start_assignment(irqfd->kvm); + ret = kvm_x86_ops.update_pi_irte(irqfd->kvm, + prod->irq, irqfd->gsi, 1); + + if (ret) + kvm_arch_end_assignment(irqfd->kvm); - return kvm_x86_ops.update_pi_irte(irqfd->kvm, - prod->irq, irqfd->gsi, 1); + return ret; } void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, @@ -10694,6 +10700,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, if (ret) printk(KERN_INFO "irq bypass consumer (token %p) unregistration" " fails: %d\n", irqfd->consumer.token, ret); + + kvm_arch_end_assignment(irqfd->kvm); } int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index 0c66d6193ca2..080955a1dd9c 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -204,8 +204,8 @@ static int virtcrypto_update_status(struct virtio_crypto *vcrypto) u32 status; int err; - virtio_cread(vcrypto->vdev, - struct virtio_crypto_config, status, &status); + virtio_cread_le(vcrypto->vdev, + struct virtio_crypto_config, status, &status); /* * Unknown status bits would be a host error and the driver @@ -323,31 +323,31 @@ static int virtcrypto_probe(struct virtio_device *vdev) if (!vcrypto) return -ENOMEM; - virtio_cread(vdev, struct virtio_crypto_config, + virtio_cread_le(vdev, struct virtio_crypto_config, max_dataqueues, &max_data_queues); if (max_data_queues < 1) max_data_queues = 1; - virtio_cread(vdev, struct virtio_crypto_config, - max_cipher_key_len, &max_cipher_key_len); - virtio_cread(vdev, struct virtio_crypto_config, - max_auth_key_len, &max_auth_key_len); - virtio_cread(vdev, struct virtio_crypto_config, - max_size, &max_size); - virtio_cread(vdev, struct virtio_crypto_config, - crypto_services, &crypto_services); - virtio_cread(vdev, struct virtio_crypto_config, - cipher_algo_l, &cipher_algo_l); - virtio_cread(vdev, struct virtio_crypto_config, - cipher_algo_h, &cipher_algo_h); - virtio_cread(vdev, struct virtio_crypto_config, - hash_algo, &hash_algo); - virtio_cread(vdev, struct virtio_crypto_config, - mac_algo_l, &mac_algo_l); - virtio_cread(vdev, struct virtio_crypto_config, - mac_algo_h, &mac_algo_h); - virtio_cread(vdev, struct virtio_crypto_config, - aead_algo, &aead_algo); + virtio_cread_le(vdev, struct virtio_crypto_config, + max_cipher_key_len, &max_cipher_key_len); + virtio_cread_le(vdev, struct virtio_crypto_config, + max_auth_key_len, &max_auth_key_len); + virtio_cread_le(vdev, struct virtio_crypto_config, + max_size, &max_size); + virtio_cread_le(vdev, struct virtio_crypto_config, + crypto_services, &crypto_services); + virtio_cread_le(vdev, struct virtio_crypto_config, + cipher_algo_l, &cipher_algo_l); + virtio_cread_le(vdev, struct virtio_crypto_config, + cipher_algo_h, &cipher_algo_h); + virtio_cread_le(vdev, struct virtio_crypto_config, + hash_algo, &hash_algo); + virtio_cread_le(vdev, struct virtio_crypto_config, + mac_algo_l, &mac_algo_l); + virtio_cread_le(vdev, struct virtio_crypto_config, + mac_algo_h, &mac_algo_h); + virtio_cread_le(vdev, struct virtio_crypto_config, + aead_algo, &aead_algo); /* Add virtio crypto device to global table */ err = virtcrypto_devmgr_add_dev(vcrypto); diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 0a5c8cf409fb..4d944a0dff3e 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -39,8 +39,8 @@ static void virtio_gpu_config_changed_work_func(struct work_struct *work) u32 events_read, events_clear = 0; /* read the config space */ - virtio_cread(vgdev->vdev, struct virtio_gpu_config, - events_read, &events_read); + virtio_cread_le(vgdev->vdev, struct virtio_gpu_config, + events_read, &events_read); if (events_read & VIRTIO_GPU_EVENT_DISPLAY) { if (vgdev->has_edid) virtio_gpu_cmd_get_edids(vgdev); @@ -49,8 +49,8 @@ static void virtio_gpu_config_changed_work_func(struct work_struct *work) drm_helper_hpd_irq_event(vgdev->ddev); events_clear |= VIRTIO_GPU_EVENT_DISPLAY; } - virtio_cwrite(vgdev->vdev, struct virtio_gpu_config, - events_clear, &events_clear); + virtio_cwrite_le(vgdev->vdev, struct virtio_gpu_config, + events_clear, &events_clear); } static void virtio_gpu_init_vq(struct virtio_gpu_queue *vgvq, @@ -165,8 +165,8 @@ int virtio_gpu_init(struct drm_device *dev) } /* get display info */ - virtio_cread(vgdev->vdev, struct virtio_gpu_config, - num_scanouts, &num_scanouts); + virtio_cread_le(vgdev->vdev, struct virtio_gpu_config, + num_scanouts, &num_scanouts); vgdev->num_scanouts = min_t(uint32_t, num_scanouts, VIRTIO_GPU_MAX_SCANOUTS); if (!vgdev->num_scanouts) { @@ -176,8 +176,8 @@ int virtio_gpu_init(struct drm_device *dev) } DRM_INFO("number of scanouts: %d\n", num_scanouts); - virtio_cread(vgdev->vdev, struct virtio_gpu_config, - num_capsets, &num_capsets); + virtio_cread_le(vgdev->vdev, struct virtio_gpu_config, + num_capsets, &num_capsets); DRM_INFO("number of cap sets: %d\n", num_capsets); virtio_gpu_modeset_init(vgdev); diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 346cef5ce251..2cdd3cd9ce75 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -141,7 +141,7 @@ static int virtio_gpu_object_shmem_init(struct virtio_gpu_device *vgdev, struct virtio_gpu_mem_entry **ents, unsigned int *nents) { - bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev); + bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev); struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo); struct scatterlist *sg; int si, ret; diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 9e663a5d9952..53af60d484a4 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -599,7 +599,7 @@ void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(objs->objs[0]); struct virtio_gpu_transfer_to_host_2d *cmd_p; struct virtio_gpu_vbuffer *vbuf; - bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev); + bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev); struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo); if (use_dma_api) @@ -1015,7 +1015,7 @@ void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(objs->objs[0]); struct virtio_gpu_transfer_host_3d *cmd_p; struct virtio_gpu_vbuffer *vbuf; - bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev); + bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev); struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo); if (use_dma_api) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index f6f07489a9aa..b4da396cce60 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -1010,8 +1010,8 @@ static int viommu_probe(struct virtio_device *vdev) if (ret) return ret; - virtio_cread(vdev, struct virtio_iommu_config, page_size_mask, - &viommu->pgsize_bitmap); + virtio_cread_le(vdev, struct virtio_iommu_config, page_size_mask, + &viommu->pgsize_bitmap); if (!viommu->pgsize_bitmap) { ret = -EINVAL; @@ -1022,25 +1022,25 @@ static int viommu_probe(struct virtio_device *vdev) viommu->last_domain = ~0U; /* Optional features */ - virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE, - struct virtio_iommu_config, input_range.start, - &input_start); + virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE, + struct virtio_iommu_config, input_range.start, + &input_start); - virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE, - struct virtio_iommu_config, input_range.end, - &input_end); + virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE, + struct virtio_iommu_config, input_range.end, + &input_end); - virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE, - struct virtio_iommu_config, domain_range.start, - &viommu->first_domain); + virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE, + struct virtio_iommu_config, domain_range.start, + &viommu->first_domain); - virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE, - struct virtio_iommu_config, domain_range.end, - &viommu->last_domain); + virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE, + struct virtio_iommu_config, domain_range.end, + &viommu->last_domain); - virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE, - struct virtio_iommu_config, probe_size, - &viommu->probe_size); + virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_PROBE, + struct virtio_iommu_config, probe_size, + &viommu->probe_size); viommu->geometry = (struct iommu_domain_geometry) { .aperture_start = input_start, diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 6fa8fe5ef160..0ada48edf749 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2264,12 +2264,13 @@ static void virtnet_update_settings(struct virtnet_info *vi) if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) return; - speed = virtio_cread32(vi->vdev, offsetof(struct virtio_net_config, - speed)); + virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); + if (ethtool_validate_speed(speed)) vi->speed = speed; - duplex = virtio_cread8(vi->vdev, offsetof(struct virtio_net_config, - duplex)); + + virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); + if (ethtool_validate_duplex(duplex)) vi->duplex = duplex; } diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c index 5e3d07b47e0c..726c7354d465 100644 --- a/drivers/nvdimm/virtio_pmem.c +++ b/drivers/nvdimm/virtio_pmem.c @@ -58,9 +58,9 @@ static int virtio_pmem_probe(struct virtio_device *vdev) goto out_err; } - virtio_cread(vpmem->vdev, struct virtio_pmem_config, + virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, start, &vpmem->start); - virtio_cread(vpmem->vdev, struct virtio_pmem_config, + virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, size, &vpmem->size); res.start = vpmem->start; diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index 5739a9669b29..bbc4e71a16ff 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -625,7 +625,10 @@ static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring, vdev_id = VIRTIO_ID_NET; hdr_len = sizeof(struct virtio_net_hdr); config = &fifo->vdev[vdev_id]->config.net; - if (ntohs(hdr.len) > config->mtu + + /* A legacy-only interface for now. */ + if (ntohs(hdr.len) > + __virtio16_to_cpu(virtio_legacy_is_little_endian(), + config->mtu) + MLXBF_TMFIFO_NET_L2_OVERHEAD) return; } else { @@ -1231,8 +1234,12 @@ static int mlxbf_tmfifo_probe(struct platform_device *pdev) /* Create the network vdev. */ memset(&net_config, 0, sizeof(net_config)); - net_config.mtu = ETH_DATA_LEN; - net_config.status = VIRTIO_NET_S_LINK_UP; + + /* A legacy-only interface for now. */ + net_config.mtu = __cpu_to_virtio16(virtio_legacy_is_little_endian(), + ETH_DATA_LEN); + net_config.status = __cpu_to_virtio16(virtio_legacy_is_little_endian(), + VIRTIO_NET_S_LINK_UP); mlxbf_tmfifo_get_cfg_mac(net_config.mac); rc = mlxbf_tmfifo_create_vdev(dev, fifo, VIRTIO_ID_NET, MLXBF_TMFIFO_NET_FEATURES, &net_config, diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 8cc003aa4d00..ca1c39b6f631 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -754,14 +754,14 @@ static struct scsi_host_template virtscsi_host_template = { #define virtscsi_config_get(vdev, fld) \ ({ \ - typeof(((struct virtio_scsi_config *)0)->fld) __val; \ + __virtio_native_type(struct virtio_scsi_config, fld) __val; \ virtio_cread(vdev, struct virtio_scsi_config, fld, &__val); \ __val; \ }) #define virtscsi_config_set(vdev, fld, val) \ do { \ - typeof(((struct virtio_scsi_config *)0)->fld) __val = (val); \ + __virtio_native_type(struct virtio_scsi_config, fld) __val = (val); \ virtio_cwrite(vdev, struct virtio_scsi_config, fld, &__val); \ } while(0) diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index d93a69b12f81..4271c408103e 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -29,4 +29,23 @@ config IFCVF To compile this driver as a module, choose M here: the module will be called ifcvf. +config MLX5_VDPA + bool "MLX5 VDPA support library for ConnectX devices" + depends on MLX5_CORE + default n + help + Support library for Mellanox VDPA drivers. Provides code that is + common for all types of VDPA drivers. The following drivers are planned: + net, block. + +config MLX5_VDPA_NET + tristate "vDPA driver for ConnectX devices" + depends on MLX5_VDPA + default n + help + VDPA network driver for ConnectX6 and newer. Provides offloading + of virtio net datapath such that descriptors put on the ring will + be executed by the hardware. It also supports a variety of stateless + offloads depending on the actual device used and firmware version. + endif # VDPA diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile index 8bbb686ca7a2..d160e9b63a66 100644 --- a/drivers/vdpa/Makefile +++ b/drivers/vdpa/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_VDPA) += vdpa.o obj-$(CONFIG_VDPA_SIM) += vdpa_sim/ obj-$(CONFIG_IFCVF) += ifcvf/ +obj-$(CONFIG_MLX5_VDPA) += mlx5/ diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c index 94bf0328b68d..f2a128e56de5 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.c +++ b/drivers/vdpa/ifcvf/ifcvf_base.c @@ -272,7 +272,7 @@ static int ifcvf_config_features(struct ifcvf_hw *hw) return 0; } -u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid) +u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid) { struct ifcvf_lm_cfg __iomem *ifcvf_lm; void __iomem *avail_idx_addr; @@ -287,7 +287,7 @@ u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid) return last_avail_idx; } -int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num) +int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num) { struct ifcvf_lm_cfg __iomem *ifcvf_lm; void __iomem *avail_idx_addr; diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index f4554412e607..08f267a2aafe 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -29,7 +29,7 @@ (1ULL << VIRTIO_F_VERSION_1) | \ (1ULL << VIRTIO_NET_F_STATUS) | \ (1ULL << VIRTIO_F_ORDER_PLATFORM) | \ - (1ULL << VIRTIO_F_IOMMU_PLATFORM) | \ + (1ULL << VIRTIO_F_ACCESS_PLATFORM) | \ (1ULL << VIRTIO_NET_F_MRG_RXBUF)) /* Only one queue pair for now. */ @@ -116,7 +116,7 @@ void ifcvf_set_status(struct ifcvf_hw *hw, u8 status); void io_write64_twopart(u64 val, u32 *lo, u32 *hi); void ifcvf_reset(struct ifcvf_hw *hw); u64 ifcvf_get_features(struct ifcvf_hw *hw); -u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid); -int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num); +u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid); +int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num); struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw); #endif /* _IFCVF_H_ */ diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index f5a60c14b979..076d7ac5e723 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -50,8 +50,10 @@ static void ifcvf_free_irq(struct ifcvf_adapter *adapter, int queues) int i; - for (i = 0; i < queues; i++) + for (i = 0; i < queues; i++) { devm_free_irq(&pdev->dev, vf->vring[i].irq, &vf->vring[i]); + vf->vring[i].irq = -EINVAL; + } ifcvf_free_irq_vectors(pdev); } @@ -235,19 +237,21 @@ static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev) return IFCVF_QUEUE_MAX; } -static u64 ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid) +static int ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid, + struct vdpa_vq_state *state) { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - return ifcvf_get_vq_state(vf, qid); + state->avail_index = ifcvf_get_vq_state(vf, qid); + return 0; } static int ifcvf_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid, - u64 num) + const struct vdpa_vq_state *state) { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - return ifcvf_set_vq_state(vf, qid, num); + return ifcvf_set_vq_state(vf, qid, state->avail_index); } static void ifcvf_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid, @@ -352,6 +356,14 @@ static void ifcvf_vdpa_set_config_cb(struct vdpa_device *vdpa_dev, vf->config_cb.private = cb->private; } +static int ifcvf_vdpa_get_vq_irq(struct vdpa_device *vdpa_dev, + u16 qid) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return vf->vring[qid].irq; +} + /* * IFCVF currently does't have on-chip IOMMU, so not * implemented set_map()/dma_map()/dma_unmap() @@ -369,6 +381,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = { .get_vq_ready = ifcvf_vdpa_get_vq_ready, .set_vq_num = ifcvf_vdpa_set_vq_num, .set_vq_address = ifcvf_vdpa_set_vq_address, + .get_vq_irq = ifcvf_vdpa_get_vq_irq, .kick_vq = ifcvf_vdpa_kick_vq, .get_generation = ifcvf_vdpa_get_generation, .get_device_id = ifcvf_vdpa_get_device_id, @@ -384,7 +397,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct device *dev = &pdev->dev; struct ifcvf_adapter *adapter; struct ifcvf_hw *vf; - int ret; + int ret, i; ret = pcim_enable_device(pdev); if (ret) { @@ -420,7 +433,8 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) } adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, - dev, &ifc_vdpa_ops); + dev, &ifc_vdpa_ops, + IFCVF_MAX_QUEUE_PAIRS * 2); if (adapter == NULL) { IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); return -ENOMEM; @@ -441,6 +455,9 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err; } + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) + vf->vring[i].irq = -EINVAL; + ret = vdpa_register_device(&adapter->vdpa); if (ret) { IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus"); diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile new file mode 100644 index 000000000000..89a5bededc9f --- /dev/null +++ b/drivers/vdpa/mlx5/Makefile @@ -0,0 +1,4 @@ +subdir-ccflags-y += -I$(srctree)/drivers/vdpa/mlx5/core + +obj-$(CONFIG_MLX5_VDPA_NET) += mlx5_vdpa.o +mlx5_vdpa-$(CONFIG_MLX5_VDPA_NET) += net/main.o net/mlx5_vnet.o core/resources.o core/mr.o diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h new file mode 100644 index 000000000000..5c92a576edae --- /dev/null +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#ifndef __MLX5_VDPA_H__ +#define __MLX5_VDPA_H__ + +#include <linux/vdpa.h> +#include <linux/mlx5/driver.h> + +struct mlx5_vdpa_direct_mr { + u64 start; + u64 end; + u32 perm; + struct mlx5_core_mkey mr; + struct sg_table sg_head; + int log_size; + int nsg; + struct list_head list; + u64 offset; +}; + +struct mlx5_vdpa_mr { + struct mlx5_core_mkey mkey; + + /* list of direct MRs descendants of this indirect mr */ + struct list_head head; + unsigned long num_directs; + unsigned long num_klms; + bool initialized; + + /* serialize mkey creation and destruction */ + struct mutex mkey_mtx; +}; + +struct mlx5_vdpa_resources { + u32 pdn; + struct mlx5_uars_page *uar; + void __iomem *kick_addr; + u16 uid; + u32 null_mkey; + bool valid; +}; + +struct mlx5_vdpa_dev { + struct vdpa_device vdev; + struct mlx5_core_dev *mdev; + struct mlx5_vdpa_resources res; + + u64 mlx_features; + u64 actual_features; + u8 status; + u32 max_vqs; + u32 generation; + + struct mlx5_vdpa_mr mr; +}; + +int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid); +int mlx5_vdpa_dealloc_pd(struct mlx5_vdpa_dev *dev, u32 pdn, u16 uid); +int mlx5_vdpa_get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey); +int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn); +void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn); +int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn); +void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn); +int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn); +void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn); +int mlx5_vdpa_alloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 *tdn); +void mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn); +int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev); +void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev); +int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey, u32 *in, + int inlen); +int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey); +int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, + bool *change_map); +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb); +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev); + +#define mlx5_vdpa_warn(__dev, format, ...) \ + dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \ + current->pid, ##__VA_ARGS__) + +#define mlx5_vdpa_info(__dev, format, ...) \ + dev_info((__dev)->mdev->device, "%s:%d:(pid %d): " format, __func__, __LINE__, \ + current->pid, ##__VA_ARGS__) + +#define mlx5_vdpa_dbg(__dev, format, ...) \ + dev_debug((__dev)->mdev->device, "%s:%d:(pid %d): " format, __func__, __LINE__, \ + current->pid, ##__VA_ARGS__) + +#endif /* __MLX5_VDPA_H__ */ diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h b/drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h new file mode 100644 index 000000000000..f6f57a29b38e --- /dev/null +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#ifndef __MLX5_VDPA_IFC_H_ +#define __MLX5_VDPA_IFC_H_ + +#include <linux/mlx5/mlx5_ifc.h> + +enum { + MLX5_VIRTIO_Q_EVENT_MODE_NO_MSIX_MODE = 0x0, + MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE = 0x1, + MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE = 0x2, +}; + +enum { + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT = 0x1, // do I check this caps? + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED = 0x2, +}; + +enum { + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT = 0, + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED = 1, +}; + +struct mlx5_ifc_virtio_q_bits { + u8 virtio_q_type[0x8]; + u8 reserved_at_8[0x5]; + u8 event_mode[0x3]; + u8 queue_index[0x10]; + + u8 full_emulation[0x1]; + u8 virtio_version_1_0[0x1]; + u8 reserved_at_22[0x2]; + u8 offload_type[0x4]; + u8 event_qpn_or_msix[0x18]; + + u8 doorbell_stride_index[0x10]; + u8 queue_size[0x10]; + + u8 device_emulation_id[0x20]; + + u8 desc_addr[0x40]; + + u8 used_addr[0x40]; + + u8 available_addr[0x40]; + + u8 virtio_q_mkey[0x20]; + + u8 max_tunnel_desc[0x10]; + u8 reserved_at_170[0x8]; + u8 error_type[0x8]; + + u8 umem_1_id[0x20]; + + u8 umem_1_size[0x20]; + + u8 umem_1_offset[0x40]; + + u8 umem_2_id[0x20]; + + u8 umem_2_size[0x20]; + + u8 umem_2_offset[0x40]; + + u8 umem_3_id[0x20]; + + u8 umem_3_size[0x20]; + + u8 umem_3_offset[0x40]; + + u8 counter_set_id[0x20]; + + u8 reserved_at_320[0x8]; + u8 pd[0x18]; + + u8 reserved_at_340[0xc0]; +}; + +struct mlx5_ifc_virtio_net_q_object_bits { + u8 modify_field_select[0x40]; + + u8 reserved_at_40[0x20]; + + u8 vhca_id[0x10]; + u8 reserved_at_70[0x10]; + + u8 queue_feature_bit_mask_12_3[0xa]; + u8 dirty_bitmap_dump_enable[0x1]; + u8 vhost_log_page[0x5]; + u8 reserved_at_90[0xc]; + u8 state[0x4]; + + u8 reserved_at_a0[0x5]; + u8 queue_feature_bit_mask_2_0[0x3]; + u8 tisn_or_qpn[0x18]; + + u8 dirty_bitmap_mkey[0x20]; + + u8 dirty_bitmap_size[0x20]; + + u8 dirty_bitmap_addr[0x40]; + + u8 hw_available_index[0x10]; + u8 hw_used_index[0x10]; + + u8 reserved_at_160[0xa0]; + + struct mlx5_ifc_virtio_q_bits virtio_q_context; +}; + +struct mlx5_ifc_create_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +struct mlx5_ifc_create_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_destroy_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_destroy_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_query_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; +}; + +struct mlx5_ifc_query_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +enum { + MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0, + MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3, + MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4, +}; + +enum { + MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT = 0x0, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY = 0x1, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND = 0x2, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR = 0x3, +}; + +enum { + MLX5_RQTC_LIST_Q_TYPE_RQ = 0x0, + MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q = 0x1, +}; + +struct mlx5_ifc_modify_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +struct mlx5_ifc_modify_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +#endif /* __MLX5_VDPA_IFC_H_ */ diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c new file mode 100644 index 000000000000..ef1c550f8266 --- /dev/null +++ b/drivers/vdpa/mlx5/core/mr.c @@ -0,0 +1,486 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#include <linux/vdpa.h> +#include <linux/gcd.h> +#include <linux/string.h> +#include <linux/mlx5/qp.h> +#include "mlx5_vdpa.h" + +/* DIV_ROUND_UP where the divider is a power of 2 give by its log base 2 value */ +#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \ +({ \ + u64 __s = _s; \ + u64 _res; \ + _res = (((_n) + (1 << (__s)) - 1) >> (__s)); \ + _res; \ +}) + +static int get_octo_len(u64 len, int page_shift) +{ + u64 page_size = 1ULL << page_shift; + int npages; + + npages = ALIGN(len, page_size) >> page_shift; + return (npages + 1) / 2; +} + +static void fill_sg(struct mlx5_vdpa_direct_mr *mr, void *in) +{ + struct scatterlist *sg; + __be64 *pas; + int i; + + pas = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for_each_sg(mr->sg_head.sgl, sg, mr->nsg, i) + (*pas) = cpu_to_be64(sg_dma_address(sg)); +} + +static void mlx5_set_access_mode(void *mkc, int mode) +{ + MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2); +} + +static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt) +{ + struct scatterlist *sg; + int i; + + for_each_sg(mr->sg_head.sgl, sg, mr->nsg, i) + mtt[i] = cpu_to_be64(sg_dma_address(sg)); +} + +static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) +{ + int inlen; + void *mkc; + void *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); + fill_sg(mr, in); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO)); + MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO)); + mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, mvdev->res.pdn); + MLX5_SET64(mkc, mkc, start_addr, mr->offset); + MLX5_SET64(mkc, mkc, len, mr->end - mr->start); + MLX5_SET(mkc, mkc, log_page_size, mr->log_size); + MLX5_SET(mkc, mkc, translations_octword_size, + get_octo_len(mr->end - mr->start, mr->log_size)); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + get_octo_len(mr->end - mr->start, mr->log_size)); + populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt)); + err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen); + kvfree(in); + if (err) { + mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n"); + return err; + } + + return 0; +} + +static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) +{ + mlx5_vdpa_destroy_mkey(mvdev, &mr->mr); +} + +static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr) +{ + return max_t(u64, map->start, mr->start); +} + +static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr) +{ + return min_t(u64, map->last + 1, mr->end); +} + +static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr) +{ + return map_end(map, mr) - map_start(map, mr); +} + +#define MLX5_VDPA_INVALID_START_ADDR ((u64)-1) +#define MLX5_VDPA_INVALID_LEN ((u64)-1) + +static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey) +{ + struct mlx5_vdpa_direct_mr *s; + + s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list); + if (!s) + return MLX5_VDPA_INVALID_START_ADDR; + + return s->start; +} + +static u64 indir_len(struct mlx5_vdpa_mr *mkey) +{ + struct mlx5_vdpa_direct_mr *s; + struct mlx5_vdpa_direct_mr *e; + + s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list); + if (!s) + return MLX5_VDPA_INVALID_LEN; + + e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list); + + return e->end - s->start; +} + +#define LOG_MAX_KLM_SIZE 30 +#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE) + +static u32 klm_bcount(u64 size) +{ + return (u32)size; +} + +static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in) +{ + struct mlx5_vdpa_direct_mr *dmr; + struct mlx5_klm *klmarr; + struct mlx5_klm *klm; + bool first = true; + u64 preve; + int i; + + klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + i = 0; + list_for_each_entry(dmr, &mkey->head, list) { +again: + klm = &klmarr[i++]; + if (first) { + preve = dmr->start; + first = false; + } + + if (preve == dmr->start) { + klm->key = cpu_to_be32(dmr->mr.key); + klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start)); + preve = dmr->end; + } else { + klm->key = cpu_to_be32(mvdev->res.null_mkey); + klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve)); + preve = dmr->start; + goto again; + } + } +} + +static int klm_byte_size(int nklms) +{ + return 16 * ALIGN(nklms, 4); +} + +static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) +{ + int inlen; + void *mkc; + void *in; + int err; + u64 start; + u64 len; + + start = indir_start_addr(mr); + len = indir_len(mr); + if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN) + return -EINVAL; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms); + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, mvdev->res.pdn); + MLX5_SET64(mkc, mkc, start_addr, start); + MLX5_SET64(mkc, mkc, len, len); + MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms); + fill_indir(mvdev, mr, in); + err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen); + kfree(in); + return err; +} + +static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey) +{ + mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey); +} + +static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr, + struct vhost_iotlb *iotlb) +{ + struct vhost_iotlb_map *map; + unsigned long lgcd = 0; + int log_entity_size; + unsigned long size; + u64 start = 0; + int err; + struct page *pg; + unsigned int nsg; + int sglen; + u64 pa; + u64 paend; + struct scatterlist *sg; + struct device *dma = mvdev->mdev->device; + int ret; + + for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); + map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) { + size = maplen(map, mr); + lgcd = gcd(lgcd, size); + start += size; + } + log_entity_size = ilog2(lgcd); + + sglen = 1 << log_entity_size; + nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size); + + err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL); + if (err) + return err; + + sg = mr->sg_head.sgl; + for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); + map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) { + paend = map->addr + maplen(map, mr); + for (pa = map->addr; pa < paend; pa += sglen) { + pg = pfn_to_page(__phys_to_pfn(pa)); + if (!sg) { + mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n", + map->start, map->last + 1); + err = -ENOMEM; + goto err_map; + } + sg_set_page(sg, pg, sglen, 0); + sg = sg_next(sg); + if (!sg) + goto done; + } + } +done: + mr->log_size = log_entity_size; + mr->nsg = nsg; + ret = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); + if (!ret) + goto err_map; + + err = create_direct_mr(mvdev, mr); + if (err) + goto err_direct; + + return 0; + +err_direct: + dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); +err_map: + sg_free_table(&mr->sg_head); + return err; +} + +static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) +{ + struct device *dma = mvdev->mdev->device; + + destroy_direct_mr(mvdev, mr); + dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); + sg_free_table(&mr->sg_head); +} + +static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm, + struct vhost_iotlb *iotlb) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + struct mlx5_vdpa_direct_mr *dmr; + struct mlx5_vdpa_direct_mr *n; + LIST_HEAD(tmp); + u64 st; + u64 sz; + int err; + int i = 0; + + st = start; + while (size) { + sz = (u32)min_t(u64, MAX_KLM_SIZE, size); + dmr = kzalloc(sizeof(*dmr), GFP_KERNEL); + if (!dmr) { + err = -ENOMEM; + goto err_alloc; + } + + dmr->start = st; + dmr->end = st + sz; + dmr->perm = perm; + err = map_direct_mr(mvdev, dmr, iotlb); + if (err) { + kfree(dmr); + goto err_alloc; + } + + list_add_tail(&dmr->list, &tmp); + size -= sz; + mr->num_directs++; + mr->num_klms++; + st += sz; + i++; + } + list_splice_tail(&tmp, &mr->head); + return 0; + +err_alloc: + list_for_each_entry_safe(dmr, n, &mr->head, list) { + list_del_init(&dmr->list); + unmap_direct_mr(mvdev, dmr); + kfree(dmr); + } + return err; +} + +/* The iotlb pointer contains a list of maps. Go over the maps, possibly + * merging mergeable maps, and create direct memory keys that provide the + * device access to memory. The direct mkeys are then referred to by the + * indirect memory key that provides access to the enitre address space given + * by iotlb. + */ +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + struct mlx5_vdpa_direct_mr *dmr; + struct mlx5_vdpa_direct_mr *n; + struct vhost_iotlb_map *map; + u32 pperm = U16_MAX; + u64 last = U64_MAX; + u64 ps = U64_MAX; + u64 pe = U64_MAX; + u64 start = 0; + int err = 0; + int nnuls; + + if (mr->initialized) + return 0; + + INIT_LIST_HEAD(&mr->head); + for (map = vhost_iotlb_itree_first(iotlb, start, last); map; + map = vhost_iotlb_itree_next(map, start, last)) { + start = map->start; + if (pe == map->start && pperm == map->perm) { + pe = map->last + 1; + } else { + if (ps != U64_MAX) { + if (pe < map->start) { + /* We have a hole in the map. Check how + * many null keys are required to fill it. + */ + nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe, + LOG_MAX_KLM_SIZE); + mr->num_klms += nnuls; + } + err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb); + if (err) + goto err_chain; + } + ps = map->start; + pe = map->last + 1; + pperm = map->perm; + } + } + err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb); + if (err) + goto err_chain; + + /* Create the memory key that defines the guests's address space. This + * memory key refers to the direct keys that contain the MTT + * translations + */ + err = create_indirect_key(mvdev, mr); + if (err) + goto err_chain; + + mr->initialized = true; + return 0; + +err_chain: + list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) { + list_del_init(&dmr->list); + unmap_direct_mr(mvdev, dmr); + kfree(dmr); + } + return err; +} + +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + int err; + + mutex_lock(&mr->mkey_mtx); + err = _mlx5_vdpa_create_mr(mvdev, iotlb); + mutex_unlock(&mr->mkey_mtx); + return err; +} + +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + struct mlx5_vdpa_direct_mr *dmr; + struct mlx5_vdpa_direct_mr *n; + + mutex_lock(&mr->mkey_mtx); + if (!mr->initialized) + goto out; + + destroy_indirect_key(mvdev, mr); + list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) { + list_del_init(&dmr->list); + unmap_direct_mr(mvdev, dmr); + kfree(dmr); + } + memset(mr, 0, sizeof(*mr)); + mr->initialized = false; +out: + mutex_unlock(&mr->mkey_mtx); +} + +static bool map_empty(struct vhost_iotlb *iotlb) +{ + return !vhost_iotlb_itree_first(iotlb, 0, U64_MAX); +} + +int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, + bool *change_map) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + int err = 0; + + *change_map = false; + if (map_empty(iotlb)) { + mlx5_vdpa_destroy_mr(mvdev); + return 0; + } + mutex_lock(&mr->mkey_mtx); + if (mr->initialized) { + mlx5_vdpa_info(mvdev, "memory map update\n"); + *change_map = true; + } + if (!*change_map) + err = _mlx5_vdpa_create_mr(mvdev, iotlb); + mutex_unlock(&mr->mkey_mtx); + + return err; +} diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c new file mode 100644 index 000000000000..96e6421c5d1c --- /dev/null +++ b/drivers/vdpa/mlx5/core/resources.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#include <linux/mlx5/driver.h> +#include "mlx5_vdpa.h" + +static int alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid) +{ + struct mlx5_core_dev *mdev = dev->mdev; + + u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; + int err; + + MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); + MLX5_SET(alloc_pd_in, in, uid, uid); + + err = mlx5_cmd_exec_inout(mdev, alloc_pd, in, out); + if (!err) + *pdn = MLX5_GET(alloc_pd_out, out, pd); + + return err; +} + +static int dealloc_pd(struct mlx5_vdpa_dev *dev, u32 pdn, u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {}; + struct mlx5_core_dev *mdev = dev->mdev; + + MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, in, pd, pdn); + MLX5_SET(dealloc_pd_in, in, uid, uid); + return mlx5_cmd_exec_in(mdev, dealloc_pd, in); +} + +static int get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey) +{ + u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; + struct mlx5_core_dev *mdev = dev->mdev; + int err; + + MLX5_SET(query_special_contexts_in, in, opcode, MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + err = mlx5_cmd_exec_inout(mdev, query_special_contexts, in, out); + if (!err) + *null_mkey = MLX5_GET(query_special_contexts_out, out, null_mkey); + return err; +} + +static int create_uctx(struct mlx5_vdpa_dev *mvdev, u16 *uid) +{ + u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {}; + int inlen; + void *in; + int err; + + /* 0 means not supported */ + if (!MLX5_CAP_GEN(mvdev->mdev, log_max_uctx)) + return -EOPNOTSUPP; + + inlen = MLX5_ST_SZ_BYTES(create_uctx_in); + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX); + MLX5_SET(create_uctx_in, in, uctx.cap, MLX5_UCTX_CAP_RAW_TX); + + err = mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out)); + kfree(in); + if (!err) + *uid = MLX5_GET(create_uctx_out, out, uid); + + return err; +} + +static void destroy_uctx(struct mlx5_vdpa_dev *mvdev, u32 uid) +{ + u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {}; + u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {}; + + MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); + MLX5_SET(destroy_uctx_in, in, uid, uid); + + mlx5_cmd_exec(mvdev->mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn) +{ + u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {}; + int err; + + MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); + MLX5_SET(create_tis_in, in, uid, mvdev->res.uid); + err = mlx5_cmd_exec_inout(mvdev->mdev, create_tis, in, out); + if (!err) + *tisn = MLX5_GET(create_tis_out, out, tisn); + + return err; +} + +void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {}; + + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_tis_in, in, tisn, tisn); + mlx5_cmd_exec_in(mvdev->mdev, destroy_tis, in); +} + +int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn) +{ + u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {}; + int err; + + MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); + err = mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out)); + if (!err) + *rqtn = MLX5_GET(create_rqt_out, out, rqtn); + + return err; +} + +void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {}; + + MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); + mlx5_cmd_exec_in(mvdev->mdev, destroy_rqt, in); +} + +int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn) +{ + u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {}; + int err; + + MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR); + err = mlx5_cmd_exec_inout(mvdev->mdev, create_tir, in, out); + if (!err) + *tirn = MLX5_GET(create_tir_out, out, tirn); + + return err; +} + +void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {}; + + MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_tir_in, in, tirn, tirn); + mlx5_cmd_exec_in(mvdev->mdev, destroy_tir, in); +} + +int mlx5_vdpa_alloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 *tdn) +{ + u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {}; + int err; + + MLX5_SET(alloc_transport_domain_in, in, opcode, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); + MLX5_SET(alloc_transport_domain_in, in, uid, mvdev->res.uid); + + err = mlx5_cmd_exec_inout(mvdev->mdev, alloc_transport_domain, in, out); + if (!err) + *tdn = MLX5_GET(alloc_transport_domain_out, out, transport_domain); + + return err; +} + +void mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {}; + + MLX5_SET(dealloc_transport_domain_in, in, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, in, uid, mvdev->res.uid); + MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); + mlx5_cmd_exec_in(mvdev->mdev, dealloc_transport_domain, in); +} + +int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey, u32 *in, + int inlen) +{ + u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {}; + u32 mkey_index; + void *mkc; + int err; + + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); + + err = mlx5_cmd_exec(mvdev->mdev, in, inlen, lout, sizeof(lout)); + if (err) + return err; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); + mkey->iova = MLX5_GET64(mkc, mkc, start_addr); + mkey->size = MLX5_GET64(mkc, mkc, len); + mkey->key |= mlx5_idx_to_mkey(mkey_index); + mkey->pd = MLX5_GET(mkc, mkc, pd); + return 0; +} + +int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey) +{ + u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {}; + + MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); + return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in); +} + +int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) +{ + u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset); + struct mlx5_vdpa_resources *res = &mvdev->res; + struct mlx5_core_dev *mdev = mvdev->mdev; + u64 kick_addr; + int err; + + if (res->valid) { + mlx5_vdpa_warn(mvdev, "resources already allocated\n"); + return -EINVAL; + } + mutex_init(&mvdev->mr.mkey_mtx); + res->uar = mlx5_get_uars_page(mdev); + if (IS_ERR(res->uar)) { + err = PTR_ERR(res->uar); + goto err_uars; + } + + err = create_uctx(mvdev, &res->uid); + if (err) + goto err_uctx; + + err = alloc_pd(mvdev, &res->pdn, res->uid); + if (err) + goto err_pd; + + err = get_null_mkey(mvdev, &res->null_mkey); + if (err) + goto err_key; + + kick_addr = pci_resource_start(mdev->pdev, 0) + offset; + res->kick_addr = ioremap(kick_addr, PAGE_SIZE); + if (!res->kick_addr) { + err = -ENOMEM; + goto err_key; + } + res->valid = true; + + return 0; + +err_key: + dealloc_pd(mvdev, res->pdn, res->uid); +err_pd: + destroy_uctx(mvdev, res->uid); +err_uctx: + mlx5_put_uars_page(mdev, res->uar); +err_uars: + mutex_destroy(&mvdev->mr.mkey_mtx); + return err; +} + +void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_resources *res = &mvdev->res; + + if (!res->valid) + return; + + iounmap(res->kick_addr); + res->kick_addr = NULL; + dealloc_pd(mvdev, res->pdn, res->uid); + destroy_uctx(mvdev, res->uid); + mlx5_put_uars_page(mvdev->mdev, res->uar); + mutex_destroy(&mvdev->mr.mkey_mtx); + res->valid = false; +} diff --git a/drivers/vdpa/mlx5/net/main.c b/drivers/vdpa/mlx5/net/main.c new file mode 100644 index 000000000000..838cd98386ff --- /dev/null +++ b/drivers/vdpa/mlx5/net/main.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> +#include "mlx5_vdpa_ifc.h" +#include "mlx5_vnet.h" + +MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox VDPA driver"); +MODULE_LICENSE("Dual BSD/GPL"); + +static bool required_caps_supported(struct mlx5_core_dev *mdev) +{ + u8 event_mode; + u64 got; + + got = MLX5_CAP_GEN_64(mdev, general_obj_types); + + if (!(got & MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q)) + return false; + + event_mode = MLX5_CAP_DEV_VDPA_EMULATION(mdev, event_mode); + if (!(event_mode & MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE)) + return false; + + if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, eth_frame_offload_type)) + return false; + + return true; +} + +static void *mlx5_vdpa_add(struct mlx5_core_dev *mdev) +{ + struct mlx5_vdpa_dev *vdev; + + if (mlx5_core_is_pf(mdev)) + return NULL; + + if (!required_caps_supported(mdev)) { + dev_info(mdev->device, "virtio net emulation not supported\n"); + return NULL; + } + vdev = mlx5_vdpa_add_dev(mdev); + if (IS_ERR(vdev)) + return NULL; + + return vdev; +} + +static void mlx5_vdpa_remove(struct mlx5_core_dev *mdev, void *context) +{ + struct mlx5_vdpa_dev *vdev = context; + + mlx5_vdpa_remove_dev(vdev); +} + +static struct mlx5_interface mlx5_vdpa_interface = { + .add = mlx5_vdpa_add, + .remove = mlx5_vdpa_remove, + .protocol = MLX5_INTERFACE_PROTOCOL_VDPA, +}; + +static int __init mlx5_vdpa_init(void) +{ + return mlx5_register_interface(&mlx5_vdpa_interface); +} + +static void __exit mlx5_vdpa_exit(void) +{ + mlx5_unregister_interface(&mlx5_vdpa_interface); +} + +module_init(mlx5_vdpa_init); +module_exit(mlx5_vdpa_exit); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c new file mode 100644 index 000000000000..9df69d5efe8c --- /dev/null +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -0,0 +1,1974 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#include <linux/vdpa.h> +#include <uapi/linux/virtio_ids.h> +#include <linux/virtio_config.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/device.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/device.h> +#include "mlx5_vnet.h" +#include "mlx5_vdpa_ifc.h" +#include "mlx5_vdpa.h" + +#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev) + +#define VALID_FEATURES_MASK \ + (BIT(VIRTIO_NET_F_CSUM) | BIT(VIRTIO_NET_F_GUEST_CSUM) | \ + BIT(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT(VIRTIO_NET_F_MTU) | BIT(VIRTIO_NET_F_MAC) | \ + BIT(VIRTIO_NET_F_GUEST_TSO4) | BIT(VIRTIO_NET_F_GUEST_TSO6) | \ + BIT(VIRTIO_NET_F_GUEST_ECN) | BIT(VIRTIO_NET_F_GUEST_UFO) | BIT(VIRTIO_NET_F_HOST_TSO4) | \ + BIT(VIRTIO_NET_F_HOST_TSO6) | BIT(VIRTIO_NET_F_HOST_ECN) | BIT(VIRTIO_NET_F_HOST_UFO) | \ + BIT(VIRTIO_NET_F_MRG_RXBUF) | BIT(VIRTIO_NET_F_STATUS) | BIT(VIRTIO_NET_F_CTRL_VQ) | \ + BIT(VIRTIO_NET_F_CTRL_RX) | BIT(VIRTIO_NET_F_CTRL_VLAN) | \ + BIT(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT(VIRTIO_NET_F_GUEST_ANNOUNCE) | \ + BIT(VIRTIO_NET_F_MQ) | BIT(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT(VIRTIO_NET_F_HASH_REPORT) | \ + BIT(VIRTIO_NET_F_RSS) | BIT(VIRTIO_NET_F_RSC_EXT) | BIT(VIRTIO_NET_F_STANDBY) | \ + BIT(VIRTIO_NET_F_SPEED_DUPLEX) | BIT(VIRTIO_F_NOTIFY_ON_EMPTY) | \ + BIT(VIRTIO_F_ANY_LAYOUT) | BIT(VIRTIO_F_VERSION_1) | BIT(VIRTIO_F_ACCESS_PLATFORM) | \ + BIT(VIRTIO_F_RING_PACKED) | BIT(VIRTIO_F_ORDER_PLATFORM) | BIT(VIRTIO_F_SR_IOV)) + +#define VALID_STATUS_MASK \ + (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \ + VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED) + +struct mlx5_vdpa_net_resources { + u32 tisn; + u32 tdn; + u32 tirn; + u32 rqtn; + bool valid; +}; + +struct mlx5_vdpa_cq_buf { + struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; + int cqe_size; + int nent; +}; + +struct mlx5_vdpa_cq { + struct mlx5_core_cq mcq; + struct mlx5_vdpa_cq_buf buf; + struct mlx5_db db; + int cqe; +}; + +struct mlx5_vdpa_umem { + struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; + int size; + u32 id; +}; + +struct mlx5_vdpa_qp { + struct mlx5_core_qp mqp; + struct mlx5_frag_buf frag_buf; + struct mlx5_db db; + u16 head; + bool fw; +}; + +struct mlx5_vq_restore_info { + u32 num_ent; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + u16 avail_index; + bool ready; + struct vdpa_callback cb; + bool restore; +}; + +struct mlx5_vdpa_virtqueue { + bool ready; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + u32 num_ent; + struct vdpa_callback event_cb; + + /* Resources for implementing the notification channel from the device + * to the driver. fwqp is the firmware end of an RC connection; the + * other end is vqqp used by the driver. cq is is where completions are + * reported. + */ + struct mlx5_vdpa_cq cq; + struct mlx5_vdpa_qp fwqp; + struct mlx5_vdpa_qp vqqp; + + /* umem resources are required for the virtqueue operation. They're use + * is internal and they must be provided by the driver. + */ + struct mlx5_vdpa_umem umem1; + struct mlx5_vdpa_umem umem2; + struct mlx5_vdpa_umem umem3; + + bool initialized; + int index; + u32 virtq_id; + struct mlx5_vdpa_net *ndev; + u16 avail_idx; + int fw_state; + + /* keep last in the struct */ + struct mlx5_vq_restore_info ri; +}; + +/* We will remove this limitation once mlx5_vdpa_alloc_resources() + * provides for driver space allocation + */ +#define MLX5_MAX_SUPPORTED_VQS 16 + +struct mlx5_vdpa_net { + struct mlx5_vdpa_dev mvdev; + struct mlx5_vdpa_net_resources res; + struct virtio_net_config config; + struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS]; + + /* Serialize vq resources creation and destruction. This is required + * since memory map might change and we need to destroy and create + * resources while driver in operational. + */ + struct mutex reslock; + struct mlx5_flow_table *rxft; + struct mlx5_fc *rx_counter; + struct mlx5_flow_handle *rx_rule; + bool setup; + u16 mtu; +}; + +static void free_resources(struct mlx5_vdpa_net *ndev); +static void init_mvqs(struct mlx5_vdpa_net *ndev); +static int setup_driver(struct mlx5_vdpa_net *ndev); +static void teardown_driver(struct mlx5_vdpa_net *ndev); + +static bool mlx5_vdpa_debug; + +#define MLX5_LOG_VIO_FLAG(_feature) \ + do { \ + if (features & BIT(_feature)) \ + mlx5_vdpa_info(mvdev, "%s\n", #_feature); \ + } while (0) + +#define MLX5_LOG_VIO_STAT(_status) \ + do { \ + if (status & (_status)) \ + mlx5_vdpa_info(mvdev, "%s\n", #_status); \ + } while (0) + +static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set) +{ + if (status & ~VALID_STATUS_MASK) + mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n", + status & ~VALID_STATUS_MASK); + + if (!mlx5_vdpa_debug) + return; + + mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get"); + if (set && !status) { + mlx5_vdpa_info(mvdev, "driver resets the device\n"); + return; + } + + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED); +} + +static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set) +{ + if (features & ~VALID_FEATURES_MASK) + mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n", + features & ~VALID_FEATURES_MASK); + + if (!mlx5_vdpa_debug) + return; + + mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads"); + if (!features) + mlx5_vdpa_info(mvdev, "all feature bits are cleared\n"); + + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX); + MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY); + MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT); + MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1); + MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM); + MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED); + MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM); + MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV); +} + +static int create_tis(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + int err; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn); + err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn); + if (err) + mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err); + + return err; +} + +static void destroy_tis(struct mlx5_vdpa_net *ndev) +{ + mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn); +} + +#define MLX5_VDPA_CQE_SIZE 64 +#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE) + +static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent) +{ + struct mlx5_frag_buf *frag_buf = &buf->frag_buf; + u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE; + u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE; + int err; + + err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf, + ndev->mvdev.mdev->priv.numa_node); + if (err) + return err; + + mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); + + buf->cqe_size = MLX5_VDPA_CQE_SIZE; + buf->nent = nent; + + return 0; +} + +static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size) +{ + struct mlx5_frag_buf *frag_buf = &umem->frag_buf; + + return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf, + ndev->mvdev.mdev->priv.numa_node); +} + +static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf) +{ + mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf); +} + +static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n) +{ + return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n); +} + +static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf) +{ + struct mlx5_cqe64 *cqe64; + void *cqe; + int i; + + for (i = 0; i < buf->nent; i++) { + cqe = get_cqe(vcq, i); + cqe64 = cqe; + cqe64->op_own = MLX5_CQE_INVALID << 4; + } +} + +static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n) +{ + struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1)); + + if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && + !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe))) + return cqe64; + + return NULL; +} + +static void rx_post(struct mlx5_vdpa_qp *vqp, int n) +{ + vqp->head += n; + vqp->db.db[0] = cpu_to_be32(vqp->head); +} + +static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in, + struct mlx5_vdpa_virtqueue *mvq, u32 num_ent) +{ + struct mlx5_vdpa_qp *vqp; + __be64 *pas; + void *qpc; + + vqp = fw ? &mvq->fwqp : &mvq->vqqp; + MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid); + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + if (vqp->fw) { + /* Firmware QP is allocated by the driver for the firmware's + * use so we can skip part of the params as they will be chosen by firmware + */ + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ); + MLX5_SET(qpc, qpc, no_sq, 1); + return; + } + + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); + MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index); + MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, no_sq, 1); + MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent)); + MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); + pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas); + mlx5_fill_page_frag_array(&vqp->frag_buf, pas); +} + +static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent) +{ + return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, + num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf, + ndev->mvdev.mdev->priv.numa_node); +} + +static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) +{ + mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf); +} + +static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, + struct mlx5_vdpa_qp *vqp) +{ + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + int inlen = MLX5_ST_SZ_BYTES(create_qp_in); + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; + void *qpc; + void *in; + int err; + + if (!vqp->fw) { + vqp = &mvq->vqqp; + err = rq_buf_alloc(ndev, vqp, mvq->num_ent); + if (err) + return err; + + err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db); + if (err) + goto err_db; + inlen += vqp->frag_buf.npages * sizeof(__be64); + } + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_kzalloc; + } + + qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent); + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); + if (!vqp->fw) + MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma); + MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + kfree(in); + if (err) + goto err_kzalloc; + + vqp->mqp.uid = ndev->mvdev.res.uid; + vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn); + + if (!vqp->fw) + rx_post(vqp, mvq->num_ent); + + return 0; + +err_kzalloc: + if (!vqp->fw) + mlx5_db_free(ndev->mvdev.mdev, &vqp->db); +err_db: + if (!vqp->fw) + rq_buf_free(ndev, vqp); + + return err; +} + +static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) +{ + u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; + + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn); + MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid); + if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in)) + mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn); + if (!vqp->fw) { + mlx5_db_free(ndev->mvdev.mdev, &vqp->db); + rq_buf_free(ndev, vqp); + } +} + +static void *next_cqe_sw(struct mlx5_vdpa_cq *cq) +{ + return get_sw_cqe(cq, cq->mcq.cons_index); +} + +static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq) +{ + struct mlx5_cqe64 *cqe64; + + cqe64 = next_cqe_sw(vcq); + if (!cqe64) + return -EAGAIN; + + vcq->mcq.cons_index++; + return 0; +} + +static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num) +{ + mlx5_cq_set_ci(&mvq->cq.mcq); + rx_post(&mvq->vqqp, num); + if (mvq->event_cb.callback) + mvq->event_cb.callback(mvq->event_cb.private); +} + +static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) +{ + struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq); + struct mlx5_vdpa_net *ndev = mvq->ndev; + void __iomem *uar_page = ndev->mvdev.res.uar->map; + int num = 0; + + while (!mlx5_vdpa_poll_one(&mvq->cq)) { + num++; + if (num > mvq->num_ent / 2) { + /* If completions keep coming while we poll, we want to + * let the hardware know that we consumed them by + * updating the doorbell record. We also let vdpa core + * know about this so it passes it on the virtio driver + * on the guest. + */ + mlx5_vdpa_handle_completions(mvq, num); + num = 0; + } + } + + if (num) + mlx5_vdpa_handle_completions(mvq, num); + + mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); +} + +static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) +{ + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + void __iomem *uar_page = ndev->mvdev.res.uar->map; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_vdpa_cq *vcq = &mvq->cq; + unsigned int irqn; + __be64 *pas; + int inlen; + void *cqc; + void *in; + int err; + int eqn; + + err = mlx5_db_alloc(mdev, &vcq->db); + if (err) + return err; + + vcq->mcq.set_ci_db = vcq->db.db; + vcq->mcq.arm_db = vcq->db.db + 1; + vcq->mcq.cqe_sz = 64; + + err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent); + if (err) + goto err_db; + + cq_frag_buf_init(vcq, &vcq->buf); + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages; + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_vzalloc; + } + + MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid); + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); + mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas); + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + + /* Use vector 0 by default. Consider adding code to choose least used + * vector. + */ + err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn); + if (err) + goto err_vec; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent)); + MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index); + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma); + + err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out)); + if (err) + goto err_vec; + + vcq->mcq.comp = mlx5_vdpa_cq_comp; + vcq->cqe = num_ent; + vcq->mcq.set_ci_db = vcq->db.db; + vcq->mcq.arm_db = vcq->db.db + 1; + mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); + kfree(in); + return 0; + +err_vec: + kfree(in); +err_vzalloc: + cq_frag_buf_free(ndev, &vcq->buf); +err_db: + mlx5_db_free(ndev->mvdev.mdev, &vcq->db); + return err; +} + +static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) +{ + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + struct mlx5_vdpa_cq *vcq = &mvq->cq; + + if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) { + mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn); + return; + } + cq_frag_buf_free(ndev, &vcq->buf); + mlx5_db_free(ndev->mvdev.mdev, &vcq->db); +} + +static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, + struct mlx5_vdpa_umem **umemp) +{ + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + int p_a; + int p_b; + + switch (num) { + case 1: + p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a); + p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b); + *umemp = &mvq->umem1; + break; + case 2: + p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a); + p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b); + *umemp = &mvq->umem2; + break; + case 3: + p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a); + p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b); + *umemp = &mvq->umem3; + break; + } + return p_a * mvq->num_ent + p_b; +} + +static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem) +{ + mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf); +} + +static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) +{ + int inlen; + u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {}; + void *um; + void *in; + int err; + __be64 *pas; + int size; + struct mlx5_vdpa_umem *umem; + + size = umem_size(ndev, mvq, num, &umem); + if (size < 0) + return size; + + umem->size = size; + err = umem_frag_buf_alloc(ndev, umem, size); + if (err) + return err; + + inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_in; + } + + MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM); + MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid); + um = MLX5_ADDR_OF(create_umem_in, in, umem); + MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages); + + pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]); + mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW); + + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err); + goto err_cmd; + } + + kfree(in); + umem->id = MLX5_GET(create_umem_out, out, umem_id); + + return 0; + +err_cmd: + kfree(in); +err_in: + umem_frag_buf_free(ndev, umem); + return err; +} + +static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) +{ + u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {}; + struct mlx5_vdpa_umem *umem; + + switch (num) { + case 1: + umem = &mvq->umem1; + break; + case 2: + umem = &mvq->umem2; + break; + case 3: + umem = &mvq->umem3; + break; + } + + MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM); + MLX5_SET(destroy_umem_in, in, umem_id, umem->id); + if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) + return; + + umem_frag_buf_free(ndev, umem); +} + +static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + int num; + int err; + + for (num = 1; num <= 3; num++) { + err = create_umem(ndev, mvq, num); + if (err) + goto err_umem; + } + return 0; + +err_umem: + for (num--; num > 0; num--) + umem_destroy(ndev, mvq, num); + + return err; +} + +static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + int num; + + for (num = 3; num > 0; num--) + umem_destroy(ndev, mvq, num); +} + +static int get_queue_type(struct mlx5_vdpa_net *ndev) +{ + u32 type_mask; + + type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type); + + /* prefer split queue */ + if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED) + return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; + + WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)); + + return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; +} + +static bool vq_is_tx(u16 idx) +{ + return idx % 2; +} + +static u16 get_features_12_3(u64 features) +{ + return (!!(features & BIT(VIRTIO_NET_F_HOST_TSO4)) << 9) | + (!!(features & BIT(VIRTIO_NET_F_HOST_TSO6)) << 8) | + (!!(features & BIT(VIRTIO_NET_F_CSUM)) << 7) | + (!!(features & BIT(VIRTIO_NET_F_GUEST_CSUM)) << 6); +} + +static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); + u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {}; + void *obj_context; + void *cmd_hdr; + void *vq_ctx; + void *in; + int err; + + err = umems_create(ndev, mvq); + if (err) + return err; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_alloc; + } + + cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); + + obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context); + MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); + MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, + get_features_12_3(ndev->mvdev.actual_features)); + vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); + MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev)); + + if (vq_is_tx(mvq->index)) + MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn); + + MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); + MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index); + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); + MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); + MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, + !!(ndev->mvdev.actual_features & VIRTIO_F_VERSION_1)); + MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); + MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); + MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); + MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key); + MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); + MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); + MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); + MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem1.size); + MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); + MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem1.size); + MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); + if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type)) + MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1); + + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); + if (err) + goto err_cmd; + + kfree(in); + mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; + +err_cmd: + kfree(in); +err_alloc: + umems_destroy(ndev, mvq); + return err; +} + +static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {}; + + MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id); + MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid); + MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type, + MLX5_OBJ_TYPE_VIRTIO_NET_Q); + if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) { + mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id); + return; + } + umems_destroy(ndev, mvq); +} + +static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) +{ + return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn; +} + +static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) +{ + return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn; +} + +static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out, + int *outlen, u32 qpn, u32 rqpn) +{ + void *qpc; + void *pp; + + switch (cmd) { + case MLX5_CMD_OP_2RST_QP: + *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in); + *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out); + *in = kzalloc(*inlen, GFP_KERNEL); + *out = kzalloc(*outlen, GFP_KERNEL); + if (!*in || !*out) + goto outerr; + + MLX5_SET(qp_2rst_in, *in, opcode, cmd); + MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid); + MLX5_SET(qp_2rst_in, *in, qpn, qpn); + break; + case MLX5_CMD_OP_RST2INIT_QP: + *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in); + *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out); + *in = kzalloc(*inlen, GFP_KERNEL); + *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL); + if (!*in || !*out) + goto outerr; + + MLX5_SET(rst2init_qp_in, *in, opcode, cmd); + MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid); + MLX5_SET(rst2init_qp_in, *in, qpn, qpn); + qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); + MLX5_SET(qpc, qpc, remote_qpn, rqpn); + MLX5_SET(qpc, qpc, rwe, 1); + pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + MLX5_SET(ads, pp, vhca_port_num, 1); + break; + case MLX5_CMD_OP_INIT2RTR_QP: + *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in); + *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out); + *in = kzalloc(*inlen, GFP_KERNEL); + *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL); + if (!*in || !*out) + goto outerr; + + MLX5_SET(init2rtr_qp_in, *in, opcode, cmd); + MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid); + MLX5_SET(init2rtr_qp_in, *in, qpn, qpn); + qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); + MLX5_SET(qpc, qpc, log_msg_max, 30); + MLX5_SET(qpc, qpc, remote_qpn, rqpn); + pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + MLX5_SET(ads, pp, fl, 1); + break; + case MLX5_CMD_OP_RTR2RTS_QP: + *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in); + *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out); + *in = kzalloc(*inlen, GFP_KERNEL); + *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL); + if (!*in || !*out) + goto outerr; + + MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd); + MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid); + MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn); + qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); + pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + MLX5_SET(ads, pp, ack_timeout, 14); + MLX5_SET(qpc, qpc, retry_count, 7); + MLX5_SET(qpc, qpc, rnr_retry, 7); + break; + default: + goto outerr_nullify; + } + + return; + +outerr: + kfree(*in); + kfree(*out); +outerr_nullify: + *in = NULL; + *out = NULL; +} + +static void free_inout(void *in, void *out) +{ + kfree(in); + kfree(out); +} + +/* Two QPs are used by each virtqueue. One is used by the driver and one by + * firmware. The fw argument indicates whether the subjected QP is the one used + * by firmware. + */ +static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd) +{ + int outlen; + int inlen; + void *out; + void *in; + int err; + + alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw)); + if (!in || !out) + return -ENOMEM; + + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen); + free_inout(in, out); + return err; +} + +static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + int err; + + err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP); + if (err) + return err; + + return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP); +} + +struct mlx5_virtq_attr { + u8 state; + u16 available_index; +}; + +static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, + struct mlx5_virtq_attr *attr) +{ + int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out); + u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {}; + void *out; + void *obj_context; + void *cmd_hdr; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen); + if (err) + goto err_cmd; + + obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context); + memset(attr, 0, sizeof(*attr)); + attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); + attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); + kfree(out); + return 0; + +err_cmd: + kfree(out); + return err; +} + +static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); + u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; + void *obj_context; + void *cmd_hdr; + void *in; + int err; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); + + obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); + MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, + MLX5_VIRTQ_MODIFY_MASK_STATE); + MLX5_SET(virtio_net_q_object, obj_context, state, state); + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); + kfree(in); + if (!err) + mvq->fw_state = state; + + return err; +} + +static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + u16 idx = mvq->index; + int err; + + if (!mvq->num_ent) + return 0; + + if (mvq->initialized) { + mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n"); + return -EINVAL; + } + + err = cq_create(ndev, idx, mvq->num_ent); + if (err) + return err; + + err = qp_create(ndev, mvq, &mvq->fwqp); + if (err) + goto err_fwqp; + + err = qp_create(ndev, mvq, &mvq->vqqp); + if (err) + goto err_vqqp; + + err = connect_qps(ndev, mvq); + if (err) + goto err_connect; + + err = create_virtqueue(ndev, mvq); + if (err) + goto err_connect; + + if (mvq->ready) { + err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n", + idx, err); + goto err_connect; + } + } + + mvq->initialized = true; + return 0; + +err_connect: + qp_destroy(ndev, &mvq->vqqp); +err_vqqp: + qp_destroy(ndev, &mvq->fwqp); +err_fwqp: + cq_destroy(ndev, idx); + return err; +} + +static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + struct mlx5_virtq_attr attr; + + if (!mvq->initialized) + return; + + if (query_virtqueue(ndev, mvq, &attr)) { + mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n"); + return; + } + if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) + return; + + if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) + mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); +} + +static void suspend_vqs(struct mlx5_vdpa_net *ndev) +{ + int i; + + for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++) + suspend_vq(ndev, &ndev->vqs[i]); +} + +static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + if (!mvq->initialized) + return; + + suspend_vq(ndev, mvq); + destroy_virtqueue(ndev, mvq); + qp_destroy(ndev, &mvq->vqqp); + qp_destroy(ndev, &mvq->fwqp); + cq_destroy(ndev, mvq->index); + mvq->initialized = false; +} + +static int create_rqt(struct mlx5_vdpa_net *ndev) +{ + int log_max_rqt; + __be32 *list; + void *rqtc; + int inlen; + void *in; + int i, j; + int err; + + log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size)); + if (log_max_rqt < 1) + return -EOPNOTSUPP; + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num); + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid); + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); + MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt); + MLX5_SET(rqtc, rqtc, rqt_actual_size, 1); + list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); + for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) { + if (!ndev->vqs[j].initialized) + continue; + + if (!vq_is_tx(ndev->vqs[j].index)) { + list[i] = cpu_to_be32(ndev->vqs[j].virtq_id); + i++; + } + } + + err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); + kfree(in); + if (err) + return err; + + return 0; +} + +static void destroy_rqt(struct mlx5_vdpa_net *ndev) +{ + mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn); +} + +static int create_tir(struct mlx5_vdpa_net *ndev) +{ +#define HASH_IP_L4PORTS \ + (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \ + MLX5_HASH_FIELD_SEL_L4_DPORT) + static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, + 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94, + 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1, + 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59, + 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a }; + void *rss_key; + void *outer; + void *tirc; + void *in; + int err; + + in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid); + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); + rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); + memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key)); + + outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS); + + MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn); + MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn); + + err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn); + kfree(in); + return err; +} + +static void destroy_tir(struct mlx5_vdpa_net *ndev) +{ + mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn); +} + +static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_flow_destination dest[2] = {}; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_namespace *ns; + int err; + + /* for now, one entry, match all, forward to tir */ + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + + ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); + if (!ns) { + mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n"); + return -EOPNOTSUPP; + } + + ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ndev->rxft)) + return PTR_ERR(ndev->rxft); + + ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false); + if (IS_ERR(ndev->rx_counter)) { + err = PTR_ERR(ndev->rx_counter); + goto err_fc; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest[0].tir_num = ndev->res.tirn; + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter_id = mlx5_fc_id(ndev->rx_counter); + ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2); + if (IS_ERR(ndev->rx_rule)) { + err = PTR_ERR(ndev->rx_rule); + ndev->rx_rule = NULL; + goto err_rule; + } + + return 0; + +err_rule: + mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); +err_fc: + mlx5_destroy_flow_table(ndev->rxft); + return err; +} + +static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev) +{ + if (!ndev->rx_rule) + return; + + mlx5_del_flow_rules(ndev->rx_rule); + mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); + mlx5_destroy_flow_table(ndev->rxft); + + ndev->rx_rule = NULL; +} + +static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + if (unlikely(!mvq->ready)) + return; + + iowrite16(idx, ndev->mvdev.res.kick_addr); +} + +static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area, + u64 driver_area, u64 device_area) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + mvq->desc_addr = desc_area; + mvq->device_addr = device_area; + mvq->driver_addr = driver_area; + return 0; +} + +static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq; + + mvq = &ndev->vqs[idx]; + mvq->num_ent = num; +} + +static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx]; + + vq->event_cb = *cb; +} + +static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + if (!ready) + suspend_vq(ndev, mvq); + + mvq->ready = ready; +} + +static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + return mvq->ready; +} + +static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, + const struct vdpa_vq_state *state) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) { + mlx5_vdpa_warn(mvdev, "can't modify available index\n"); + return -EINVAL; + } + + mvq->avail_idx = state->avail_index; + return 0; +} + +static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_virtq_attr attr; + int err; + + if (!mvq->initialized) + return -EAGAIN; + + err = query_virtqueue(ndev, mvq, &attr); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); + return err; + } + state->avail_index = attr.available_index; + return 0; +} + +static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev) +{ + return PAGE_SIZE; +} + +enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9, + MLX5_VIRTIO_NET_F_CSUM = 1 << 10, + MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11, + MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12, +}; + +static u64 mlx_to_vritio_features(u16 dev_features) +{ + u64 result = 0; + + if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM) + result |= BIT(VIRTIO_NET_F_GUEST_CSUM); + if (dev_features & MLX5_VIRTIO_NET_F_CSUM) + result |= BIT(VIRTIO_NET_F_CSUM); + if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6) + result |= BIT(VIRTIO_NET_F_HOST_TSO6); + if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4) + result |= BIT(VIRTIO_NET_F_HOST_TSO4); + + return result; +} + +static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + u16 dev_features; + + dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask); + ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features); + if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0)) + ndev->mvdev.mlx_features |= BIT(VIRTIO_F_VERSION_1); + ndev->mvdev.mlx_features |= BIT(VIRTIO_F_ACCESS_PLATFORM); + print_features(mvdev, ndev->mvdev.mlx_features, false); + return ndev->mvdev.mlx_features; +} + +static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features) +{ + if (!(features & BIT(VIRTIO_F_ACCESS_PLATFORM))) + return -EOPNOTSUPP; + + return 0; +} + +static int setup_virtqueues(struct mlx5_vdpa_net *ndev) +{ + int err; + int i; + + for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) { + err = setup_vq(ndev, &ndev->vqs[i]); + if (err) + goto err_vq; + } + + return 0; + +err_vq: + for (--i; i >= 0; i--) + teardown_vq(ndev, &ndev->vqs[i]); + + return err; +} + +static void teardown_virtqueues(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_virtqueue *mvq; + int i; + + for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) { + mvq = &ndev->vqs[i]; + if (!mvq->initialized) + continue; + + teardown_vq(ndev, mvq); + } +} + +/* TODO: cross-endian support */ +static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev) +{ + return virtio_legacy_is_little_endian() || + (mvdev->actual_features & (1ULL << VIRTIO_F_VERSION_1)); +} + +static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + int err; + + print_features(mvdev, features, true); + + err = verify_min_features(mvdev, features); + if (err) + return err; + + ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; + ndev->config.mtu = __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), + ndev->mtu); + return err; +} + +static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb) +{ + /* not implemented */ + mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n"); +} + +#define MLX5_VDPA_MAX_VQ_ENTRIES 256 +static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev) +{ + return MLX5_VDPA_MAX_VQ_ENTRIES; +} + +static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev) +{ + return VIRTIO_ID_NET; +} + +static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev) +{ + return PCI_VENDOR_ID_MELLANOX; +} + +static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + + print_status(mvdev, ndev->mvdev.status, false); + return ndev->mvdev.status; +} + +static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + struct mlx5_vq_restore_info *ri = &mvq->ri; + struct mlx5_virtq_attr attr; + int err; + + if (!mvq->initialized) + return 0; + + err = query_virtqueue(ndev, mvq, &attr); + if (err) + return err; + + ri->avail_index = attr.available_index; + ri->ready = mvq->ready; + ri->num_ent = mvq->num_ent; + ri->desc_addr = mvq->desc_addr; + ri->device_addr = mvq->device_addr; + ri->driver_addr = mvq->driver_addr; + ri->cb = mvq->event_cb; + ri->restore = true; + return 0; +} + +static int save_channels_info(struct mlx5_vdpa_net *ndev) +{ + int i; + + for (i = 0; i < ndev->mvdev.max_vqs; i++) { + memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri)); + save_channel_info(ndev, &ndev->vqs[i]); + } + return 0; +} + +static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev) +{ + int i; + + for (i = 0; i < ndev->mvdev.max_vqs; i++) + memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); +} + +static void restore_channels_info(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_virtqueue *mvq; + struct mlx5_vq_restore_info *ri; + int i; + + mlx5_clear_vqs(ndev); + init_mvqs(ndev); + for (i = 0; i < ndev->mvdev.max_vqs; i++) { + mvq = &ndev->vqs[i]; + ri = &mvq->ri; + if (!ri->restore) + continue; + + mvq->avail_idx = ri->avail_index; + mvq->ready = ri->ready; + mvq->num_ent = ri->num_ent; + mvq->desc_addr = ri->desc_addr; + mvq->device_addr = ri->device_addr; + mvq->driver_addr = ri->driver_addr; + mvq->event_cb = ri->cb; + } +} + +static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb) +{ + int err; + + suspend_vqs(ndev); + err = save_channels_info(ndev); + if (err) + goto err_mr; + + teardown_driver(ndev); + mlx5_vdpa_destroy_mr(&ndev->mvdev); + err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb); + if (err) + goto err_mr; + + restore_channels_info(ndev); + err = setup_driver(ndev); + if (err) + goto err_setup; + + return 0; + +err_setup: + mlx5_vdpa_destroy_mr(&ndev->mvdev); +err_mr: + return err; +} + +static int setup_driver(struct mlx5_vdpa_net *ndev) +{ + int err; + + mutex_lock(&ndev->reslock); + if (ndev->setup) { + mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n"); + err = 0; + goto out; + } + err = setup_virtqueues(ndev); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n"); + goto out; + } + + err = create_rqt(ndev); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n"); + goto err_rqt; + } + + err = create_tir(ndev); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n"); + goto err_tir; + } + + err = add_fwd_to_tir(ndev); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n"); + goto err_fwd; + } + ndev->setup = true; + mutex_unlock(&ndev->reslock); + + return 0; + +err_fwd: + destroy_tir(ndev); +err_tir: + destroy_rqt(ndev); +err_rqt: + teardown_virtqueues(ndev); +out: + mutex_unlock(&ndev->reslock); + return err; +} + +static void teardown_driver(struct mlx5_vdpa_net *ndev) +{ + mutex_lock(&ndev->reslock); + if (!ndev->setup) + goto out; + + remove_fwd_to_tir(ndev); + destroy_tir(ndev); + destroy_rqt(ndev); + teardown_virtqueues(ndev); + ndev->setup = false; +out: + mutex_unlock(&ndev->reslock); +} + +static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + int err; + + print_status(mvdev, status, true); + if (!status) { + mlx5_vdpa_info(mvdev, "performing device reset\n"); + teardown_driver(ndev); + mlx5_vdpa_destroy_mr(&ndev->mvdev); + ndev->mvdev.status = 0; + ndev->mvdev.mlx_features = 0; + ++mvdev->generation; + return; + } + + if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { + if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + err = setup_driver(ndev); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); + goto err_setup; + } + } else { + mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); + return; + } + } + + ndev->mvdev.status = status; + return; + +err_setup: + mlx5_vdpa_destroy_mr(&ndev->mvdev); + ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; +} + +static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, + unsigned int len) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + + if (offset + len < sizeof(struct virtio_net_config)) + memcpy(buf, (u8 *)&ndev->config + offset, len); +} + +static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, + unsigned int len) +{ + /* not supported */ +} + +static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + + return mvdev->generation; +} + +static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + bool change_map; + int err; + + err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map); + if (err) { + mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); + return err; + } + + if (change_map) + return mlx5_vdpa_change_map(ndev, iotlb); + + return 0; +} + +static void mlx5_vdpa_free(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev; + + ndev = to_mlx5_vdpa_ndev(mvdev); + + free_resources(ndev); + mlx5_vdpa_free_resources(&ndev->mvdev); + mutex_destroy(&ndev->reslock); +} + +static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) +{ + struct vdpa_notification_area ret = {}; + + return ret; +} + +static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx) +{ + return -EOPNOTSUPP; +} + +static const struct vdpa_config_ops mlx5_vdpa_ops = { + .set_vq_address = mlx5_vdpa_set_vq_address, + .set_vq_num = mlx5_vdpa_set_vq_num, + .kick_vq = mlx5_vdpa_kick_vq, + .set_vq_cb = mlx5_vdpa_set_vq_cb, + .set_vq_ready = mlx5_vdpa_set_vq_ready, + .get_vq_ready = mlx5_vdpa_get_vq_ready, + .set_vq_state = mlx5_vdpa_set_vq_state, + .get_vq_state = mlx5_vdpa_get_vq_state, + .get_vq_notification = mlx5_get_vq_notification, + .get_vq_irq = mlx5_get_vq_irq, + .get_vq_align = mlx5_vdpa_get_vq_align, + .get_features = mlx5_vdpa_get_features, + .set_features = mlx5_vdpa_set_features, + .set_config_cb = mlx5_vdpa_set_config_cb, + .get_vq_num_max = mlx5_vdpa_get_vq_num_max, + .get_device_id = mlx5_vdpa_get_device_id, + .get_vendor_id = mlx5_vdpa_get_vendor_id, + .get_status = mlx5_vdpa_get_status, + .set_status = mlx5_vdpa_set_status, + .get_config = mlx5_vdpa_get_config, + .set_config = mlx5_vdpa_set_config, + .get_generation = mlx5_vdpa_get_generation, + .set_map = mlx5_vdpa_set_map, + .free = mlx5_vdpa_free, +}; + +static int alloc_resources(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_net_resources *res = &ndev->res; + int err; + + if (res->valid) { + mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n"); + return -EEXIST; + } + + err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn); + if (err) + return err; + + err = create_tis(ndev); + if (err) + goto err_tis; + + res->valid = true; + + return 0; + +err_tis: + mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); + return err; +} + +static void free_resources(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_net_resources *res = &ndev->res; + + if (!res->valid) + return; + + destroy_tis(ndev); + mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); + res->valid = false; +} + +static void init_mvqs(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_virtqueue *mvq; + int i; + + for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) { + mvq = &ndev->vqs[i]; + memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); + mvq->index = i; + mvq->ndev = ndev; + mvq->fwqp.fw = true; + } + for (; i < ndev->mvdev.max_vqs; i++) { + mvq = &ndev->vqs[i]; + memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); + mvq->index = i; + mvq->ndev = ndev; + } +} + +void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev) +{ + struct virtio_net_config *config; + struct mlx5_vdpa_dev *mvdev; + struct mlx5_vdpa_net *ndev; + u32 max_vqs; + int err; + + /* we save one virtqueue for control virtqueue should we require it */ + max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues); + max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); + + ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, + 2 * mlx5_vdpa_max_qps(max_vqs)); + if (IS_ERR(ndev)) + return ndev; + + ndev->mvdev.max_vqs = max_vqs; + mvdev = &ndev->mvdev; + mvdev->mdev = mdev; + init_mvqs(ndev); + mutex_init(&ndev->reslock); + config = &ndev->config; + err = mlx5_query_nic_vport_mtu(mdev, &ndev->mtu); + if (err) + goto err_mtu; + + err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); + if (err) + goto err_mtu; + + mvdev->vdev.dma_dev = mdev->device; + err = mlx5_vdpa_alloc_resources(&ndev->mvdev); + if (err) + goto err_mtu; + + err = alloc_resources(ndev); + if (err) + goto err_res; + + err = vdpa_register_device(&mvdev->vdev); + if (err) + goto err_reg; + + return ndev; + +err_reg: + free_resources(ndev); +err_res: + mlx5_vdpa_free_resources(&ndev->mvdev); +err_mtu: + mutex_destroy(&ndev->reslock); + put_device(&mvdev->vdev.dev); + return ERR_PTR(err); +} + +void mlx5_vdpa_remove_dev(struct mlx5_vdpa_dev *mvdev) +{ + vdpa_unregister_device(&mvdev->vdev); +} diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h b/drivers/vdpa/mlx5/net/mlx5_vnet.h new file mode 100644 index 000000000000..f2d6d68b020e --- /dev/null +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#ifndef __MLX5_VNET_H_ +#define __MLX5_VNET_H_ + +#include <linux/vdpa.h> +#include <linux/virtio_net.h> +#include <linux/vringh.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> +#include "mlx5_vdpa.h" + +static inline u32 mlx5_vdpa_max_qps(int max_vqs) +{ + return max_vqs / 2; +} + +#define to_mlx5_vdpa_ndev(__mvdev) container_of(__mvdev, struct mlx5_vdpa_net, mvdev) +void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev); +void mlx5_vdpa_remove_dev(struct mlx5_vdpa_dev *mvdev); + +#endif /* __MLX5_VNET_H_ */ diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index de211ef3738c..a69ffc991e13 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -61,6 +61,7 @@ static void vdpa_release_dev(struct device *d) * initialized but before registered. * @parent: the parent device * @config: the bus operations that is supported by this device + * @nvqs: number of virtqueues supported by this device * @size: size of the parent structure that contains private data * * Driver should use vdpa_alloc_device() wrapper macro instead of @@ -71,6 +72,7 @@ static void vdpa_release_dev(struct device *d) */ struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, + int nvqs, size_t size) { struct vdpa_device *vdev; @@ -96,6 +98,8 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, vdev->dev.release = vdpa_release_dev; vdev->index = err; vdev->config = config; + vdev->features_valid = false; + vdev->nvqs = nvqs; err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index); if (err) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index c7334cc65bb2..62d640327145 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -24,6 +24,7 @@ #include <linux/etherdevice.h> #include <linux/vringh.h> #include <linux/vdpa.h> +#include <linux/virtio_byteorder.h> #include <linux/vhost_iotlb.h> #include <uapi/linux/virtio_config.h> #include <uapi/linux/virtio_net.h> @@ -33,6 +34,10 @@ #define DRV_DESC "vDPA Device Simulator" #define DRV_LICENSE "GPL v2" +static int batch_mapping = 1; +module_param(batch_mapping, int, 0444); +MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable"); + struct vdpasim_virtqueue { struct vringh vring; struct vringh_kiov iov; @@ -55,12 +60,12 @@ struct vdpasim_virtqueue { static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | (1ULL << VIRTIO_F_VERSION_1) | - (1ULL << VIRTIO_F_IOMMU_PLATFORM); + (1ULL << VIRTIO_F_ACCESS_PLATFORM); /* State of each vdpasim device */ struct vdpasim { struct vdpa_device vdpa; - struct vdpasim_virtqueue vqs[2]; + struct vdpasim_virtqueue vqs[VDPASIM_VQ_NUM]; struct work_struct work; /* spinlock to synchronize virtqueue state */ spinlock_t lock; @@ -70,8 +75,27 @@ struct vdpasim { u32 status; u32 generation; u64 features; + /* spinlock to synchronize iommu table */ + spinlock_t iommu_lock; }; +/* TODO: cross-endian support */ +static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim) +{ + return virtio_legacy_is_little_endian() || + (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1)); +} + +static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val) +{ + return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val) +{ + return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val); +} + static struct vdpasim *vdpasim_dev; static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) @@ -118,7 +142,9 @@ static void vdpasim_reset(struct vdpasim *vdpasim) for (i = 0; i < VDPASIM_VQ_NUM; i++) vdpasim_vq_reset(&vdpasim->vqs[i]); + spin_lock(&vdpasim->iommu_lock); vhost_iotlb_reset(vdpasim->iommu); + spin_unlock(&vdpasim->iommu_lock); vdpasim->features = 0; vdpasim->status = 0; @@ -236,8 +262,10 @@ static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page, /* For simplicity, use identical mapping to avoid e.g iova * allocator. */ + spin_lock(&vdpasim->iommu_lock); ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1, pa, dir_to_perm(dir)); + spin_unlock(&vdpasim->iommu_lock); if (ret) return DMA_MAPPING_ERROR; @@ -251,8 +279,10 @@ static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr, struct vdpasim *vdpasim = dev_to_sim(dev); struct vhost_iotlb *iommu = vdpasim->iommu; + spin_lock(&vdpasim->iommu_lock); vhost_iotlb_del_range(iommu, (u64)dma_addr, (u64)dma_addr + size - 1); + spin_unlock(&vdpasim->iommu_lock); } static void *vdpasim_alloc_coherent(struct device *dev, size_t size, @@ -264,9 +294,10 @@ static void *vdpasim_alloc_coherent(struct device *dev, size_t size, void *addr = kmalloc(size, flag); int ret; - if (!addr) + spin_lock(&vdpasim->iommu_lock); + if (!addr) { *dma_addr = DMA_MAPPING_ERROR; - else { + } else { u64 pa = virt_to_phys(addr); ret = vhost_iotlb_add_range(iommu, (u64)pa, @@ -279,6 +310,7 @@ static void *vdpasim_alloc_coherent(struct device *dev, size_t size, } else *dma_addr = (dma_addr_t)pa; } + spin_unlock(&vdpasim->iommu_lock); return addr; } @@ -290,8 +322,11 @@ static void vdpasim_free_coherent(struct device *dev, size_t size, struct vdpasim *vdpasim = dev_to_sim(dev); struct vhost_iotlb *iommu = vdpasim->iommu; + spin_lock(&vdpasim->iommu_lock); vhost_iotlb_del_range(iommu, (u64)dma_addr, (u64)dma_addr + size - 1); + spin_unlock(&vdpasim->iommu_lock); + kfree(phys_to_virt((uintptr_t)dma_addr)); } @@ -303,21 +338,27 @@ static const struct dma_map_ops vdpasim_dma_ops = { }; static const struct vdpa_config_ops vdpasim_net_config_ops; +static const struct vdpa_config_ops vdpasim_net_batch_config_ops; static struct vdpasim *vdpasim_create(void) { - struct virtio_net_config *config; + const struct vdpa_config_ops *ops; struct vdpasim *vdpasim; struct device *dev; int ret = -ENOMEM; - vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, - &vdpasim_net_config_ops); + if (batch_mapping) + ops = &vdpasim_net_batch_config_ops; + else + ops = &vdpasim_net_config_ops; + + vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM); if (!vdpasim) goto err_alloc; INIT_WORK(&vdpasim->work, vdpasim_work); spin_lock_init(&vdpasim->lock); + spin_lock_init(&vdpasim->iommu_lock); dev = &vdpasim->vdpa.dev; dev->coherent_dma_mask = DMA_BIT_MASK(64); @@ -331,10 +372,7 @@ static struct vdpasim *vdpasim_create(void) if (!vdpasim->buffer) goto err_iommu; - config = &vdpasim->config; - config->mtu = 1500; - config->status = VIRTIO_NET_S_LINK_UP; - eth_random_addr(config->mac); + eth_random_addr(vdpasim->config.mac); vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu); vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu); @@ -413,26 +451,29 @@ static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx) return vq->ready; } -static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, u64 state) +static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, + const struct vdpa_vq_state *state) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; struct vringh *vrh = &vq->vring; spin_lock(&vdpasim->lock); - vrh->last_avail_idx = state; + vrh->last_avail_idx = state->avail_index; spin_unlock(&vdpasim->lock); return 0; } -static u64 vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx) +static int vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx, + struct vdpa_vq_state *state) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; struct vringh *vrh = &vq->vring; - return vrh->last_avail_idx; + state->avail_index = vrh->last_avail_idx; + return 0; } static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa) @@ -448,13 +489,22 @@ static u64 vdpasim_get_features(struct vdpa_device *vdpa) static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct virtio_net_config *config = &vdpasim->config; /* DMA mapping must be done by driver */ - if (!(features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) + if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) return -EINVAL; vdpasim->features = features & vdpasim_features; + /* We generally only know whether guest is using the legacy interface + * here, so generally that's the earliest we can set config fields. + * Note: We actually require VIRTIO_F_ACCESS_PLATFORM above which + * implies VIRTIO_F_VERSION_1, but let's not try to be clever here. + */ + + config->mtu = cpu_to_vdpasim16(vdpasim, 1500); + config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); return 0; } @@ -508,7 +558,7 @@ static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, struct vdpasim *vdpasim = vdpa_to_sim(vdpa); if (offset + len < sizeof(struct virtio_net_config)) - memcpy(buf, &vdpasim->config + offset, len); + memcpy(buf, (u8 *)&vdpasim->config + offset, len); } static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, @@ -532,6 +582,7 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, u64 start = 0ULL, last = 0ULL - 1; int ret; + spin_lock(&vdpasim->iommu_lock); vhost_iotlb_reset(vdpasim->iommu); for (map = vhost_iotlb_itree_first(iotlb, start, last); map; @@ -541,10 +592,12 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, if (ret) goto err; } + spin_unlock(&vdpasim->iommu_lock); return 0; err: vhost_iotlb_reset(vdpasim->iommu); + spin_unlock(&vdpasim->iommu_lock); return ret; } @@ -552,16 +605,23 @@ static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, u64 pa, u32 perm) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + int ret; + + spin_lock(&vdpasim->iommu_lock); + ret = vhost_iotlb_add_range(vdpasim->iommu, iova, iova + size - 1, pa, + perm); + spin_unlock(&vdpasim->iommu_lock); - return vhost_iotlb_add_range(vdpasim->iommu, iova, - iova + size - 1, pa, perm); + return ret; } static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + spin_lock(&vdpasim->iommu_lock); vhost_iotlb_del_range(vdpasim->iommu, iova, iova + size - 1); + spin_unlock(&vdpasim->iommu_lock); return 0; } @@ -597,12 +657,36 @@ static const struct vdpa_config_ops vdpasim_net_config_ops = { .get_config = vdpasim_get_config, .set_config = vdpasim_set_config, .get_generation = vdpasim_get_generation, - .set_map = vdpasim_set_map, .dma_map = vdpasim_dma_map, .dma_unmap = vdpasim_dma_unmap, .free = vdpasim_free, }; +static const struct vdpa_config_ops vdpasim_net_batch_config_ops = { + .set_vq_address = vdpasim_set_vq_address, + .set_vq_num = vdpasim_set_vq_num, + .kick_vq = vdpasim_kick_vq, + .set_vq_cb = vdpasim_set_vq_cb, + .set_vq_ready = vdpasim_set_vq_ready, + .get_vq_ready = vdpasim_get_vq_ready, + .set_vq_state = vdpasim_set_vq_state, + .get_vq_state = vdpasim_get_vq_state, + .get_vq_align = vdpasim_get_vq_align, + .get_features = vdpasim_get_features, + .set_features = vdpasim_set_features, + .set_config_cb = vdpasim_set_config_cb, + .get_vq_num_max = vdpasim_get_vq_num_max, + .get_device_id = vdpasim_get_device_id, + .get_vendor_id = vdpasim_get_vendor_id, + .get_status = vdpasim_get_status, + .set_status = vdpasim_set_status, + .get_config = vdpasim_get_config, + .set_config = vdpasim_set_config, + .get_generation = vdpasim_get_generation, + .set_map = vdpasim_set_map, + .free = vdpasim_free, +}; + static int __init vdpasim_dev_init(void) { vdpasim_dev = vdpasim_create(); diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index d3688c6afb87..587fbae06182 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -65,6 +65,7 @@ config VHOST_VDPA tristate "Vhost driver for vDPA-based backend" depends on EVENTFD select VHOST + select IRQ_BYPASS_MANAGER depends on VDPA help This kernel module can be loaded in host kernel to accelerate diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index eea902b83afe..531a00d703cd 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -73,7 +73,7 @@ enum { VHOST_NET_FEATURES = VHOST_FEATURES | (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | (1ULL << VIRTIO_NET_F_MRG_RXBUF) | - (1ULL << VIRTIO_F_IOMMU_PLATFORM) + (1ULL << VIRTIO_F_ACCESS_PLATFORM) }; enum { @@ -1615,21 +1615,6 @@ done: return err; } -static int vhost_net_set_backend_features(struct vhost_net *n, u64 features) -{ - int i; - - mutex_lock(&n->dev.mutex); - for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { - mutex_lock(&n->vqs[i].vq.mutex); - n->vqs[i].vq.acked_backend_features = features; - mutex_unlock(&n->vqs[i].vq.mutex); - } - mutex_unlock(&n->dev.mutex); - - return 0; -} - static int vhost_net_set_features(struct vhost_net *n, u64 features) { size_t vhost_hlen, sock_hlen, hdr_len; @@ -1653,7 +1638,7 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) !vhost_log_access_ok(&n->dev)) goto out_unlock; - if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) { + if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) { if (vhost_init_device_iotlb(&n->dev, true)) goto out_unlock; } @@ -1730,7 +1715,8 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl, return -EFAULT; if (features & ~VHOST_NET_BACKEND_FEATURES) return -EOPNOTSUPP; - return vhost_net_set_backend_features(n, features); + vhost_set_backend_features(&n->dev, features); + return 0; case VHOST_RESET_OWNER: return vhost_net_reset_owner(n); case VHOST_SET_OWNER: diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index a54b60d6623f..3fab94f88894 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -27,37 +27,11 @@ #include "vhost.h" enum { - VHOST_VDPA_FEATURES = - (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | - (1ULL << VIRTIO_F_ANY_LAYOUT) | - (1ULL << VIRTIO_F_VERSION_1) | - (1ULL << VIRTIO_F_IOMMU_PLATFORM) | - (1ULL << VIRTIO_F_RING_PACKED) | - (1ULL << VIRTIO_F_ORDER_PLATFORM) | - (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | - (1ULL << VIRTIO_RING_F_EVENT_IDX), - - VHOST_VDPA_NET_FEATURES = VHOST_VDPA_FEATURES | - (1ULL << VIRTIO_NET_F_CSUM) | - (1ULL << VIRTIO_NET_F_GUEST_CSUM) | - (1ULL << VIRTIO_NET_F_MTU) | - (1ULL << VIRTIO_NET_F_MAC) | - (1ULL << VIRTIO_NET_F_GUEST_TSO4) | - (1ULL << VIRTIO_NET_F_GUEST_TSO6) | - (1ULL << VIRTIO_NET_F_GUEST_ECN) | - (1ULL << VIRTIO_NET_F_GUEST_UFO) | - (1ULL << VIRTIO_NET_F_HOST_TSO4) | - (1ULL << VIRTIO_NET_F_HOST_TSO6) | - (1ULL << VIRTIO_NET_F_HOST_ECN) | - (1ULL << VIRTIO_NET_F_HOST_UFO) | - (1ULL << VIRTIO_NET_F_MRG_RXBUF) | - (1ULL << VIRTIO_NET_F_STATUS) | - (1ULL << VIRTIO_NET_F_SPEED_DUPLEX), + VHOST_VDPA_BACKEND_FEATURES = + (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) | + (1ULL << VHOST_BACKEND_F_IOTLB_BATCH), }; -/* Currently, only network backend w/o multiqueue is supported. */ -#define VHOST_VDPA_VQ_MAX 2 - #define VHOST_VDPA_DEV_MAX (1U << MINORBITS) struct vhost_vdpa { @@ -73,16 +47,13 @@ struct vhost_vdpa { int virtio_id; int minor; struct eventfd_ctx *config_ctx; + int in_batch; }; static DEFINE_IDA(vhost_vdpa_ida); static dev_t vhost_vdpa_major; -static const u64 vhost_vdpa_features[] = { - [VIRTIO_ID_NET] = VHOST_VDPA_NET_FEATURES, -}; - static void handle_vq_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, @@ -96,7 +67,7 @@ static void handle_vq_kick(struct vhost_work *work) static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) { struct vhost_virtqueue *vq = private; - struct eventfd_ctx *call_ctx = vq->call_ctx; + struct eventfd_ctx *call_ctx = vq->call_ctx.ctx; if (call_ctx) eventfd_signal(call_ctx, 1); @@ -115,12 +86,45 @@ static irqreturn_t vhost_vdpa_config_cb(void *private) return IRQ_HANDLED; } +static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid) +{ + struct vhost_virtqueue *vq = &v->vqs[qid]; + const struct vdpa_config_ops *ops = v->vdpa->config; + struct vdpa_device *vdpa = v->vdpa; + int ret, irq; + + if (!ops->get_vq_irq) + return; + + irq = ops->get_vq_irq(vdpa, qid); + spin_lock(&vq->call_ctx.ctx_lock); + irq_bypass_unregister_producer(&vq->call_ctx.producer); + if (!vq->call_ctx.ctx || irq < 0) { + spin_unlock(&vq->call_ctx.ctx_lock); + return; + } + + vq->call_ctx.producer.token = vq->call_ctx.ctx; + vq->call_ctx.producer.irq = irq; + ret = irq_bypass_register_producer(&vq->call_ctx.producer); + spin_unlock(&vq->call_ctx.ctx_lock); +} + +static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) +{ + struct vhost_virtqueue *vq = &v->vqs[qid]; + + spin_lock(&vq->call_ctx.ctx_lock); + irq_bypass_unregister_producer(&vq->call_ctx.producer); + spin_unlock(&vq->call_ctx.ctx_lock); +} + static void vhost_vdpa_reset(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; - const struct vdpa_config_ops *ops = vdpa->config; - ops->set_status(vdpa, 0); + vdpa_reset(vdpa); + v->in_batch = 0; } static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) @@ -155,11 +159,15 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; - u8 status; + u8 status, status_old; + int nvqs = v->nvqs; + u16 i; if (copy_from_user(&status, statusp, sizeof(status))) return -EFAULT; + status_old = ops->get_status(vdpa); + /* * Userspace shouldn't remove status bits unless reset the * status to 0. @@ -169,6 +177,14 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) ops->set_status(vdpa, status); + if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) + for (i = 0; i < nvqs; i++) + vhost_vdpa_setup_vq_irq(v, i); + + if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK)) + for (i = 0; i < nvqs; i++) + vhost_vdpa_unsetup_vq_irq(v, i); + return 0; } @@ -196,7 +212,6 @@ static long vhost_vdpa_get_config(struct vhost_vdpa *v, struct vhost_vdpa_config __user *c) { struct vdpa_device *vdpa = v->vdpa; - const struct vdpa_config_ops *ops = vdpa->config; struct vhost_vdpa_config config; unsigned long size = offsetof(struct vhost_vdpa_config, buf); u8 *buf; @@ -209,7 +224,7 @@ static long vhost_vdpa_get_config(struct vhost_vdpa *v, if (!buf) return -ENOMEM; - ops->get_config(vdpa, config.off, buf, config.len); + vdpa_get_config(vdpa, config.off, buf, config.len); if (copy_to_user(c->buf, buf, config.len)) { kvfree(buf); @@ -255,7 +270,6 @@ static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) u64 features; features = ops->get_features(vdpa); - features &= vhost_vdpa_features[v->virtio_id]; if (copy_to_user(featurep, &features, sizeof(features))) return -EFAULT; @@ -279,10 +293,7 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; - if (features & ~vhost_vdpa_features[v->virtio_id]) - return -EINVAL; - - if (ops->set_features(vdpa, features)) + if (vdpa_set_features(vdpa, features)) return -EINVAL; return 0; @@ -332,14 +343,18 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) return 0; } + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + struct vdpa_vq_state vq_state; struct vdpa_callback cb; struct vhost_virtqueue *vq; struct vhost_vring_state s; + u64 __user *featurep = argp; + u64 features; u32 idx; long r; @@ -353,15 +368,32 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, idx = array_index_nospec(idx, v->nvqs); vq = &v->vqs[idx]; - if (cmd == VHOST_VDPA_SET_VRING_ENABLE) { + switch (cmd) { + case VHOST_VDPA_SET_VRING_ENABLE: if (copy_from_user(&s, argp, sizeof(s))) return -EFAULT; ops->set_vq_ready(vdpa, idx, s.num); return 0; - } + case VHOST_GET_VRING_BASE: + r = ops->get_vq_state(v->vdpa, idx, &vq_state); + if (r) + return r; - if (cmd == VHOST_GET_VRING_BASE) - vq->last_avail_idx = ops->get_vq_state(v->vdpa, idx); + vq->last_avail_idx = vq_state.avail_index; + break; + case VHOST_GET_BACKEND_FEATURES: + features = VHOST_VDPA_BACKEND_FEATURES; + if (copy_to_user(featurep, &features, sizeof(features))) + return -EFAULT; + return 0; + case VHOST_SET_BACKEND_FEATURES: + if (copy_from_user(&features, featurep, sizeof(features))) + return -EFAULT; + if (features & ~VHOST_VDPA_BACKEND_FEATURES) + return -EOPNOTSUPP; + vhost_set_backend_features(&v->vdev, features); + return 0; + } r = vhost_vring_ioctl(&v->vdev, cmd, argp); if (r) @@ -377,12 +409,13 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, break; case VHOST_SET_VRING_BASE: - if (ops->set_vq_state(vdpa, idx, vq->last_avail_idx)) + vq_state.avail_index = vq->last_avail_idx; + if (ops->set_vq_state(vdpa, idx, &vq_state)) r = -EINVAL; break; case VHOST_SET_VRING_CALL: - if (vq->call_ctx) { + if (vq->call_ctx.ctx) { cb.callback = vhost_vdpa_virtqueue_cb; cb.private = vq; } else { @@ -390,6 +423,7 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, cb.private = NULL; } ops->set_vq_cb(vdpa, idx, &cb); + vhost_vdpa_setup_vq_irq(v, idx); break; case VHOST_SET_VRING_NUM: @@ -519,13 +553,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, if (r) return r; - if (ops->dma_map) + if (ops->dma_map) { r = ops->dma_map(vdpa, iova, size, pa, perm); - else if (ops->set_map) - r = ops->set_map(vdpa, dev->iotlb); - else + } else if (ops->set_map) { + if (!v->in_batch) + r = ops->set_map(vdpa, dev->iotlb); + } else { r = iommu_map(v->domain, iova, pa, size, perm_to_iommu_flags(perm)); + } return r; } @@ -538,12 +574,14 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1); - if (ops->dma_map) + if (ops->dma_map) { ops->dma_unmap(vdpa, iova, size); - else if (ops->set_map) - ops->set_map(vdpa, dev->iotlb); - else + } else if (ops->set_map) { + if (!v->in_batch) + ops->set_map(vdpa, dev->iotlb); + } else { iommu_unmap(v->domain, iova, size); + } } static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, @@ -636,6 +674,8 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, struct vhost_iotlb_msg *msg) { struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev); + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; int r = 0; r = vhost_dev_check_owner(dev); @@ -649,6 +689,14 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, case VHOST_IOTLB_INVALIDATE: vhost_vdpa_unmap(v, msg->iova, msg->size); break; + case VHOST_IOTLB_BATCH_BEGIN: + v->in_batch = true; + break; + case VHOST_IOTLB_BATCH_END: + if (v->in_batch && ops->set_map) + ops->set_map(vdpa, dev->iotlb); + v->in_batch = false; + break; default: r = -EINVAL; break; @@ -765,6 +813,18 @@ err: return r; } +static void vhost_vdpa_clean_irq(struct vhost_vdpa *v) +{ + struct vhost_virtqueue *vq; + int i; + + for (i = 0; i < v->nvqs; i++) { + vq = &v->vqs[i]; + if (vq->call_ctx.producer.irq) + irq_bypass_unregister_producer(&vq->call_ctx.producer); + } +} + static int vhost_vdpa_release(struct inode *inode, struct file *filep) { struct vhost_vdpa *v = filep->private_data; @@ -777,6 +837,7 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) vhost_vdpa_iotlb_free(v); vhost_vdpa_free_domain(v); vhost_vdpa_config_put(v); + vhost_vdpa_clean_irq(v); vhost_dev_cleanup(&v->vdev); kfree(v->vdev.vqs); mutex_unlock(&d->mutex); @@ -872,7 +933,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) { const struct vdpa_config_ops *ops = vdpa->config; struct vhost_vdpa *v; - int minor, nvqs = VHOST_VDPA_VQ_MAX; + int minor; int r; /* Currently, we only accept the network devices. */ @@ -893,14 +954,14 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) atomic_set(&v->opened, 0); v->minor = minor; v->vdpa = vdpa; - v->nvqs = nvqs; + v->nvqs = vdpa->nvqs; v->virtio_id = ops->get_device_id(vdpa); device_initialize(&v->dev); v->dev.release = vhost_vdpa_release_dev; v->dev.parent = &vdpa->dev; v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor); - v->vqs = kmalloc_array(nvqs, sizeof(struct vhost_virtqueue), + v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue), GFP_KERNEL); if (!v->vqs) { r = -ENOMEM; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 74d135ee7e26..5857d4eec9d7 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -298,6 +298,13 @@ static void vhost_vq_meta_reset(struct vhost_dev *d) __vhost_vq_meta_reset(d->vqs[i]); } +static void vhost_vring_call_reset(struct vhost_vring_call *call_ctx) +{ + call_ctx->ctx = NULL; + memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer)); + spin_lock_init(&call_ctx->ctx_lock); +} + static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { @@ -319,13 +326,13 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->log_base = NULL; vq->error_ctx = NULL; vq->kick = NULL; - vq->call_ctx = NULL; vq->log_ctx = NULL; vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; + vhost_vring_call_reset(&vq->call_ctx); __vhost_vq_meta_reset(vq); } @@ -685,8 +692,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev) eventfd_ctx_put(dev->vqs[i]->error_ctx); if (dev->vqs[i]->kick) fput(dev->vqs[i]->kick); - if (dev->vqs[i]->call_ctx) - eventfd_ctx_put(dev->vqs[i]->call_ctx); + if (dev->vqs[i]->call_ctx.ctx) + eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); vhost_vq_reset(dev, dev->vqs[i]); } vhost_dev_free_iovecs(dev); @@ -1405,7 +1412,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) memcpy(newmem, &mem, size); if (copy_from_user(newmem->regions, m->regions, - mem.nregions * sizeof *m->regions)) { + flex_array_size(newmem, regions, mem.nregions))) { kvfree(newmem); return -EFAULT; } @@ -1629,7 +1636,10 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = PTR_ERR(ctx); break; } - swap(ctx, vq->call_ctx); + + spin_lock(&vq->call_ctx.ctx_lock); + swap(ctx, vq->call_ctx.ctx); + spin_unlock(&vq->call_ctx.ctx_lock); break; case VHOST_SET_VRING_ERR: if (copy_from_user(&f, argp, sizeof f)) { @@ -2435,8 +2445,8 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) { /* Signal the Guest tell them we used something up. */ - if (vq->call_ctx && vhost_notify(dev, vq)) - eventfd_signal(vq->call_ctx, 1); + if (vq->call_ctx.ctx && vhost_notify(dev, vq)) + eventfd_signal(vq->call_ctx.ctx, 1); } EXPORT_SYMBOL_GPL(vhost_signal); @@ -2576,6 +2586,21 @@ struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, } EXPORT_SYMBOL_GPL(vhost_dequeue_msg); +void vhost_set_backend_features(struct vhost_dev *dev, u64 features) +{ + struct vhost_virtqueue *vq; + int i; + + mutex_lock(&dev->mutex); + for (i = 0; i < dev->nvqs; ++i) { + vq = dev->vqs[i]; + mutex_lock(&vq->mutex); + vq->acked_backend_features = features; + mutex_unlock(&vq->mutex); + } + mutex_unlock(&dev->mutex); +} +EXPORT_SYMBOL_GPL(vhost_set_backend_features); static int __init vhost_init(void) { diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index c8e96a095d3b..9032d3c2a9f4 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -13,6 +13,7 @@ #include <linux/virtio_ring.h> #include <linux/atomic.h> #include <linux/vhost_iotlb.h> +#include <linux/irqbypass.h> struct vhost_work; typedef void (*vhost_work_fn_t)(struct vhost_work *work); @@ -60,6 +61,12 @@ enum vhost_uaddr_type { VHOST_NUM_ADDRS = 3, }; +struct vhost_vring_call { + struct eventfd_ctx *ctx; + struct irq_bypass_producer producer; + spinlock_t ctx_lock; +}; + /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -72,7 +79,7 @@ struct vhost_virtqueue { vring_used_t __user *used; const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS]; struct file *kick; - struct eventfd_ctx *call_ctx; + struct vhost_vring_call call_ctx; struct eventfd_ctx *error_ctx; struct eventfd_ctx *log_ctx; @@ -207,6 +214,8 @@ void vhost_enqueue_msg(struct vhost_dev *dev, struct vhost_msg_node *node); struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, struct list_head *head); +void vhost_set_backend_features(struct vhost_dev *dev, u64 features); + __poll_t vhost_chr_poll(struct file *file, struct vhost_dev *dev, poll_table *wait); ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 8be02f333b7a..31cc97f2f515 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -398,12 +398,9 @@ static inline s64 towards_target(struct virtio_balloon *vb) s64 target; u32 num_pages; - virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages, - &num_pages); - /* Legacy balloon config space is LE, unlike all other devices. */ - if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) - num_pages = le32_to_cpu((__force __le32)num_pages); + virtio_cread_le(vb->vdev, struct virtio_balloon_config, num_pages, + &num_pages); target = num_pages; return target - vb->num_pages; @@ -462,11 +459,8 @@ static void update_balloon_size(struct virtio_balloon *vb) u32 actual = vb->num_pages; /* Legacy balloon config space is LE, unlike all other devices. */ - if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) - actual = (__force u32)cpu_to_le32(actual); - - virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual, - &actual); + virtio_cwrite_le(vb->vdev, struct virtio_balloon_config, actual, + &actual); } static void update_balloon_stats_func(struct work_struct *work) @@ -579,12 +573,10 @@ static u32 virtio_balloon_cmd_id_received(struct virtio_balloon *vb) { if (test_and_clear_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID, &vb->config_read_bitmap)) { - virtio_cread(vb->vdev, struct virtio_balloon_config, - free_page_hint_cmd_id, - &vb->cmd_id_received_cache); /* Legacy balloon config space is LE, unlike all other devices. */ - if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) - vb->cmd_id_received_cache = le32_to_cpu((__force __le32)vb->cmd_id_received_cache); + virtio_cread_le(vb->vdev, struct virtio_balloon_config, + free_page_hint_cmd_id, + &vb->cmd_id_received_cache); } return vb->cmd_id_received_cache; @@ -600,7 +592,7 @@ static int send_cmd_id_start(struct virtio_balloon *vb) while (virtqueue_get_buf(vq, &unused)) ; - vb->cmd_id_active = virtio32_to_cpu(vb->vdev, + vb->cmd_id_active = cpu_to_virtio32(vb->vdev, virtio_balloon_cmd_id_received(vb)); sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active)); err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL); @@ -987,8 +979,8 @@ static int virtballoon_probe(struct virtio_device *vdev) if (!want_init_on_free()) memset(&poison_val, PAGE_POISON, sizeof(poison_val)); - virtio_cwrite(vb->vdev, struct virtio_balloon_config, - poison_val, &poison_val); + virtio_cwrite_le(vb->vdev, struct virtio_balloon_config, + poison_val, &poison_val); } vb->pr_dev_info.report = virtballoon_free_page_report; @@ -1129,7 +1121,7 @@ static int virtballoon_validate(struct virtio_device *vdev) else if (!virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_REPORTING); - __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM); + __virtio_clear_bit(vdev, VIRTIO_F_ACCESS_PLATFORM); return 0; } diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index efaf65b0f42d..877b2ea3ed05 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -113,9 +113,9 @@ static u8 virtinput_cfg_select(struct virtio_input *vi, { u8 size; - virtio_cwrite(vi->vdev, struct virtio_input_config, select, &select); - virtio_cwrite(vi->vdev, struct virtio_input_config, subsel, &subsel); - virtio_cread(vi->vdev, struct virtio_input_config, size, &size); + virtio_cwrite_le(vi->vdev, struct virtio_input_config, select, &select); + virtio_cwrite_le(vi->vdev, struct virtio_input_config, subsel, &subsel); + virtio_cread_le(vi->vdev, struct virtio_input_config, size, &size); return size; } @@ -158,11 +158,11 @@ static void virtinput_cfg_abs(struct virtio_input *vi, int abs) u32 mi, ma, re, fu, fl; virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ABS_INFO, abs); - virtio_cread(vi->vdev, struct virtio_input_config, u.abs.min, &mi); - virtio_cread(vi->vdev, struct virtio_input_config, u.abs.max, &ma); - virtio_cread(vi->vdev, struct virtio_input_config, u.abs.res, &re); - virtio_cread(vi->vdev, struct virtio_input_config, u.abs.fuzz, &fu); - virtio_cread(vi->vdev, struct virtio_input_config, u.abs.flat, &fl); + virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.min, &mi); + virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.max, &ma); + virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.res, &re); + virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.fuzz, &fu); + virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.flat, &fl); input_set_abs_params(vi->idev, abs, mi, ma, fu, fl); input_abs_set_res(vi->idev, abs, re); } @@ -244,14 +244,14 @@ static int virtinput_probe(struct virtio_device *vdev) size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_DEVIDS, 0); if (size >= sizeof(struct virtio_input_devids)) { - virtio_cread(vi->vdev, struct virtio_input_config, - u.ids.bustype, &vi->idev->id.bustype); - virtio_cread(vi->vdev, struct virtio_input_config, - u.ids.vendor, &vi->idev->id.vendor); - virtio_cread(vi->vdev, struct virtio_input_config, - u.ids.product, &vi->idev->id.product); - virtio_cread(vi->vdev, struct virtio_input_config, - u.ids.version, &vi->idev->id.version); + virtio_cread_le(vi->vdev, struct virtio_input_config, + u.ids.bustype, &vi->idev->id.bustype); + virtio_cread_le(vi->vdev, struct virtio_input_config, + u.ids.vendor, &vi->idev->id.vendor); + virtio_cread_le(vi->vdev, struct virtio_input_config, + u.ids.product, &vi->idev->id.product); + virtio_cread_le(vi->vdev, struct virtio_input_config, + u.ids.version, &vi->idev->id.version); } else { vi->idev->id.bustype = BUS_VIRTUAL; } diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index f26f5f64ae82..c08512fcea90 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1530,21 +1530,21 @@ static void virtio_mem_refresh_config(struct virtio_mem *vm) uint64_t new_plugged_size, usable_region_size, end_addr; /* the plugged_size is just a reflection of what _we_ did previously */ - virtio_cread(vm->vdev, struct virtio_mem_config, plugged_size, - &new_plugged_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, + &new_plugged_size); if (WARN_ON_ONCE(new_plugged_size != vm->plugged_size)) vm->plugged_size = new_plugged_size; /* calculate the last usable memory block id */ - virtio_cread(vm->vdev, struct virtio_mem_config, - usable_region_size, &usable_region_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, + usable_region_size, &usable_region_size); end_addr = vm->addr + usable_region_size; end_addr = min(end_addr, phys_limit); vm->last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr) - 1; /* see if there is a request to change the size */ - virtio_cread(vm->vdev, struct virtio_mem_config, requested_size, - &vm->requested_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size, + &vm->requested_size); dev_info(&vm->vdev->dev, "plugged size: 0x%llx", vm->plugged_size); dev_info(&vm->vdev->dev, "requested size: 0x%llx", vm->requested_size); @@ -1677,16 +1677,16 @@ static int virtio_mem_init(struct virtio_mem *vm) } /* Fetch all properties that can't change. */ - virtio_cread(vm->vdev, struct virtio_mem_config, plugged_size, - &vm->plugged_size); - virtio_cread(vm->vdev, struct virtio_mem_config, block_size, - &vm->device_block_size); - virtio_cread(vm->vdev, struct virtio_mem_config, node_id, - &node_id); + virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, + &vm->plugged_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size, + &vm->device_block_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id, + &node_id); vm->nid = virtio_mem_translate_node_id(vm, node_id); - virtio_cread(vm->vdev, struct virtio_mem_config, addr, &vm->addr); - virtio_cread(vm->vdev, struct virtio_mem_config, region_size, - &vm->region_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr); + virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, + &vm->region_size); /* * We always hotplug memory in memory block granularity. This way, diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index db93cedd262f..9bdc6f68221f 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -481,6 +481,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { * @dev: the pci device * @cfg_type: the VIRTIO_PCI_CAP_* value we seek * @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO. + * @bars: the bitmask of BARs * * Returns offset of the capability, or 0. */ diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index a2de775801af..becc77697960 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -240,7 +240,7 @@ static inline bool virtqueue_use_indirect(struct virtqueue *_vq, static bool vring_use_dma_api(struct virtio_device *vdev) { - if (!virtio_has_iommu_quirk(vdev)) + if (!virtio_has_dma_quirk(vdev)) return true; /* Otherwise, we are left to guess. */ @@ -1960,6 +1960,9 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) { struct vring_virtqueue *vq = to_vvq(_vq); + if (unlikely(vq->broken)) + return false; + virtio_mb(vq->weak_barriers); return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : virtqueue_poll_split(_vq, last_used_idx); @@ -2225,7 +2228,7 @@ void vring_transport_features(struct virtio_device *vdev) break; case VIRTIO_F_VERSION_1: break; - case VIRTIO_F_IOMMU_PLATFORM: + case VIRTIO_F_ACCESS_PLATFORM: break; case VIRTIO_F_RING_PACKED: break; diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index c30eb55030be..4a9ddb44b2a7 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -57,9 +57,8 @@ static void virtio_vdpa_get(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); - const struct vdpa_config_ops *ops = vdpa->config; - ops->get_config(vdpa, offset, buf, len); + vdpa_get_config(vdpa, offset, buf, len); } static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset, @@ -101,9 +100,8 @@ static void virtio_vdpa_set_status(struct virtio_device *vdev, u8 status) static void virtio_vdpa_reset(struct virtio_device *vdev) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); - const struct vdpa_config_ops *ops = vdpa->config; - return ops->set_status(vdpa, 0); + vdpa_reset(vdpa); } static bool virtio_vdpa_notify(struct virtqueue *vq) @@ -294,12 +292,11 @@ static u64 virtio_vdpa_get_features(struct virtio_device *vdev) static int virtio_vdpa_finalize_features(struct virtio_device *vdev) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); - const struct vdpa_config_ops *ops = vdpa->config; /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); - return ops->set_features(vdpa, vdev->features); + return vdpa_set_features(vdpa, vdev->features); } static const char *virtio_vdpa_bus_name(struct virtio_device *vdev) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 4c4ef5d69298..104f35de5270 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -606,8 +606,8 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, unsigned int i; int ret = 0; - virtio_cread(vdev, struct virtio_fs_config, num_request_queues, - &fs->num_request_queues); + virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues, + &fs->num_request_queues); if (fs->num_request_queues == 0) return -EINVAL; diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 239db794357c..eae0bfd87d91 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -28,17 +28,28 @@ struct vdpa_notification_area { }; /** + * vDPA vq_state definition + * @avail_index: available index + */ +struct vdpa_vq_state { + u16 avail_index; +}; + +/** * vDPA device - representation of a vDPA device * @dev: underlying device * @dma_dev: the actual device that is performing DMA * @config: the configuration ops for this device. * @index: device index + * @features_valid: were features initialized? for legacy guests */ struct vdpa_device { struct device dev; struct device *dma_dev; const struct vdpa_config_ops *config; unsigned int index; + bool features_valid; + int nvqs; }; /** @@ -77,16 +88,22 @@ struct vdpa_device { * @set_vq_state: Set the state for a virtqueue * @vdev: vdpa device * @idx: virtqueue index - * @state: virtqueue state (last_avail_idx) + * @state: pointer to set virtqueue state (last_avail_idx) * Returns integer: success (0) or error (< 0) * @get_vq_state: Get the state for a virtqueue * @vdev: vdpa device * @idx: virtqueue index - * Returns virtqueue state (last_avail_idx) + * @state: pointer to returned state (last_avail_idx) * @get_vq_notification: Get the notification area for a virtqueue * @vdev: vdpa device * @idx: virtqueue index * Returns the notifcation area + * @get_vq_irq: Get the irq number of a virtqueue (optional, + * but must implemented if require vq irq offloading) + * @vdev: vdpa device + * @idx: virtqueue index + * Returns int: irq number of a virtqueue, + * negative number if no irq assigned. * @get_vq_align: Get the virtqueue align requirement * for the device * @vdev: vdpa device @@ -174,10 +191,14 @@ struct vdpa_config_ops { struct vdpa_callback *cb); void (*set_vq_ready)(struct vdpa_device *vdev, u16 idx, bool ready); bool (*get_vq_ready)(struct vdpa_device *vdev, u16 idx); - int (*set_vq_state)(struct vdpa_device *vdev, u16 idx, u64 state); - u64 (*get_vq_state)(struct vdpa_device *vdev, u16 idx); + int (*set_vq_state)(struct vdpa_device *vdev, u16 idx, + const struct vdpa_vq_state *state); + int (*get_vq_state)(struct vdpa_device *vdev, u16 idx, + struct vdpa_vq_state *state); struct vdpa_notification_area (*get_vq_notification)(struct vdpa_device *vdev, u16 idx); + /* vq irq is not expected to be changed once DRIVER_OK is set */ + int (*get_vq_irq)(struct vdpa_device *vdv, u16 idx); /* Device ops */ u32 (*get_vq_align)(struct vdpa_device *vdev); @@ -208,11 +229,12 @@ struct vdpa_config_ops { struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, + int nvqs, size_t size); -#define vdpa_alloc_device(dev_struct, member, parent, config) \ +#define vdpa_alloc_device(dev_struct, member, parent, config, nvqs) \ container_of(__vdpa_alloc_device( \ - parent, config, \ + parent, config, nvqs, \ sizeof(dev_struct) + \ BUILD_BUG_ON_ZERO(offsetof( \ dev_struct, member))), \ @@ -266,4 +288,36 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) { return vdev->dma_dev; } + +static inline void vdpa_reset(struct vdpa_device *vdev) +{ + const struct vdpa_config_ops *ops = vdev->config; + + vdev->features_valid = false; + ops->set_status(vdev, 0); +} + +static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features) +{ + const struct vdpa_config_ops *ops = vdev->config; + + vdev->features_valid = true; + return ops->set_features(vdev, features); +} + + +static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset, + void *buf, unsigned int len) +{ + const struct vdpa_config_ops *ops = vdev->config; + + /* + * Config accesses aren't supposed to trigger before features are set. + * If it does happen we assume a legacy guest. + */ + if (!vdev->features_valid) + vdpa_set_features(vdev, 0); + ops->get_config(vdev, offset, buf, len); +} + #endif /* _LINUX_VDPA_H */ diff --git a/include/linux/virtio_caif.h b/include/linux/virtio_caif.h index 5d2d3124ca3d..ea722479510c 100644 --- a/include/linux/virtio_caif.h +++ b/include/linux/virtio_caif.h @@ -11,9 +11,9 @@ #include <linux/types.h> struct virtio_caif_transf_config { - u16 headroom; - u16 tailroom; - u32 mtu; + __virtio16 headroom; + __virtio16 tailroom; + __virtio32 mtu; u8 reserved[4]; }; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index bb4cc4910750..8fe857e27ef3 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -6,6 +6,7 @@ #include <linux/bug.h> #include <linux/virtio.h> #include <linux/virtio_byteorder.h> +#include <linux/compiler_types.h> #include <uapi/linux/virtio_config.h> struct irq_affinity; @@ -162,16 +163,16 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev, } /** - * virtio_has_iommu_quirk - determine whether this device has the iommu quirk + * virtio_has_dma_quirk - determine whether this device has the DMA quirk * @vdev: the device */ -static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev) +static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev) { /* * Note the reverse polarity of the quirk feature (compared to most * other features), this is for compatibility with legacy systems. */ - return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM); } static inline @@ -287,70 +288,133 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) return __cpu_to_virtio64(virtio_is_little_endian(vdev), val); } +#define virtio_to_cpu(vdev, x) \ + _Generic((x), \ + __u8: (x), \ + __virtio16: virtio16_to_cpu((vdev), (x)), \ + __virtio32: virtio32_to_cpu((vdev), (x)), \ + __virtio64: virtio64_to_cpu((vdev), (x)) \ + ) + +#define cpu_to_virtio(vdev, x, m) \ + _Generic((m), \ + __u8: (x), \ + __virtio16: cpu_to_virtio16((vdev), (x)), \ + __virtio32: cpu_to_virtio32((vdev), (x)), \ + __virtio64: cpu_to_virtio64((vdev), (x)) \ + ) + +#define __virtio_native_type(structname, member) \ + typeof(virtio_to_cpu(NULL, ((structname*)0)->member)) + /* Config space accessors. */ #define virtio_cread(vdev, structname, member, ptr) \ do { \ + typeof(((structname*)0)->member) virtio_cread_v; \ + \ might_sleep(); \ - /* Must match the member's type, and be integer */ \ - if (!typecheck(typeof((((structname*)0)->member)), *(ptr))) \ - (*ptr) = 1; \ + /* Sanity check: must match the member's type */ \ + typecheck(typeof(virtio_to_cpu((vdev), virtio_cread_v)), *(ptr)); \ \ - switch (sizeof(*ptr)) { \ + switch (sizeof(virtio_cread_v)) { \ case 1: \ - *(ptr) = virtio_cread8(vdev, \ - offsetof(structname, member)); \ - break; \ case 2: \ - *(ptr) = virtio_cread16(vdev, \ - offsetof(structname, member)); \ - break; \ case 4: \ - *(ptr) = virtio_cread32(vdev, \ - offsetof(structname, member)); \ - break; \ - case 8: \ - *(ptr) = virtio_cread64(vdev, \ - offsetof(structname, member)); \ + vdev->config->get((vdev), \ + offsetof(structname, member), \ + &virtio_cread_v, \ + sizeof(virtio_cread_v)); \ break; \ default: \ - BUG(); \ + __virtio_cread_many((vdev), \ + offsetof(structname, member), \ + &virtio_cread_v, \ + 1, \ + sizeof(virtio_cread_v)); \ + break; \ } \ + *(ptr) = virtio_to_cpu(vdev, virtio_cread_v); \ } while(0) /* Config space accessors. */ #define virtio_cwrite(vdev, structname, member, ptr) \ do { \ + typeof(((structname*)0)->member) virtio_cwrite_v = \ + cpu_to_virtio(vdev, *(ptr), ((structname*)0)->member); \ + \ + might_sleep(); \ + /* Sanity check: must match the member's type */ \ + typecheck(typeof(virtio_to_cpu((vdev), virtio_cwrite_v)), *(ptr)); \ + \ + vdev->config->set((vdev), offsetof(structname, member), \ + &virtio_cwrite_v, \ + sizeof(virtio_cwrite_v)); \ + } while(0) + +/* + * Nothing virtio-specific about these, but let's worry about generalizing + * these later. + */ +#define virtio_le_to_cpu(x) \ + _Generic((x), \ + __u8: (u8)(x), \ + __le16: (u16)le16_to_cpu(x), \ + __le32: (u32)le32_to_cpu(x), \ + __le64: (u64)le64_to_cpu(x) \ + ) + +#define virtio_cpu_to_le(x, m) \ + _Generic((m), \ + __u8: (x), \ + __le16: cpu_to_le16(x), \ + __le32: cpu_to_le32(x), \ + __le64: cpu_to_le64(x) \ + ) + +/* LE (e.g. modern) Config space accessors. */ +#define virtio_cread_le(vdev, structname, member, ptr) \ + do { \ + typeof(((structname*)0)->member) virtio_cread_v; \ + \ might_sleep(); \ - /* Must match the member's type, and be integer */ \ - if (!typecheck(typeof((((structname*)0)->member)), *(ptr))) \ - BUG_ON((*ptr) == 1); \ + /* Sanity check: must match the member's type */ \ + typecheck(typeof(virtio_le_to_cpu(virtio_cread_v)), *(ptr)); \ \ - switch (sizeof(*ptr)) { \ + switch (sizeof(virtio_cread_v)) { \ case 1: \ - virtio_cwrite8(vdev, \ - offsetof(structname, member), \ - *(ptr)); \ - break; \ case 2: \ - virtio_cwrite16(vdev, \ - offsetof(structname, member), \ - *(ptr)); \ - break; \ case 4: \ - virtio_cwrite32(vdev, \ - offsetof(structname, member), \ - *(ptr)); \ - break; \ - case 8: \ - virtio_cwrite64(vdev, \ - offsetof(structname, member), \ - *(ptr)); \ + vdev->config->get((vdev), \ + offsetof(structname, member), \ + &virtio_cread_v, \ + sizeof(virtio_cread_v)); \ break; \ default: \ - BUG(); \ + __virtio_cread_many((vdev), \ + offsetof(structname, member), \ + &virtio_cread_v, \ + 1, \ + sizeof(virtio_cread_v)); \ + break; \ } \ + *(ptr) = virtio_le_to_cpu(virtio_cread_v); \ + } while(0) + +#define virtio_cwrite_le(vdev, structname, member, ptr) \ + do { \ + typeof(((structname*)0)->member) virtio_cwrite_v = \ + virtio_cpu_to_le(*(ptr), ((structname*)0)->member); \ + \ + might_sleep(); \ + /* Sanity check: must match the member's type */ \ + typecheck(typeof(virtio_le_to_cpu(virtio_cwrite_v)), *(ptr)); \ + \ + vdev->config->set((vdev), offsetof(structname, member), \ + &virtio_cwrite_v, \ + sizeof(virtio_cwrite_v)); \ } while(0) + /* Read @count fields, @bytes each. */ static inline void __virtio_cread_many(struct virtio_device *vdev, unsigned int offset, @@ -399,53 +463,60 @@ static inline void virtio_cwrite8(struct virtio_device *vdev, static inline u16 virtio_cread16(struct virtio_device *vdev, unsigned int offset) { - u16 ret; + __virtio16 ret; might_sleep(); vdev->config->get(vdev, offset, &ret, sizeof(ret)); - return virtio16_to_cpu(vdev, (__force __virtio16)ret); + return virtio16_to_cpu(vdev, ret); } static inline void virtio_cwrite16(struct virtio_device *vdev, unsigned int offset, u16 val) { + __virtio16 v; + might_sleep(); - val = (__force u16)cpu_to_virtio16(vdev, val); - vdev->config->set(vdev, offset, &val, sizeof(val)); + v = cpu_to_virtio16(vdev, val); + vdev->config->set(vdev, offset, &v, sizeof(v)); } static inline u32 virtio_cread32(struct virtio_device *vdev, unsigned int offset) { - u32 ret; + __virtio32 ret; might_sleep(); vdev->config->get(vdev, offset, &ret, sizeof(ret)); - return virtio32_to_cpu(vdev, (__force __virtio32)ret); + return virtio32_to_cpu(vdev, ret); } static inline void virtio_cwrite32(struct virtio_device *vdev, unsigned int offset, u32 val) { + __virtio32 v; + might_sleep(); - val = (__force u32)cpu_to_virtio32(vdev, val); - vdev->config->set(vdev, offset, &val, sizeof(val)); + v = cpu_to_virtio32(vdev, val); + vdev->config->set(vdev, offset, &v, sizeof(v)); } static inline u64 virtio_cread64(struct virtio_device *vdev, unsigned int offset) { - u64 ret; + __virtio64 ret; + __virtio_cread_many(vdev, offset, &ret, 1, sizeof(ret)); - return virtio64_to_cpu(vdev, (__force __virtio64)ret); + return virtio64_to_cpu(vdev, ret); } static inline void virtio_cwrite64(struct virtio_device *vdev, unsigned int offset, u64 val) { + __virtio64 v; + might_sleep(); - val = (__force u64)cpu_to_virtio64(vdev, val); - vdev->config->set(vdev, offset, &val, sizeof(val)); + v = cpu_to_virtio64(vdev, val); + vdev->config->set(vdev, offset, &v, sizeof(v)); } /* Conditional config space accessors. */ @@ -459,4 +530,14 @@ static inline void virtio_cwrite64(struct virtio_device *vdev, _r; \ }) +/* Conditional config space accessors. */ +#define virtio_cread_le_feature(vdev, fbit, structname, member, ptr) \ + ({ \ + int _r = 0; \ + if (!virtio_has_feature(vdev, fbit)) \ + _r = -ENOENT; \ + else \ + virtio_cread_le((vdev), structname, member, ptr); \ + _r; \ + }) #endif /* _LINUX_VIRTIO_CONFIG_H */ diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 3dc70adfe5f5..b485b13fa50b 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -46,16 +46,15 @@ static inline void virtio_wmb(bool weak_barriers) dma_wmb(); } -static inline void virtio_store_mb(bool weak_barriers, - __virtio16 *p, __virtio16 v) -{ - if (weak_barriers) { - virt_store_mb(*p, v); - } else { - WRITE_ONCE(*p, v); - mb(); - } -} +#define virtio_store_mb(weak_barriers, p, v) \ +do { \ + if (weak_barriers) { \ + virt_store_mb(*p, v); \ + } else { \ + WRITE_ONCE(*p, v); \ + mb(); \ + } \ +} while (0) \ struct virtio_device; struct virtqueue; diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 0c2349612e77..75232185324a 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -91,6 +91,8 @@ /* Use message type V2 */ #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 +/* IOTLB can accept batching hints */ +#define VHOST_BACKEND_F_IOTLB_BATCH 0x2 #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 669457ce5c48..9a269a88a6ff 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -60,6 +60,17 @@ struct vhost_iotlb_msg { #define VHOST_IOTLB_UPDATE 2 #define VHOST_IOTLB_INVALIDATE 3 #define VHOST_IOTLB_ACCESS_FAIL 4 +/* + * VHOST_IOTLB_BATCH_BEGIN and VHOST_IOTLB_BATCH_END allow modifying + * multiple mappings in one go: beginning with + * VHOST_IOTLB_BATCH_BEGIN, followed by any number of + * VHOST_IOTLB_UPDATE messages, and ending with VHOST_IOTLB_BATCH_END. + * When one of these two values is used as the message type, the rest + * of the fields in the message are ignored. There's no guarantee that + * these changes take place automatically in the device. + */ +#define VHOST_IOTLB_BATCH_BEGIN 5 +#define VHOST_IOTLB_BATCH_END 6 __u8 type; }; diff --git a/include/uapi/linux/virtio_9p.h b/include/uapi/linux/virtio_9p.h index 277c4ad44e84..441047432258 100644 --- a/include/uapi/linux/virtio_9p.h +++ b/include/uapi/linux/virtio_9p.h @@ -25,7 +25,7 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -#include <linux/types.h> +#include <linux/virtio_types.h> #include <linux/virtio_ids.h> #include <linux/virtio_config.h> @@ -36,7 +36,7 @@ struct virtio_9p_config { /* length of the tag name */ - __u16 tag_len; + __virtio16 tag_len; /* non-NULL terminated tag name */ __u8 tag[0]; } __attribute__((packed)); diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index dc3e656470dd..ddaa45e723c4 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -45,20 +45,20 @@ #define VIRTIO_BALLOON_CMD_ID_DONE 1 struct virtio_balloon_config { /* Number of pages host wants Guest to give up. */ - __u32 num_pages; + __le32 num_pages; /* Number of pages we've actually got in balloon. */ - __u32 actual; + __le32 actual; /* * Free page hint command id, readonly by guest. * Was previously named free_page_report_cmd_id so we * need to carry that name for legacy support. */ union { - __u32 free_page_hint_cmd_id; - __u32 free_page_report_cmd_id; /* deprecated */ + __le32 free_page_hint_cmd_id; + __le32 free_page_report_cmd_id; /* deprecated */ }; /* Stores PAGE_POISON if page poisoning is in use */ - __u32 poison_val; + __le32 poison_val; }; #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index 0f99d7b49ede..d888f013d9ff 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -57,20 +57,20 @@ struct virtio_blk_config { /* The capacity (in 512-byte sectors). */ - __u64 capacity; + __virtio64 capacity; /* The maximum segment size (if VIRTIO_BLK_F_SIZE_MAX) */ - __u32 size_max; + __virtio32 size_max; /* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */ - __u32 seg_max; + __virtio32 seg_max; /* geometry of the device (if VIRTIO_BLK_F_GEOMETRY) */ struct virtio_blk_geometry { - __u16 cylinders; + __virtio16 cylinders; __u8 heads; __u8 sectors; } geometry; /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ - __u32 blk_size; + __virtio32 blk_size; /* the next 4 entries are guarded by VIRTIO_BLK_F_TOPOLOGY */ /* exponent for physical block per logical block. */ @@ -78,42 +78,42 @@ struct virtio_blk_config { /* alignment offset in logical blocks. */ __u8 alignment_offset; /* minimum I/O size without performance penalty in logical blocks. */ - __u16 min_io_size; + __virtio16 min_io_size; /* optimal sustained I/O size in logical blocks. */ - __u32 opt_io_size; + __virtio32 opt_io_size; /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */ __u8 wce; __u8 unused; /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */ - __u16 num_queues; + __virtio16 num_queues; /* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */ /* * The maximum discard sectors (in 512-byte sectors) for * one segment. */ - __u32 max_discard_sectors; + __virtio32 max_discard_sectors; /* * The maximum number of discard segments in a * discard command. */ - __u32 max_discard_seg; + __virtio32 max_discard_seg; /* Discard commands must be aligned to this number of sectors. */ - __u32 discard_sector_alignment; + __virtio32 discard_sector_alignment; /* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */ /* * The maximum number of write zeroes sectors (in 512-byte sectors) in * one segment. */ - __u32 max_write_zeroes_sectors; + __virtio32 max_write_zeroes_sectors; /* * The maximum number of segments in a write zeroes * command. */ - __u32 max_write_zeroes_seg; + __virtio32 max_write_zeroes_seg; /* * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the * deallocation of one or more of the sectors. diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index ff8e7dc9d4dd..b5eda06f0d57 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -67,13 +67,17 @@ #define VIRTIO_F_VERSION_1 32 /* - * If clear - device has the IOMMU bypass quirk feature. - * If set - use platform tools to detect the IOMMU. + * If clear - device has the platform DMA (e.g. IOMMU) bypass quirk feature. + * If set - use platform DMA tools to access the memory. * * Note the reverse polarity (compared to most other features), * this is for compatibility with legacy systems. */ -#define VIRTIO_F_IOMMU_PLATFORM 33 +#define VIRTIO_F_ACCESS_PLATFORM 33 +#ifndef __KERNEL__ +/* Legacy name for VIRTIO_F_ACCESS_PLATFORM (for compatibility with old userspace) */ +#define VIRTIO_F_IOMMU_PLATFORM VIRTIO_F_ACCESS_PLATFORM +#endif /* __KERNEL__ */ /* This feature indicates support for the packed virtqueue layout. */ #define VIRTIO_F_RING_PACKED 34 diff --git a/include/uapi/linux/virtio_console.h b/include/uapi/linux/virtio_console.h index b7fb108c9310..7e6ec2ff0560 100644 --- a/include/uapi/linux/virtio_console.h +++ b/include/uapi/linux/virtio_console.h @@ -45,13 +45,13 @@ struct virtio_console_config { /* colums of the screens */ - __u16 cols; + __virtio16 cols; /* rows of the screens */ - __u16 rows; + __virtio16 rows; /* max. number of ports this device can hold */ - __u32 max_nr_ports; + __virtio32 max_nr_ports; /* emergency write register */ - __u32 emerg_wr; + __virtio32 emerg_wr; } __attribute__((packed)); /* diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index 50cdc8aebfcf..a03932f10565 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -414,33 +414,33 @@ struct virtio_crypto_op_data_req { struct virtio_crypto_config { /* See VIRTIO_CRYPTO_OP_* above */ - __u32 status; + __le32 status; /* * Maximum number of data queue */ - __u32 max_dataqueues; + __le32 max_dataqueues; /* * Specifies the services mask which the device support, * see VIRTIO_CRYPTO_SERVICE_* above */ - __u32 crypto_services; + __le32 crypto_services; /* Detailed algorithms mask */ - __u32 cipher_algo_l; - __u32 cipher_algo_h; - __u32 hash_algo; - __u32 mac_algo_l; - __u32 mac_algo_h; - __u32 aead_algo; + __le32 cipher_algo_l; + __le32 cipher_algo_h; + __le32 hash_algo; + __le32 mac_algo_l; + __le32 mac_algo_h; + __le32 aead_algo; /* Maximum length of cipher key */ - __u32 max_cipher_key_len; + __le32 max_cipher_key_len; /* Maximum length of authenticated key */ - __u32 max_auth_key_len; - __u32 reserve; + __le32 max_auth_key_len; + __le32 reserve; /* Maximum size of each crypto request's content */ - __u64 max_size; + __le64 max_size; }; struct virtio_crypto_inhdr { diff --git a/include/uapi/linux/virtio_fs.h b/include/uapi/linux/virtio_fs.h index b02eb2ac3d99..3056b6e9f8ce 100644 --- a/include/uapi/linux/virtio_fs.h +++ b/include/uapi/linux/virtio_fs.h @@ -13,7 +13,7 @@ struct virtio_fs_config { __u8 tag[36]; /* Number of request queues */ - __u32 num_request_queues; + __le32 num_request_queues; } __attribute__((packed)); #endif /* _UAPI_LINUX_VIRTIO_FS_H */ diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 0c85914d9369..ccbd174ef321 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -320,10 +320,10 @@ struct virtio_gpu_resp_edid { #define VIRTIO_GPU_EVENT_DISPLAY (1 << 0) struct virtio_gpu_config { - __u32 events_read; - __u32 events_clear; - __u32 num_scanouts; - __u32 num_capsets; + __le32 events_read; + __le32 events_clear; + __le32 num_scanouts; + __le32 num_capsets; }; /* simple formats for fbcon/X use */ diff --git a/include/uapi/linux/virtio_input.h b/include/uapi/linux/virtio_input.h index a7fe5c8fb135..52084b1fb965 100644 --- a/include/uapi/linux/virtio_input.h +++ b/include/uapi/linux/virtio_input.h @@ -40,18 +40,18 @@ enum virtio_input_config_select { }; struct virtio_input_absinfo { - __u32 min; - __u32 max; - __u32 fuzz; - __u32 flat; - __u32 res; + __le32 min; + __le32 max; + __le32 fuzz; + __le32 flat; + __le32 res; }; struct virtio_input_devids { - __u16 bustype; - __u16 vendor; - __u16 product; - __u16 version; + __le16 bustype; + __le16 vendor; + __le16 product; + __le16 version; }; struct virtio_input_config { diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h index 48e3c29223b5..237e36a280cb 100644 --- a/include/uapi/linux/virtio_iommu.h +++ b/include/uapi/linux/virtio_iommu.h @@ -18,24 +18,24 @@ #define VIRTIO_IOMMU_F_MMIO 5 struct virtio_iommu_range_64 { - __u64 start; - __u64 end; + __le64 start; + __le64 end; }; struct virtio_iommu_range_32 { - __u32 start; - __u32 end; + __le32 start; + __le32 end; }; struct virtio_iommu_config { /* Supported page sizes */ - __u64 page_size_mask; + __le64 page_size_mask; /* Supported IOVA range */ struct virtio_iommu_range_64 input_range; /* Max domain ID size */ struct virtio_iommu_range_32 domain_range; /* Probe buffer size */ - __u32 probe_size; + __le32 probe_size; }; /* Request types */ diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h index a9ffe041843c..70e01c687d5e 100644 --- a/include/uapi/linux/virtio_mem.h +++ b/include/uapi/linux/virtio_mem.h @@ -185,27 +185,27 @@ struct virtio_mem_resp { struct virtio_mem_config { /* Block size and alignment. Cannot change. */ - __u64 block_size; + __le64 block_size; /* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */ - __u16 node_id; + __le16 node_id; __u8 padding[6]; /* Start address of the memory region. Cannot change. */ - __u64 addr; + __le64 addr; /* Region size (maximum). Cannot change. */ - __u64 region_size; + __le64 region_size; /* * Currently usable region size. Can grow up to region_size. Can * shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config * update will be sent). */ - __u64 usable_region_size; + __le64 usable_region_size; /* * Currently used size. Changes due to plug/unplug requests, but no * config updates will be sent. */ - __u64 plugged_size; + __le64 plugged_size; /* Requested size. New plug requests cannot exceed it. Can change. */ - __u64 requested_size; + __le64 requested_size; }; #endif /* _LINUX_VIRTIO_MEM_H */ diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 19d23e5baa4e..3f55a4215f11 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -87,19 +87,19 @@ struct virtio_net_config { /* The config defining mac address (if VIRTIO_NET_F_MAC) */ __u8 mac[ETH_ALEN]; /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ - __u16 status; + __virtio16 status; /* Maximum number of each of transmit and receive queues; * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ. * Legal values are between 1 and 0x8000 */ - __u16 max_virtqueue_pairs; + __virtio16 max_virtqueue_pairs; /* Default maximum transmit unit advice */ - __u16 mtu; + __virtio16 mtu; /* * speed, in units of 1Mb. All values 0 to INT_MAX are legal. * Any other value stands for unknown. */ - __u32 speed; + __le32 speed; /* * 0x00 - half duplex * 0x01 - full duplex diff --git a/include/uapi/linux/virtio_pmem.h b/include/uapi/linux/virtio_pmem.h index b022787ffb94..d676b3620383 100644 --- a/include/uapi/linux/virtio_pmem.h +++ b/include/uapi/linux/virtio_pmem.h @@ -15,8 +15,8 @@ #include <linux/virtio_config.h> struct virtio_pmem_config { - __u64 start; - __u64 size; + __le64 start; + __le64 size; }; #define VIRTIO_PMEM_REQ_TYPE_FLUSH 0 diff --git a/include/uapi/linux/virtio_scsi.h b/include/uapi/linux/virtio_scsi.h index cc18ef8825c0..0abaae4027c0 100644 --- a/include/uapi/linux/virtio_scsi.h +++ b/include/uapi/linux/virtio_scsi.h @@ -103,16 +103,16 @@ struct virtio_scsi_event { } __attribute__((packed)); struct virtio_scsi_config { - __u32 num_queues; - __u32 seg_max; - __u32 max_sectors; - __u32 cmd_per_lun; - __u32 event_info_size; - __u32 sense_size; - __u32 cdb_size; - __u16 max_channel; - __u16 max_target; - __u32 max_lun; + __virtio32 num_queues; + __virtio32 seg_max; + __virtio32 max_sectors; + __virtio32 cmd_per_lun; + __virtio32 event_info_size; + __virtio32 sense_size; + __virtio32 cdb_size; + __virtio16 max_channel; + __virtio16 max_target; + __virtio32 max_lun; } __attribute__((packed)); /* Feature Bits */ diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h index dbf14c1e2188..f2640e505c4e 100644 --- a/tools/virtio/linux/virtio_config.h +++ b/tools/virtio/linux/virtio_config.h @@ -42,16 +42,16 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev, (__virtio_test_bit((dev), feature)) /** - * virtio_has_iommu_quirk - determine whether this device has the iommu quirk + * virtio_has_dma_quirk - determine whether this device has the DMA quirk * @vdev: the device */ -static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev) +static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev) { /* * Note the reverse polarity of the quirk feature (compared to most * other features), this is for compatibility with legacy systems. */ - return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM); } static inline bool virtio_is_little_endian(struct virtio_device *vdev) diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c index 28fda42e471b..c9bb3957f58a 100644 --- a/virt/lib/irqbypass.c +++ b/virt/lib/irqbypass.c @@ -40,17 +40,21 @@ static int __connect(struct irq_bypass_producer *prod, if (prod->add_consumer) ret = prod->add_consumer(prod, cons); - if (!ret) { - ret = cons->add_producer(cons, prod); - if (ret && prod->del_consumer) - prod->del_consumer(prod, cons); - } + if (ret) + goto err_add_consumer; + + ret = cons->add_producer(cons, prod); + if (ret) + goto err_add_producer; if (cons->start) cons->start(cons); if (prod->start) prod->start(prod); - +err_add_producer: + if (prod->del_consumer) + prod->del_consumer(prod, cons); +err_add_consumer: return ret; } |