summaryrefslogtreecommitdiff
path: root/drivers/vfio
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vfio')
-rw-r--r--drivers/vfio/mdev/mdev_core.c4
-rw-r--r--drivers/vfio/pci/mlx5/cmd.c61
-rw-r--r--drivers/vfio/pci/nvgrace-gpu/main.c169
-rw-r--r--drivers/vfio/pci/vfio_pci_config.c13
-rw-r--r--drivers/vfio/pci/vfio_pci_core.c57
-rw-r--r--drivers/vfio/pci/vfio_pci_rdwr.c38
-rw-r--r--drivers/vfio/pci/virtio/migrate.c6
-rw-r--r--drivers/vfio/platform/vfio_platform_common.c10
8 files changed, 255 insertions, 103 deletions
diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index ed4737de4528..f2e686f8f1ef 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -76,7 +76,7 @@ int mdev_register_parent(struct mdev_parent *parent, struct device *dev,
if (ret)
return ret;
- ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL);
+ ret = class_compat_create_link(mdev_bus_compat_class, dev);
if (ret)
dev_warn(dev, "Failed to create compatibility class link\n");
@@ -98,7 +98,7 @@ void mdev_unregister_parent(struct mdev_parent *parent)
dev_info(parent->dev, "MDEV: Unregistering\n");
down_write(&parent->unreg_sem);
- class_compat_remove_link(mdev_bus_compat_class, parent->dev, NULL);
+ class_compat_remove_link(mdev_bus_compat_class, parent->dev);
device_for_each_child(parent->dev, NULL, mdev_device_remove_cb);
parent_remove_sysfs_files(parent);
up_write(&parent->unreg_sem);
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 7527e277c898..11eda6b207f1 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -408,7 +408,7 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
buf->dma_dir, 0);
}
- /* Undo alloc_pages_bulk_array() */
+ /* Undo alloc_pages_bulk() */
for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
__free_page(sg_page_iter_page(&sg_iter));
sg_free_append_table(&buf->table);
@@ -431,8 +431,8 @@ static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
return -ENOMEM;
do {
- filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
- page_list);
+ filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, to_fill,
+ page_list);
if (!filled) {
ret = -ENOMEM;
goto err;
@@ -1342,7 +1342,7 @@ static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf)
{
int i;
- /* Undo alloc_pages_bulk_array() */
+ /* Undo alloc_pages_bulk() */
for (i = 0; i < recv_buf->npages; i++)
__free_page(recv_buf->page_list[i]);
@@ -1361,9 +1361,9 @@ static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
return -ENOMEM;
for (;;) {
- filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT,
- npages - done,
- recv_buf->page_list + done);
+ filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT,
+ npages - done,
+ recv_buf->page_list + done);
if (!filled)
goto err;
@@ -1517,7 +1517,8 @@ int mlx5vf_start_page_tracker(struct vfio_device *vdev,
struct mlx5_vhca_qp *host_qp;
struct mlx5_vhca_qp *fw_qp;
struct mlx5_core_dev *mdev;
- u32 max_msg_size = PAGE_SIZE;
+ u32 log_max_msg_size;
+ u32 max_msg_size;
u64 rq_size = SZ_2M;
u32 max_recv_wr;
int err;
@@ -1534,6 +1535,12 @@ int mlx5vf_start_page_tracker(struct vfio_device *vdev,
}
mdev = mvdev->mdev;
+ log_max_msg_size = MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_msg_size);
+ max_msg_size = (1ULL << log_max_msg_size);
+ /* The RQ must hold at least 4 WQEs/messages for successful QP creation */
+ if (rq_size < 4 * max_msg_size)
+ rq_size = 4 * max_msg_size;
+
memset(tracker, 0, sizeof(*tracker));
tracker->uar = mlx5_get_uars_page(mdev);
if (IS_ERR(tracker->uar)) {
@@ -1623,25 +1630,41 @@ set_report_output(u32 size, int index, struct mlx5_vhca_qp *qp,
{
u32 entry_size = MLX5_ST_SZ_BYTES(page_track_report_entry);
u32 nent = size / entry_size;
+ u32 nent_in_page;
+ u32 nent_to_set;
struct page *page;
+ u32 page_offset;
+ u32 page_index;
+ u32 buf_offset;
+ void *kaddr;
u64 addr;
u64 *buf;
int i;
- if (WARN_ON(index >= qp->recv_buf.npages ||
+ buf_offset = index * qp->max_msg_size;
+ if (WARN_ON(buf_offset + size >= qp->recv_buf.npages * PAGE_SIZE ||
(nent > qp->max_msg_size / entry_size)))
return;
- page = qp->recv_buf.page_list[index];
- buf = kmap_local_page(page);
- for (i = 0; i < nent; i++) {
- addr = MLX5_GET(page_track_report_entry, buf + i,
- dirty_address_low);
- addr |= (u64)MLX5_GET(page_track_report_entry, buf + i,
- dirty_address_high) << 32;
- iova_bitmap_set(dirty, addr, qp->tracked_page_size);
- }
- kunmap_local(buf);
+ do {
+ page_index = buf_offset / PAGE_SIZE;
+ page_offset = buf_offset % PAGE_SIZE;
+ nent_in_page = (PAGE_SIZE - page_offset) / entry_size;
+ page = qp->recv_buf.page_list[page_index];
+ kaddr = kmap_local_page(page);
+ buf = kaddr + page_offset;
+ nent_to_set = min(nent, nent_in_page);
+ for (i = 0; i < nent_to_set; i++) {
+ addr = MLX5_GET(page_track_report_entry, buf + i,
+ dirty_address_low);
+ addr |= (u64)MLX5_GET(page_track_report_entry, buf + i,
+ dirty_address_high) << 32;
+ iova_bitmap_set(dirty, addr, qp->tracked_page_size);
+ }
+ kunmap_local(kaddr);
+ buf_offset += (nent_to_set * entry_size);
+ nent -= nent_to_set;
+ } while (nent);
}
static void
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index a467085038f0..e5ac39c4cc6b 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -5,6 +5,8 @@
#include <linux/sizes.h>
#include <linux/vfio_pci_core.h>
+#include <linux/delay.h>
+#include <linux/jiffies.h>
/*
* The device memory usable to the workloads running in the VM is cached
@@ -17,12 +19,21 @@
#define RESMEM_REGION_INDEX VFIO_PCI_BAR2_REGION_INDEX
#define USEMEM_REGION_INDEX VFIO_PCI_BAR4_REGION_INDEX
-/* Memory size expected as non cached and reserved by the VM driver */
-#define RESMEM_SIZE SZ_1G
-
/* A hardwired and constant ABI value between the GPU FW and VFIO driver. */
#define MEMBLK_SIZE SZ_512M
+#define DVSEC_BITMAP_OFFSET 0xA
+#define MIG_SUPPORTED_WITH_CACHED_RESMEM BIT(0)
+
+#define GPU_CAP_DVSEC_REGISTER 3
+
+#define C2C_LINK_BAR0_OFFSET 0x1498
+#define HBM_TRAINING_BAR0_OFFSET 0x200BC
+#define STATUS_READY 0xFF
+
+#define POLL_QUANTUM_MS 1000
+#define POLL_TIMEOUT_MS (30 * 1000)
+
/*
* The state of the two device memory region - resmem and usemem - is
* saved as struct mem_region.
@@ -46,6 +57,7 @@ struct nvgrace_gpu_pci_core_device {
struct mem_region resmem;
/* Lock to control device memory kernel mapping */
struct mutex remap_lock;
+ bool has_mig_hw_bug;
};
static void nvgrace_gpu_init_fake_bar_emu_regs(struct vfio_device *core_vdev)
@@ -66,7 +78,7 @@ nvgrace_gpu_memregion(int index,
if (index == USEMEM_REGION_INDEX)
return &nvdev->usemem;
- if (index == RESMEM_REGION_INDEX)
+ if (nvdev->resmem.memlength && index == RESMEM_REGION_INDEX)
return &nvdev->resmem;
return NULL;
@@ -751,40 +763,67 @@ nvgrace_gpu_init_nvdev_struct(struct pci_dev *pdev,
u64 memphys, u64 memlength)
{
int ret = 0;
+ u64 resmem_size = 0;
/*
- * The VM GPU device driver needs a non-cacheable region to support
- * the MIG feature. Since the device memory is mapped as NORMAL cached,
- * carve out a region from the end with a different NORMAL_NC
- * property (called as reserved memory and represented as resmem). This
- * region then is exposed as a 64b BAR (region 2 and 3) to the VM, while
- * exposing the rest (termed as usable memory and represented using usemem)
- * as cacheable 64b BAR (region 4 and 5).
+ * On Grace Hopper systems, the VM GPU device driver needs a non-cacheable
+ * region to support the MIG feature owing to a hardware bug. Since the
+ * device memory is mapped as NORMAL cached, carve out a region from the end
+ * with a different NORMAL_NC property (called as reserved memory and
+ * represented as resmem). This region then is exposed as a 64b BAR
+ * (region 2 and 3) to the VM, while exposing the rest (termed as usable
+ * memory and represented using usemem) as cacheable 64b BAR (region 4 and 5).
*
* devmem (memlength)
* |-------------------------------------------------|
* | |
* usemem.memphys resmem.memphys
+ *
+ * This hardware bug is fixed on the Grace Blackwell platforms and the
+ * presence of the bug can be determined through nvdev->has_mig_hw_bug.
+ * Thus on systems with the hardware fix, there is no need to partition
+ * the GPU device memory and the entire memory is usable and mapped as
+ * NORMAL cached (i.e. resmem size is 0).
*/
+ if (nvdev->has_mig_hw_bug)
+ resmem_size = SZ_1G;
+
nvdev->usemem.memphys = memphys;
/*
* The device memory exposed to the VM is added to the kernel by the
- * VM driver module in chunks of memory block size. Only the usable
- * memory (usemem) is added to the kernel for usage by the VM
- * workloads. Make the usable memory size memblock aligned.
+ * VM driver module in chunks of memory block size. Note that only the
+ * usable memory (usemem) is added to the kernel for usage by the VM
+ * workloads.
*/
- if (check_sub_overflow(memlength, RESMEM_SIZE,
+ if (check_sub_overflow(memlength, resmem_size,
&nvdev->usemem.memlength)) {
ret = -EOVERFLOW;
goto done;
}
/*
- * The USEMEM part of the device memory has to be MEMBLK_SIZE
- * aligned. This is a hardwired ABI value between the GPU FW and
- * VFIO driver. The VM device driver is also aware of it and make
- * use of the value for its calculation to determine USEMEM size.
+ * The usemem region is exposed as a 64B Bar composed of region 4 and 5.
+ * Calculate and save the BAR size for the region.
+ */
+ nvdev->usemem.bar_size = roundup_pow_of_two(nvdev->usemem.memlength);
+
+ /*
+ * If the hardware has the fix for MIG, there is no requirement
+ * for splitting the device memory to create RESMEM. The entire
+ * device memory is usable and will be USEMEM. Return here for
+ * such case.
+ */
+ if (!nvdev->has_mig_hw_bug)
+ goto done;
+
+ /*
+ * When the device memory is split to workaround the MIG bug on
+ * Grace Hopper, the USEMEM part of the device memory has to be
+ * MEMBLK_SIZE aligned. This is a hardwired ABI value between the
+ * GPU FW and VFIO driver. The VM device driver is also aware of it
+ * and make use of the value for its calculation to determine USEMEM
+ * size. Note that the device memory may not be 512M aligned.
*/
nvdev->usemem.memlength = round_down(nvdev->usemem.memlength,
MEMBLK_SIZE);
@@ -803,15 +842,93 @@ nvgrace_gpu_init_nvdev_struct(struct pci_dev *pdev,
}
/*
- * The memory regions are exposed as BARs. Calculate and save
- * the BAR size for them.
+ * The resmem region is exposed as a 64b BAR composed of region 2 and 3
+ * for Grace Hopper. Calculate and save the BAR size for the region.
*/
- nvdev->usemem.bar_size = roundup_pow_of_two(nvdev->usemem.memlength);
nvdev->resmem.bar_size = roundup_pow_of_two(nvdev->resmem.memlength);
done:
return ret;
}
+static bool nvgrace_gpu_has_mig_hw_bug(struct pci_dev *pdev)
+{
+ int pcie_dvsec;
+ u16 dvsec_ctrl16;
+
+ pcie_dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_NVIDIA,
+ GPU_CAP_DVSEC_REGISTER);
+
+ if (pcie_dvsec) {
+ pci_read_config_word(pdev,
+ pcie_dvsec + DVSEC_BITMAP_OFFSET,
+ &dvsec_ctrl16);
+
+ if (dvsec_ctrl16 & MIG_SUPPORTED_WITH_CACHED_RESMEM)
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * To reduce the system bootup time, the HBM training has
+ * been moved out of the UEFI on the Grace-Blackwell systems.
+ *
+ * The onus of checking whether the HBM training has completed
+ * thus falls on the module. The HBM training status can be
+ * determined from a BAR0 register.
+ *
+ * Similarly, another BAR0 register exposes the status of the
+ * CPU-GPU chip-to-chip (C2C) cache coherent interconnect.
+ *
+ * Poll these register and check for 30s. If the HBM training is
+ * not complete or if the C2C link is not ready, fail the probe.
+ *
+ * While the wait is not required on Grace Hopper systems, it
+ * is beneficial to make the check to ensure the device is in an
+ * expected state.
+ *
+ * Ensure that the BAR0 region is enabled before accessing the
+ * registers.
+ */
+static int nvgrace_gpu_wait_device_ready(struct pci_dev *pdev)
+{
+ unsigned long timeout = jiffies + msecs_to_jiffies(POLL_TIMEOUT_MS);
+ void __iomem *io;
+ int ret = -ETIME;
+
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ ret = pci_request_selected_regions(pdev, 1 << 0, KBUILD_MODNAME);
+ if (ret)
+ goto request_region_exit;
+
+ io = pci_iomap(pdev, 0, 0);
+ if (!io) {
+ ret = -ENOMEM;
+ goto iomap_exit;
+ }
+
+ do {
+ if ((ioread32(io + C2C_LINK_BAR0_OFFSET) == STATUS_READY) &&
+ (ioread32(io + HBM_TRAINING_BAR0_OFFSET) == STATUS_READY)) {
+ ret = 0;
+ goto reg_check_exit;
+ }
+ msleep(POLL_QUANTUM_MS);
+ } while (!time_after(jiffies, timeout));
+
+reg_check_exit:
+ pci_iounmap(pdev, io);
+iomap_exit:
+ pci_release_selected_regions(pdev, 1 << 0);
+request_region_exit:
+ pci_disable_device(pdev);
+ return ret;
+}
+
static int nvgrace_gpu_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
@@ -820,6 +937,10 @@ static int nvgrace_gpu_probe(struct pci_dev *pdev,
u64 memphys, memlength;
int ret;
+ ret = nvgrace_gpu_wait_device_ready(pdev);
+ if (ret)
+ return ret;
+
ret = nvgrace_gpu_fetch_memory_property(pdev, &memphys, &memlength);
if (!ret)
ops = &nvgrace_gpu_pci_ops;
@@ -832,6 +953,8 @@ static int nvgrace_gpu_probe(struct pci_dev *pdev,
dev_set_drvdata(&pdev->dev, &nvdev->core_device);
if (ops == &nvgrace_gpu_pci_ops) {
+ nvdev->has_mig_hw_bug = nvgrace_gpu_has_mig_hw_bug(pdev);
+
/*
* Device memory properties are identified in the host ACPI
* table. Set the nvgrace_gpu_pci_core_device structure.
@@ -868,6 +991,8 @@ static const struct pci_device_id nvgrace_gpu_vfio_pci_table[] = {
{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2345) },
/* GH200 SKU */
{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2348) },
+ /* GB200 SKU */
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2941) },
{}
};
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index ea2745c1ac5e..94142581c98c 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -511,13 +511,13 @@ static void vfio_bar_fixup(struct vfio_pci_core_device *vdev)
mask = ~(pci_resource_len(pdev, PCI_ROM_RESOURCE) - 1);
mask |= PCI_ROM_ADDRESS_ENABLE;
*vbar &= cpu_to_le32((u32)mask);
- } else if (pdev->resource[PCI_ROM_RESOURCE].flags &
- IORESOURCE_ROM_SHADOW) {
- mask = ~(0x20000 - 1);
+ } else if (pdev->rom && pdev->romlen) {
+ mask = ~(roundup_pow_of_two(pdev->romlen) - 1);
mask |= PCI_ROM_ADDRESS_ENABLE;
*vbar &= cpu_to_le32((u32)mask);
- } else
+ } else {
*vbar = 0;
+ }
vdev->bardirty = false;
}
@@ -1389,11 +1389,12 @@ static int vfio_ext_cap_len(struct vfio_pci_core_device *vdev, u16 ecap, u16 epo
switch (ecap) {
case PCI_EXT_CAP_ID_VNDR:
- ret = pci_read_config_dword(pdev, epos + PCI_VSEC_HDR, &dword);
+ ret = pci_read_config_dword(pdev, epos + PCI_VNDR_HEADER,
+ &dword);
if (ret)
return pcibios_err_to_errno(ret);
- return dword >> PCI_VSEC_HDR_LEN_SHIFT;
+ return PCI_VNDR_HEADER_LEN(dword);
case PCI_EXT_CAP_ID_VC:
case PCI_EXT_CAP_ID_VC9:
case PCI_EXT_CAP_ID_MFVC:
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 1ab58da9f38a..586e49efb81b 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1054,31 +1054,27 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
info.flags = 0;
+ info.size = 0;
- /* Report the BAR size, not the ROM size */
- info.size = pci_resource_len(pdev, info.index);
- if (!info.size) {
- /* Shadow ROMs appear as PCI option ROMs */
- if (pdev->resource[PCI_ROM_RESOURCE].flags &
- IORESOURCE_ROM_SHADOW)
- info.size = 0x20000;
- else
- break;
- }
-
- /*
- * Is it really there? Enable memory decode for implicit access
- * in pci_map_rom().
- */
- cmd = vfio_pci_memory_lock_and_enable(vdev);
- io = pci_map_rom(pdev, &size);
- if (io) {
+ if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) {
+ /*
+ * Check ROM content is valid. Need to enable memory
+ * decode for ROM access in pci_map_rom().
+ */
+ cmd = vfio_pci_memory_lock_and_enable(vdev);
+ io = pci_map_rom(pdev, &size);
+ if (io) {
+ info.flags = VFIO_REGION_INFO_FLAG_READ;
+ /* Report the BAR size, not the ROM size. */
+ info.size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+ pci_unmap_rom(pdev, io);
+ }
+ vfio_pci_memory_unlock_and_restore(vdev, cmd);
+ } else if (pdev->rom && pdev->romlen) {
info.flags = VFIO_REGION_INFO_FLAG_READ;
- pci_unmap_rom(pdev, io);
- } else {
- info.size = 0;
+ /* Report BAR size as power of two. */
+ info.size = roundup_pow_of_two(pdev->romlen);
}
- vfio_pci_memory_unlock_and_restore(vdev, cmd);
break;
}
@@ -1661,14 +1657,15 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff;
vm_fault_t ret = VM_FAULT_SIGBUS;
- if (order && (vmf->address & ((PAGE_SIZE << order) - 1) ||
+ pfn = vma_to_pfn(vma) + pgoff;
+
+ if (order && (pfn & ((1 << order) - 1) ||
+ vmf->address & ((PAGE_SIZE << order) - 1) ||
vmf->address + (PAGE_SIZE << order) > vma->vm_end)) {
ret = VM_FAULT_FALLBACK;
goto out;
}
- pfn = vma_to_pfn(vma);
-
down_read(&vdev->memory_lock);
if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
@@ -1676,18 +1673,18 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
switch (order) {
case 0:
- ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff);
+ ret = vmf_insert_pfn(vma, vmf->address, pfn);
break;
#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
case PMD_ORDER:
- ret = vmf_insert_pfn_pmd(vmf, __pfn_to_pfn_t(pfn + pgoff,
- PFN_DEV), false);
+ ret = vmf_insert_pfn_pmd(vmf,
+ __pfn_to_pfn_t(pfn, PFN_DEV), false);
break;
#endif
#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
case PUD_ORDER:
- ret = vmf_insert_pfn_pud(vmf, __pfn_to_pfn_t(pfn + pgoff,
- PFN_DEV), false);
+ ret = vmf_insert_pfn_pud(vmf,
+ __pfn_to_pfn_t(pfn, PFN_DEV), false);
break;
#endif
default:
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 66b72c289284..6192788c8ba3 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -16,6 +16,7 @@
#include <linux/io.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
#include "vfio_pci_priv.h"
@@ -61,9 +62,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);
VFIO_IOWRITE(8)
VFIO_IOWRITE(16)
VFIO_IOWRITE(32)
-#ifdef iowrite64
VFIO_IOWRITE(64)
-#endif
#define VFIO_IOREAD(size) \
int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev, \
@@ -89,9 +88,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);
VFIO_IOREAD(8)
VFIO_IOREAD(16)
VFIO_IOREAD(32)
-#ifdef ioread64
VFIO_IOREAD(64)
-#endif
#define VFIO_IORDWR(size) \
static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev,\
@@ -127,9 +124,7 @@ static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev,\
VFIO_IORDWR(8)
VFIO_IORDWR(16)
VFIO_IORDWR(32)
-#if defined(ioread64) && defined(iowrite64)
VFIO_IORDWR(64)
-#endif
/*
* Read or write from an __iomem region (MMIO or I/O port) with an excluded
@@ -155,7 +150,6 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
else
fillable = 0;
-#if defined(ioread64) && defined(iowrite64)
if (fillable >= 8 && !(off % 8)) {
ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
io, buf, off, &filled);
@@ -163,7 +157,6 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
return ret;
} else
-#endif
if (fillable >= 4 && !(off % 4)) {
ret = vfio_pci_iordwr32(vdev, iswrite, test_mem,
io, buf, off, &filled);
@@ -244,9 +237,8 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
if (pci_resource_start(pdev, bar))
end = pci_resource_len(pdev, bar);
- else if (bar == PCI_ROM_RESOURCE &&
- pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
- end = 0x20000;
+ else if (bar == PCI_ROM_RESOURCE && pdev->rom && pdev->romlen)
+ end = roundup_pow_of_two(pdev->romlen);
else
return -EINVAL;
@@ -261,11 +253,14 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
* excluded range at the end of the actual ROM. This makes
* filling large ROM BARs much faster.
*/
- io = pci_map_rom(pdev, &x_start);
- if (!io) {
- done = -ENOMEM;
- goto out;
+ if (pci_resource_start(pdev, bar)) {
+ io = pci_map_rom(pdev, &x_start);
+ } else {
+ io = ioremap(pdev->rom, pdev->romlen);
+ x_start = pdev->romlen;
}
+ if (!io)
+ return -ENOMEM;
x_end = end;
} else {
int ret = vfio_pci_core_setup_barmap(vdev, bar);
@@ -288,8 +283,13 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
if (done >= 0)
*ppos += done;
- if (bar == PCI_ROM_RESOURCE)
- pci_unmap_rom(pdev, io);
+ if (bar == PCI_ROM_RESOURCE) {
+ if (pci_resource_start(pdev, bar))
+ pci_unmap_rom(pdev, io);
+ else
+ iounmap(io);
+ }
+
out:
return done;
}
@@ -381,12 +381,10 @@ static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem,
ioeventfd->data, ioeventfd->addr);
break;
-#ifdef iowrite64
case 8:
vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem,
ioeventfd->data, ioeventfd->addr);
break;
-#endif
}
}
@@ -440,10 +438,8 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
pos >= vdev->msix_offset + vdev->msix_size))
return -EINVAL;
-#ifndef iowrite64
if (count == 8)
return -EINVAL;
-#endif
ret = vfio_pci_core_setup_barmap(vdev, bar);
if (ret)
diff --git a/drivers/vfio/pci/virtio/migrate.c b/drivers/vfio/pci/virtio/migrate.c
index ee54f4c17857..ba92bb4e9af9 100644
--- a/drivers/vfio/pci/virtio/migrate.c
+++ b/drivers/vfio/pci/virtio/migrate.c
@@ -77,8 +77,8 @@ static int virtiovf_add_migration_pages(struct virtiovf_data_buffer *buf,
return -ENOMEM;
do {
- filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
- page_list);
+ filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, to_fill,
+ page_list);
if (!filled) {
ret = -ENOMEM;
goto err;
@@ -112,7 +112,7 @@ static void virtiovf_free_data_buffer(struct virtiovf_data_buffer *buf)
{
struct sg_page_iter sg_iter;
- /* Undo alloc_pages_bulk_array() */
+ /* Undo alloc_pages_bulk() */
for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
__free_page(sg_page_iter_page(&sg_iter));
sg_free_append_table(&buf->table);
diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index e53757d1d095..3bf1043cd795 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -388,6 +388,11 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region *reg,
{
unsigned int done = 0;
+ if (off >= reg->size)
+ return -EINVAL;
+
+ count = min_t(size_t, count, reg->size - off);
+
if (!reg->ioaddr) {
reg->ioaddr =
ioremap(reg->addr, reg->size);
@@ -467,6 +472,11 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region *reg,
{
unsigned int done = 0;
+ if (off >= reg->size)
+ return -EINVAL;
+
+ count = min_t(size_t, count, reg->size - off);
+
if (!reg->ioaddr) {
reg->ioaddr =
ioremap(reg->addr, reg->size);