diff options
Diffstat (limited to 'drivers/vfio/pci/vfio_pci_config.c')
-rw-r--r-- | drivers/vfio/pci/vfio_pci_config.c | 56 |
1 files changed, 53 insertions, 3 deletions
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 6e58b4bf7a60..9343f597182d 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -402,11 +402,14 @@ bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev)
 	u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]);
 
 	/*
+	 * Memory region cannot be accessed if device power state is D3.
+	 *
 	 * SR-IOV VF memory enable is handled by the MSE bit in the
 	 * PF SR-IOV capability, there's therefore no need to trigger
 	 * faults based on the virtual value.
 	 */
-	return pdev->no_command_memory || (cmd & PCI_COMMAND_MEMORY);
+	return pdev->current_state < PCI_D3hot &&
+	       (pdev->no_command_memory || (cmd & PCI_COMMAND_MEMORY));
 }
 
 /*
@@ -692,6 +695,22 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm)
 	return 0;
 }
 
+/*
+ * It takes all the required locks to protect the access of power related
+ * variables and then invokes vfio_pci_set_power_state().
+ */
+static void vfio_lock_and_set_power_state(struct vfio_pci_core_device *vdev,
+					  pci_power_t state)
+{
+	if (state >= PCI_D3hot)
+		vfio_pci_zap_and_down_write_memory_lock(vdev);
+	else
+		down_write(&vdev->memory_lock);
+
+	vfio_pci_set_power_state(vdev, state);
+	up_write(&vdev->memory_lock);
+}
+
 static int vfio_pm_config_write(struct vfio_pci_core_device *vdev, int pos,
 				int count, struct perm_bits *perm,
 				int offset, __le32 val)
@@ -718,7 +737,7 @@ static int vfio_pm_config_write(struct vfio_pci_core_device *vdev, int pos,
 			break;
 		}
 
-		vfio_pci_set_power_state(vdev, state);
+		vfio_lock_and_set_power_state(vdev, state);
 	}
 
 	return count;
@@ -739,11 +758,28 @@ static int __init init_pci_cap_pm_perm(struct perm_bits *perm)
 	p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
 
 	/*
+	 * The guests can't process PME events. If any PME event will be
+	 * generated, then it will be mostly handled in the host and the
+	 * host will clear the PME_STATUS. So virtualize PME_Support bits.
+	 * The vconfig bits will be cleared during device capability
+	 * initialization.
+	 */
+	p_setw(perm, PCI_PM_PMC, PCI_PM_CAP_PME_MASK, NO_WRITE);
+
+	/*
 	 * Power management is defined *per function*, so we can let
 	 * the user change power state, but we trap and initiate the
 	 * change ourselves, so the state bits are read-only.
+	 *
+	 * The guest can't process PME from D3cold so virtualize PME_Status
+	 * and PME_En bits. The vconfig bits will be cleared during device
+	 * capability initialization.
 	 */
-	p_setd(perm, PCI_PM_CTRL, NO_VIRT, ~PCI_PM_CTRL_STATE_MASK);
+	p_setd(perm, PCI_PM_CTRL,
+	       PCI_PM_CTRL_PME_ENABLE | PCI_PM_CTRL_PME_STATUS,
+	       ~(PCI_PM_CTRL_PME_ENABLE | PCI_PM_CTRL_PME_STATUS |
+		 PCI_PM_CTRL_STATE_MASK));
+
 	return 0;
 }
 
@@ -1412,6 +1448,17 @@ static int vfio_ext_cap_len(struct vfio_pci_core_device *vdev, u16 ecap, u16 epo
 	return 0;
 }
 
+static void vfio_update_pm_vconfig_bytes(struct vfio_pci_core_device *vdev,
+					 int offset)
+{
+	__le16 *pmc = (__le16 *)&vdev->vconfig[offset + PCI_PM_PMC];
+	__le16 *ctrl = (__le16 *)&vdev->vconfig[offset + PCI_PM_CTRL];
+
+	/* Clear vconfig PME_Support, PME_Status, and PME_En bits */
+	*pmc &= ~cpu_to_le16(PCI_PM_CAP_PME_MASK);
+	*ctrl &= ~cpu_to_le16(PCI_PM_CTRL_PME_ENABLE | PCI_PM_CTRL_PME_STATUS);
+}
+
 static int vfio_fill_vconfig_bytes(struct vfio_pci_core_device *vdev,
 				   int offset, int size)
 {
@@ -1535,6 +1582,9 @@ static int vfio_cap_init(struct vfio_pci_core_device *vdev)
 		if (ret)
 			return ret;
 
+		if (cap == PCI_CAP_ID_PM)
+			vfio_update_pm_vconfig_bytes(vdev, pos);
+
 		prev = &vdev->vconfig[pos + PCI_CAP_LIST_NEXT];
 		pos = next;
 		caps++;