diff options
Diffstat (limited to 'drivers/iommu/intel/iommu.c')
-rw-r--r-- | drivers/iommu/intel/iommu.c | 277 |
1 files changed, 118 insertions, 159 deletions
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7c2f4bd33582..b871a6afd803 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -876,7 +876,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, return; } /* For request-without-pasid, get the pasid from context entry */ - if (intel_iommu_sm && pasid == INVALID_IOASID) + if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID) pasid = PASID_RID2PASID; dir_index = pasid >> PASID_PDE_SHIFT; @@ -915,8 +915,6 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, int level = agaw_to_level(domain->agaw); int offset; - BUG_ON(!domain->pgd); - if (!domain_pfn_supported(domain, pfn)) /* Address beyond IOMMU's addressing capabilities. */ return NULL; @@ -1005,9 +1003,9 @@ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned int large_page; struct dma_pte *first_pte, *pte; - BUG_ON(!domain_pfn_supported(domain, start_pfn)); - BUG_ON(!domain_pfn_supported(domain, last_pfn)); - BUG_ON(start_pfn > last_pfn); + if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) || + WARN_ON(start_pfn > last_pfn)) + return; /* we don't need lock here; nobody else touches the iova range */ do { @@ -1166,9 +1164,9 @@ next: static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn, struct list_head *freelist) { - BUG_ON(!domain_pfn_supported(domain, start_pfn)); - BUG_ON(!domain_pfn_supported(domain, last_pfn)); - BUG_ON(start_pfn > last_pfn); + if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) || + WARN_ON(start_pfn > last_pfn)) + return; /* we don't need lock here; nobody else touches the iova range */ dma_pte_clear_level(domain, agaw_to_level(domain->agaw), @@ -1272,7 +1270,9 @@ static void __iommu_flush_context(struct intel_iommu *iommu, | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask); break; default: - BUG(); + pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n", + iommu->name, type); + return; } val |= DMA_CCMD_ICC; @@ -1308,7 +1308,9 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, val_iva = size_order | addr; break; default: - BUG(); + pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n", + iommu->name, type); + return; } /* Note: set drain read/write */ #if 0 @@ -1406,20 +1408,6 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) return; pdev = to_pci_dev(info->dev); - /* For IOMMU that supports device IOTLB throttling (DIT), we assign - * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge - * queue depth at PF level. If DIT is not set, PFSID will be treated as - * reserved, which should be set to 0. - */ - if (!ecap_dit(info->iommu->ecap)) - info->pfsid = 0; - else { - struct pci_dev *pf_pdev; - - /* pdev will be returned if device is not a vf */ - pf_pdev = pci_physfn(pdev); - info->pfsid = pci_dev_id(pf_pdev); - } /* The PCIe spec, in its wisdom, declares that the behaviour of the device if you enable PASID support after ATS support is @@ -1429,16 +1417,10 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1)) info->pasid_enabled = 1; - if (info->pri_supported && - (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) && - !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH)) - info->pri_enabled = 1; - if (info->ats_supported && pci_ats_page_aligned(pdev) && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { info->ats_enabled = 1; domain_update_iotlb(info->domain); - info->ats_qdep = pci_ats_queue_depth(pdev); } } @@ -1457,11 +1439,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info) domain_update_iotlb(info->domain); } - if (info->pri_enabled) { - pci_disable_pri(pdev); - info->pri_enabled = 0; - } - if (info->pasid_enabled) { pci_disable_pasid(pdev); info->pasid_enabled = 0; @@ -1508,7 +1485,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; u16 did = domain_id_iommu(domain, iommu); - BUG_ON(pages == 0); + if (WARN_ON(!pages)) + return; if (ih) ih = 1 << 6; @@ -1722,9 +1700,6 @@ static void free_dmar_iommu(struct intel_iommu *iommu) if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); } - if (vccap_pasid(iommu->vccap)) - ioasid_unregister_allocator(&iommu->pasid_allocator); - #endif } @@ -1895,7 +1870,7 @@ context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid) */ static inline void context_set_sm_dte(struct context_entry *context) { - context->lo |= (1 << 2); + context->lo |= BIT_ULL(2); } /* @@ -1904,7 +1879,7 @@ static inline void context_set_sm_dte(struct context_entry *context) */ static inline void context_set_sm_pre(struct context_entry *context) { - context->lo |= (1 << 4); + context->lo |= BIT_ULL(4); } /* Convert value to context PASID directory size field coding. */ @@ -1930,8 +1905,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, pr_debug("Set context mapping for %02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - BUG_ON(!domain->pgd); - spin_lock(&iommu->lock); ret = -ENOMEM; context = iommu_context_addr(iommu, bus, devfn, 1); @@ -2183,7 +2156,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, phys_addr_t pteval; u64 attr; - BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)); + if (unlikely(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1))) + return -EINVAL; if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; @@ -2341,8 +2315,6 @@ static int domain_setup_first_level(struct intel_iommu *iommu, if (level != 4 && level != 5) return -EINVAL; - if (pasid != PASID_RID2PASID) - flags |= PASID_FLAG_SUPERVISOR_MODE; if (level == 5) flags |= PASID_FLAG_FL5LP; @@ -2797,85 +2769,6 @@ out_unmap: return ret; } -#ifdef CONFIG_INTEL_IOMMU_SVM -static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data) -{ - struct intel_iommu *iommu = data; - ioasid_t ioasid; - - if (!iommu) - return INVALID_IOASID; - /* - * VT-d virtual command interface always uses the full 20 bit - * PASID range. Host can partition guest PASID range based on - * policies but it is out of guest's control. - */ - if (min < PASID_MIN || max > intel_pasid_max_id) - return INVALID_IOASID; - - if (vcmd_alloc_pasid(iommu, &ioasid)) - return INVALID_IOASID; - - return ioasid; -} - -static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data) -{ - struct intel_iommu *iommu = data; - - if (!iommu) - return; - /* - * Sanity check the ioasid owner is done at upper layer, e.g. VFIO - * We can only free the PASID when all the devices are unbound. - */ - if (ioasid_find(NULL, ioasid, NULL)) { - pr_alert("Cannot free active IOASID %d\n", ioasid); - return; - } - vcmd_free_pasid(iommu, ioasid); -} - -static void register_pasid_allocator(struct intel_iommu *iommu) -{ - /* - * If we are running in the host, no need for custom allocator - * in that PASIDs are allocated from the host system-wide. - */ - if (!cap_caching_mode(iommu->cap)) - return; - - if (!sm_supported(iommu)) { - pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n"); - return; - } - - /* - * Register a custom PASID allocator if we are running in a guest, - * guest PASID must be obtained via virtual command interface. - * There can be multiple vIOMMUs in each guest but only one allocator - * is active. All vIOMMU allocators will eventually be calling the same - * host allocator. - */ - if (!vccap_pasid(iommu->vccap)) - return; - - pr_info("Register custom PASID allocator\n"); - iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc; - iommu->pasid_allocator.free = intel_vcmd_ioasid_free; - iommu->pasid_allocator.pdata = (void *)iommu; - if (ioasid_register_allocator(&iommu->pasid_allocator)) { - pr_warn("Custom PASID allocator failed, scalable mode disabled\n"); - /* - * Disable scalable mode on this IOMMU if there - * is no custom allocator. Mixing SM capable vIOMMU - * and non-SM vIOMMU are not supported. - */ - intel_iommu_sm = 0; - } -} -#endif - static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; @@ -2964,9 +2857,6 @@ static int __init init_dmars(void) */ for_each_active_iommu(iommu, drhd) { iommu_flush_write_buffer(iommu); -#ifdef CONFIG_INTEL_IOMMU_SVM - register_pasid_allocator(iommu); -#endif iommu_set_root_entry(iommu); } @@ -3760,8 +3650,8 @@ static ssize_t version_show(struct device *dev, { struct intel_iommu *iommu = dev_to_intel_iommu(dev); u32 ver = readl(iommu->reg + DMAR_VER_REG); - return sprintf(buf, "%d:%d\n", - DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); + return sysfs_emit(buf, "%d:%d\n", + DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); } static DEVICE_ATTR_RO(version); @@ -3769,7 +3659,7 @@ static ssize_t address_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%llx\n", iommu->reg_phys); + return sysfs_emit(buf, "%llx\n", iommu->reg_phys); } static DEVICE_ATTR_RO(address); @@ -3777,7 +3667,7 @@ static ssize_t cap_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%llx\n", iommu->cap); + return sysfs_emit(buf, "%llx\n", iommu->cap); } static DEVICE_ATTR_RO(cap); @@ -3785,7 +3675,7 @@ static ssize_t ecap_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%llx\n", iommu->ecap); + return sysfs_emit(buf, "%llx\n", iommu->ecap); } static DEVICE_ATTR_RO(ecap); @@ -3793,7 +3683,7 @@ static ssize_t domains_supported_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap)); + return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap)); } static DEVICE_ATTR_RO(domains_supported); @@ -3801,8 +3691,9 @@ static ssize_t domains_used_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids, - cap_ndoms(iommu->cap))); + return sysfs_emit(buf, "%d\n", + bitmap_weight(iommu->domain_ids, + cap_ndoms(iommu->cap))); } static DEVICE_ATTR_RO(domains_used); @@ -4340,8 +4231,9 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. */ - BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level, - GFP_ATOMIC)); + if (unlikely(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, + &level, GFP_ATOMIC))) + return 0; if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); @@ -4521,6 +4413,17 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) dmar_ats_supported(pdev, iommu)) { info->ats_supported = 1; info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev); + + /* + * For IOMMU that supports device IOTLB throttling + * (DIT), we assign PFSID to the invalidation desc + * of a VF such that IOMMU HW can gauge queue depth + * at PF level. If DIT is not set, PFSID will be + * treated as reserved, which should be set to 0. + */ + if (ecap_dit(iommu->ecap)) + info->pfsid = pci_dev_id(pci_physfn(pdev)); + info->ats_qdep = pci_ats_queue_depth(pdev); } if (sm_supported(iommu)) { if (pasid_supported(iommu)) { @@ -4638,7 +4541,6 @@ static int intel_iommu_enable_sva(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu; - int ret; if (!info || dmar_disabled) return -EINVAL; @@ -4650,45 +4552,102 @@ static int intel_iommu_enable_sva(struct device *dev) if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) return -ENODEV; - if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) + if (!info->pasid_enabled || !info->ats_enabled) return -EINVAL; - ret = iopf_queue_add_device(iommu->iopf_queue, dev); - if (ret) - return ret; + /* + * Devices having device-specific I/O fault handling should not + * support PCI/PRI. The IOMMU side has no means to check the + * capability of device-specific IOPF. Therefore, IOMMU can only + * default that if the device driver enables SVA on a non-PRI + * device, it will handle IOPF in its own way. + */ + if (!info->pri_supported) + return 0; - ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); - if (ret) - iopf_queue_remove_device(iommu->iopf_queue, dev); + /* Devices supporting PRI should have it enabled. */ + if (!info->pri_enabled) + return -EINVAL; - return ret; + return 0; } -static int intel_iommu_disable_sva(struct device *dev) +static int intel_iommu_enable_iopf(struct device *dev) { + struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu = info->iommu; + struct intel_iommu *iommu; int ret; - ret = iommu_unregister_device_fault_handler(dev); + if (!pdev || !info || !info->ats_enabled || !info->pri_supported) + return -ENODEV; + + if (info->pri_enabled) + return -EBUSY; + + iommu = info->iommu; + if (!iommu) + return -EINVAL; + + /* PASID is required in PRG Response Message. */ + if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev)) + return -EINVAL; + + ret = pci_reset_pri(pdev); + if (ret) + return ret; + + ret = iopf_queue_add_device(iommu->iopf_queue, dev); if (ret) return ret; - ret = iopf_queue_remove_device(iommu->iopf_queue, dev); + ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); + if (ret) + goto iopf_remove_device; + + ret = pci_enable_pri(pdev, PRQ_DEPTH); if (ret) - iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); + goto iopf_unregister_handler; + info->pri_enabled = 1; + + return 0; + +iopf_unregister_handler: + iommu_unregister_device_fault_handler(dev); +iopf_remove_device: + iopf_queue_remove_device(iommu->iopf_queue, dev); return ret; } -static int intel_iommu_enable_iopf(struct device *dev) +static int intel_iommu_disable_iopf(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; - if (info && info->pri_supported) - return 0; + if (!info->pri_enabled) + return -EINVAL; + + /* + * PCIe spec states that by clearing PRI enable bit, the Page + * Request Interface will not issue new page requests, but has + * outstanding page requests that have been transmitted or are + * queued for transmission. This is supposed to be called after + * the device driver has stopped DMA, all PASIDs have been + * unbound and the outstanding PRQs have been drained. + */ + pci_disable_pri(to_pci_dev(dev)); + info->pri_enabled = 0; - return -ENODEV; + /* + * With PRI disabled and outstanding PRQs drained, unregistering + * fault handler and removing device from iopf queue should never + * fail. + */ + WARN_ON(iommu_unregister_device_fault_handler(dev)); + WARN_ON(iopf_queue_remove_device(iommu->iopf_queue, dev)); + + return 0; } static int @@ -4711,10 +4670,10 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) { switch (feat) { case IOMMU_DEV_FEAT_IOPF: - return 0; + return intel_iommu_disable_iopf(dev); case IOMMU_DEV_FEAT_SVA: - return intel_iommu_disable_sva(dev); + return 0; default: return -ENODEV; |