Diffstat (limited to 'drivers/iommu/intel/iommu.c')
 drivers/iommu/intel/iommu.c | 122
 1 file changed, 100 insertions(+), 22 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 59df7e42fd53..86ee06ac058b 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -16,7 +16,6 @@
 #include <linux/crash_dump.h>
 #include <linux/dma-direct.h>
 #include <linux/dmi.h>
-#include <linux/intel-svm.h>
 #include <linux/memory.h>
 #include <linux/pci.h>
 #include <linux/pci-ats.h>
@@ -30,6 +29,7 @@
 #include "../iommu-sva.h"
 #include "pasid.h"
 #include "cap_audit.h"
+#include "perfmon.h"
 
 #define ROOT_SIZE		VTD_PAGE_SIZE
 #define CONTEXT_SIZE		VTD_PAGE_SIZE
@@ -362,12 +362,12 @@ static int __init intel_iommu_setup(char *str)
 }
 __setup("intel_iommu=", intel_iommu_setup);
 
-void *alloc_pgtable_page(int node)
+void *alloc_pgtable_page(int node, gfp_t gfp)
 {
 	struct page *page;
 	void *vaddr = NULL;
 
-	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
+	page = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
 	if (page)
 		vaddr = page_address(page);
 	return vaddr;
@@ -612,7 +612,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
 		if (!alloc)
 			return NULL;
 
-		context = alloc_pgtable_page(iommu->node);
+		context = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
 		if (!context)
 			return NULL;
 
@@ -908,7 +908,8 @@ pgtable_walk:
 #endif
 
 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
-				      unsigned long pfn, int *target_level)
+				      unsigned long pfn, int *target_level,
+				      gfp_t gfp)
 {
 	struct dma_pte *parent, *pte;
 	int level = agaw_to_level(domain->agaw);
@@ -935,7 +936,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 		if (!dma_pte_present(pte)) {
 			uint64_t pteval;
 
-			tmp_page = alloc_pgtable_page(domain->nid);
+			tmp_page = alloc_pgtable_page(domain->nid, gfp);
 			if (!tmp_page)
 				return NULL;
 
@@ -1186,7 +1187,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 {
 	struct root_entry *root;
 
-	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
+	root = (struct root_entry *)alloc_pgtable_page(iommu->node, GFP_ATOMIC);
 	if (!root) {
 		pr_err("Allocating root entry for %s failed\n",
 			iommu->name);
@@ -2150,7 +2151,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
 
 	while (start_pfn <= end_pfn) {
 		if (!pte)
-			pte = pfn_to_dma_pte(domain, start_pfn, &level);
+			pte = pfn_to_dma_pte(domain, start_pfn, &level,
+					     GFP_ATOMIC);
 
 		if (dma_pte_present(pte)) {
 			dma_pte_free_pagetable(domain, start_pfn,
@@ -2172,7 +2174,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
 
 static int
 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
-		 unsigned long phys_pfn, unsigned long nr_pages, int prot)
+		 unsigned long phys_pfn, unsigned long nr_pages, int prot,
+		 gfp_t gfp)
 {
 	struct dma_pte *first_pte = NULL, *pte = NULL;
 	unsigned int largepage_lvl = 0;
@@ -2202,7 +2205,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
 					phys_pfn, nr_pages);
 
-			pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
+			pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl,
+					     gfp);
 			if (!pte)
 				return -ENOMEM;
 			first_pte = pte;
@@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
 
 	return __domain_mapping(domain, first_vpfn,
 				first_vpfn, last_vpfn - first_vpfn + 1,
-				DMA_PTE_READ|DMA_PTE_WRITE);
+				DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL);
 }
 
 static int md_domain_init(struct dmar_domain *domain, int guest_width);
@@ -2676,7 +2680,7 @@ static int copy_context_table(struct intel_iommu *iommu,
 		if (!old_ce)
 			goto out;
 
-		new_ce = alloc_pgtable_page(iommu->node);
+		new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL);
 		if (!new_ce)
 			goto out_unmap;
 
@@ -4005,7 +4009,8 @@ int __init intel_iommu_init(void)
 		 * is likely to be much lower than the overhead of synchronizing
 		 * the virtual and physical IOMMU page-tables.
 		 */
-		if (cap_caching_mode(iommu->cap)) {
+		if (cap_caching_mode(iommu->cap) &&
+		    !first_level_by_default(IOMMU_DOMAIN_DMA)) {
 			pr_info_once("IOMMU batching disallowed due to virtualization\n");
 			iommu_set_dma_strict();
 		}
@@ -4013,6 +4018,8 @@ int __init intel_iommu_init(void)
 				       intel_iommu_groups,
 				       "%s", iommu->name);
 		iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
+
+		iommu_pmu_register(iommu);
 	}
 	up_read(&dmar_global_lock);
 
@@ -4136,7 +4143,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
 	domain->max_addr = 0;
 
 	/* always allocate the top pgd */
-	domain->pgd = alloc_pgtable_page(domain->nid);
+	domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
 	if (!domain->pgd)
 		return -ENOMEM;
 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
@@ -4298,7 +4305,7 @@ static int intel_iommu_map(struct iommu_domain *domain,
 	   the low bits of hpa would take us onto the next page */
 	size = aligned_nrpages(hpa, size);
 	return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
-				hpa >> VTD_PAGE_SHIFT, size, prot);
+				hpa >> VTD_PAGE_SHIFT, size, prot, gfp);
 }
 
 static int intel_iommu_map_pages(struct iommu_domain *domain,
@@ -4333,7 +4340,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
 
 	/* Cope with horrid API which requires us to unmap more than the
 	   size argument if it happens to be a large-page mapping. */
-	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
+	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
+			       GFP_ATOMIC));
 
 	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
 		size = VTD_PAGE_SIZE << level_to_offset_bits(level);
@@ -4346,7 +4354,12 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
 	if (dmar_domain->max_addr == iova + size)
 		dmar_domain->max_addr = iova;
 
-	iommu_iotlb_gather_add_page(domain, gather, iova, size);
+	/*
+	 * We do not use page-selective IOTLB invalidation in flush queue,
+	 * so there is no need to track page and sync iotlb.
+	 */
+	if (!iommu_iotlb_gather_queued(gather))
+		iommu_iotlb_gather_add_page(domain, gather, iova, size);
 
 	return size;
 }
@@ -4392,7 +4405,8 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 	int level = 0;
 	u64 phys = 0;
 
-	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
+	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
+			     GFP_ATOMIC);
 	if (pte && dma_pte_present(pte))
 		phys = dma_pte_addr(pte) +
 			(iova & (BIT_MASK(level_to_offset_bits(level) +
@@ -4642,8 +4656,12 @@ static int intel_iommu_enable_sva(struct device *dev)
 		return -EINVAL;
 
 	ret = iopf_queue_add_device(iommu->iopf_queue, dev);
-	if (!ret)
-		ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+	if (ret)
+		return ret;
+
+	ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+	if (ret)
+		iopf_queue_remove_device(iommu->iopf_queue, dev);
 
 	return ret;
 }
@@ -4655,8 +4673,12 @@ static int intel_iommu_disable_sva(struct device *dev)
 	int ret;
 
 	ret = iommu_unregister_device_fault_handler(dev);
-	if (!ret)
-		ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
+	if (ret)
+		return ret;
+
+	ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
+	if (ret)
+		iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
 
 	return ret;
 }
@@ -5023,3 +5045,59 @@ void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
 					 pasid, qdep, address, mask);
 	}
 }
+
+#define ecmd_get_status_code(res)	(((res) & 0xff) >> 1)
+
+/*
+ * Function to submit a command to the enhanced command interface. The
+ * valid enhanced command descriptions are defined in Table 47 of the
+ * VT-d spec. The VT-d hardware implementation may support some but not
+ * all commands, which can be determined by checking the Enhanced
+ * Command Capability Register.
+ *
+ * Return values:
+ *  - 0: Command successful without any error;
+ *  - Negative: software error value;
+ *  - Nonzero positive: failure status code defined in Table 48.
+ */
+int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
+{
+	unsigned long flags;
+	u64 res;
+	int ret;
+
+	if (!cap_ecmds(iommu->cap))
+		return -ENODEV;
+
+	raw_spin_lock_irqsave(&iommu->register_lock, flags);
+
+	res = dmar_readq(iommu->reg + DMAR_ECRSP_REG);
+	if (res & DMA_ECMD_ECRSP_IP) {
+		ret = -EBUSY;
+		goto err;
+	}
+
+	/*
+	 * Unconditionally write the operand B, because
+	 * - There is no side effect if an ecmd doesn't require an
+	 *   operand B, but we set the register to some value.
+	 * - It's not invoked in any critical path. The extra MMIO
+	 *   write doesn't bring any performance concerns.
+	 */
+	dmar_writeq(iommu->reg + DMAR_ECEO_REG, ob);
+	dmar_writeq(iommu->reg + DMAR_ECMD_REG, ecmd | (oa << DMA_ECMD_OA_SHIFT));
+
+	IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, dmar_readq,
+		      !(res & DMA_ECMD_ECRSP_IP), res);
+
+	if (res & DMA_ECMD_ECRSP_IP) {
+		ret = -ETIMEDOUT;
+		goto err;
+	}
+
+	ret = ecmd_get_status_code(res);
+err:
+	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+	return ret;
+}