From 0f725561e168485eff7277d683405c05b192f537 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 7 Jun 2018 09:56:59 -0700 Subject: iommu/vt-d: Add definitions for PFSID When SRIOV VF device IOTLB is invalidated, we need to provide the PF source ID such that IOMMU hardware can gauge the depth of invalidation queue which is shared among VFs. This is needed when device invalidation throttle (DIT) capability is supported. This patch adds bit definitions for checking and tracking PFSID. Signed-off-by: Jacob Pan Cc: stable@vger.kernel.org Cc: "Ashok Raj" Cc: "Lu Baolu" Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/intel-iommu.h') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 1df940196ab2..3b1c37155572 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -114,6 +114,7 @@ * Extended Capability Register */ +#define ecap_dit(e) ((e >> 41) & 0x1) #define ecap_pasid(e) ((e >> 40) & 0x1) #define ecap_pss(e) ((e >> 35) & 0x1f) #define ecap_eafs(e) ((e >> 34) & 0x1) @@ -283,6 +284,7 @@ enum { #define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) #define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16) #define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) +#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) #define QI_DEV_IOTLB_SIZE 1 #define QI_DEV_IOTLB_MAX_INVS 32 @@ -307,6 +309,7 @@ enum { #define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) #define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) #define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4) +#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) #define QI_DEV_EIOTLB_MAX_INVS 32 #define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) -- cgit v1.2.3-70-g09d2 From 1c48db44924298ad0cb5a6386b88017539be8822 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 7 Jun 2018 09:57:00 -0700 Subject: iommu/vt-d: Fix dev iotlb pfsid use PFSID should be used in the invalidation descriptor for flushing device IOTLBs on SRIOV VFs. Signed-off-by: Jacob Pan Cc: stable@vger.kernel.org Cc: "Ashok Raj" Cc: "Lu Baolu" Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 6 +++--- drivers/iommu/intel-iommu.c | 17 ++++++++++++++++- include/linux/intel-iommu.h | 5 ++--- 3 files changed, 21 insertions(+), 7 deletions(-) (limited to 'include/linux/intel-iommu.h') diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 75456b5aa825..d9c748b6f9e4 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1339,8 +1339,8 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, qi_submit_sync(&desc, iommu); } -void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, - u64 addr, unsigned mask) +void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u16 qdep, u64 addr, unsigned mask) { struct qi_desc desc; @@ -1355,7 +1355,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, qdep = 0; desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | - QI_DIOTLB_TYPE; + QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); qi_submit_sync(&desc, iommu); } diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7f6194ef48f4..497ef94c5a8c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1475,6 +1475,20 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) return; pdev = to_pci_dev(info->dev); + /* For IOMMU that supports device IOTLB throttling (DIT), we assign + * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge + * queue depth at PF level. If DIT is not set, PFSID will be treated as + * reserved, which should be set to 0. + */ + if (!ecap_dit(info->iommu->ecap)) + info->pfsid = 0; + else { + struct pci_dev *pf_pdev; + + /* pdev will be returned if device is not a vf */ + pf_pdev = pci_physfn(pdev); + info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn); + } #ifdef CONFIG_INTEL_IOMMU_SVM /* The PCIe spec, in its wisdom, declares that the behaviour of @@ -1540,7 +1554,8 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, sid = info->bus << 8 | info->devfn; qdep = info->ats_qdep; - qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, addr, mask); } spin_unlock_irqrestore(&device_domain_lock, flags); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 3b1c37155572..6692b40ca814 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -455,9 +455,8 @@ extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, u64 type); extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type); -extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, - u64 addr, unsigned mask); - +extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u16 qdep, u64 addr, unsigned mask); extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); -- cgit v1.2.3-70-g09d2 From 2db1581e1f432ac6b4efe152c57fdfb4de85c154 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sun, 8 Jul 2018 14:23:21 +0800 Subject: Revert "iommu/vt-d: Clean up pasid quirk for pre-production devices" This reverts commit ab96746aaa344fb720a198245a837e266fad3b62. The commit ab96746aaa34 ("iommu/vt-d: Clean up pasid quirk for pre-production devices") triggers ECS mode on some platforms which have broken ECS support. As the result, graphic device will be inoperable on boot. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107017 Cc: Ashok Raj Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 32 ++++++++++++++++++++++++++++++-- include/linux/intel-iommu.h | 1 + 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'include/linux/intel-iommu.h') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index b344a883f116..115ff26e9ced 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -484,14 +484,37 @@ static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; static int intel_iommu_ecs = 1; +static int intel_iommu_pasid28; static int iommu_identity_mapping; #define IDENTMAP_ALL 1 #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 -#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap)) -#define pasid_enabled(iommu) (ecs_enabled(iommu) && ecap_pasid(iommu->ecap)) +/* Broadwell and Skylake have broken ECS support — normal so-called "second + * level" translation of DMA requests-without-PASID doesn't actually happen + * unless you also set the NESTE bit in an extended context-entry. Which of + * course means that SVM doesn't work because it's trying to do nested + * translation of the physical addresses it finds in the process page tables, + * through the IOVA->phys mapping found in the "second level" page tables. + * + * The VT-d specification was retroactively changed to change the definition + * of the capability bits and pretend that Broadwell/Skylake never happened... + * but unfortunately the wrong bit was changed. It's ECS which is broken, but + * for some reason it was the PASID capability bit which was redefined (from + * bit 28 on BDW/SKL to bit 40 in future). + * + * So our test for ECS needs to eschew those implementations which set the old + * PASID capabiity bit 28, since those are the ones on which ECS is broken. + * Unless we are working around the 'pasid28' limitations, that is, by putting + * the device into passthrough mode for normal DMA and thus masking the bug. + */ +#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ + (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap))) +/* PASID support is thus enabled if ECS is enabled and *either* of the old + * or new capability bits are set. */ +#define pasid_enabled(iommu) (ecs_enabled(iommu) && \ + (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap))) int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); @@ -554,6 +577,11 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable extended context table support\n"); intel_iommu_ecs = 0; + } else if (!strncmp(str, "pasid28", 7)) { + printk(KERN_INFO + "Intel-IOMMU: enable pre-production PASID support\n"); + intel_iommu_pasid28 = 1; + iommu_identity_mapping |= IDENTMAP_GFX; } else if (!strncmp(str, "tboot_noforce", 13)) { printk(KERN_INFO "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 1df940196ab2..ef169d67df92 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -121,6 +121,7 @@ #define ecap_srs(e) ((e >> 31) & 0x1) #define ecap_ers(e) ((e >> 30) & 0x1) #define ecap_prs(e) ((e >> 29) & 0x1) +#define ecap_broken_pasid(e) ((e >> 28) & 0x1) #define ecap_dis(e) ((e >> 27) & 0x1) #define ecap_nest(e) ((e >> 26) & 0x1) #define ecap_mts(e) ((e >> 25) & 0x1) -- cgit v1.2.3-70-g09d2 From 51261aac51a05c791ef880a100ac2ceed201ef72 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 14 Jul 2018 15:46:55 +0800 Subject: iommu/vt-d: Avoid using idr_for_each_entry() idr_for_each_entry() is used to iteratte over idr elements of a given type. It isn't suitable for the globle pasid idr since the pasid idr consumer could specify different types of pointers to bind with a pasid. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Liu Yi L Reviewed-by: Peter Xu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-svm.c | 14 ++++++++++---- include/linux/intel-iommu.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux/intel-iommu.h') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 2cc0aac93201..36cc1d1b8afc 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -298,6 +298,7 @@ static const struct mmu_notifier_ops intel_mmuops = { }; static DEFINE_MUTEX(pasid_mutex); +static LIST_HEAD(global_svm_list); int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) { @@ -329,13 +330,13 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ mutex_lock(&pasid_mutex); if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { - int i; + struct intel_svm *t; - idr_for_each_entry(&iommu->pasid_idr, svm, i) { - if (svm->mm != mm || - (svm->flags & SVM_FLAG_PRIVATE_PASID)) + list_for_each_entry(t, &global_svm_list, list) { + if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID)) continue; + svm = t; if (svm->pasid >= pasid_max) { dev_warn(dev, "Limited PASID width. Cannot use existing PASID %d\n", @@ -404,6 +405,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ svm->mm = mm; svm->flags = flags; INIT_LIST_HEAD_RCU(&svm->devs); + INIT_LIST_HEAD(&svm->list); ret = -ENOMEM; if (mm) { ret = mmu_notifier_register(&svm->notifier, mm); @@ -430,6 +432,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ */ if (cap_caching_mode(iommu->cap)) intel_flush_pasid_dev(svm, sdev, svm->pasid); + + list_add_tail(&svm->list, &global_svm_list); } list_add_rcu(&sdev->list, &svm->devs); @@ -485,6 +489,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) if (svm->mm) mmu_notifier_unregister(&svm->notifier, svm->mm); + list_del(&svm->list); + /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. * If that is not obeyed, subtle errors will happen. diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 6692b40ca814..e1e193855581 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -487,6 +487,7 @@ struct intel_svm { int flags; int pasid; struct list_head devs; + struct list_head list; }; extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev); -- cgit v1.2.3-70-g09d2 From af39507305fb83a5d3c475c2851f4d59545d8a18 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 14 Jul 2018 15:46:56 +0800 Subject: iommu/vt-d: Apply global PASID in SVA This patch applies the global pasid name space in the shared virtual address (SVA) implementation. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Liu Yi L Reviewed-by: Peter Xu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-svm.c | 22 +++++++++++----------- include/linux/intel-iommu.h | 1 - 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include/linux/intel-iommu.h') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 36cc1d1b8afc..56e65d0b1871 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -26,6 +26,8 @@ #include #include +#include "intel-pasid.h" + #define PASID_ENTRY_P BIT_ULL(0) #define PASID_ENTRY_FLPM_5LP BIT_ULL(9) #define PASID_ENTRY_SRE BIT_ULL(11) @@ -85,8 +87,6 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) iommu->name); } - idr_init(&iommu->pasid_idr); - return 0; } @@ -102,7 +102,7 @@ int intel_svm_free_pasid_tables(struct intel_iommu *iommu) free_pages((unsigned long)iommu->pasid_state_table, order); iommu->pasid_state_table = NULL; } - idr_destroy(&iommu->pasid_idr); + return 0; } @@ -392,9 +392,9 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ pasid_max = iommu->pasid_max; /* Do not use PASID 0 in caching mode (virtualised IOMMU) */ - ret = idr_alloc(&iommu->pasid_idr, svm, - !!cap_caching_mode(iommu->cap), - pasid_max - 1, GFP_KERNEL); + ret = intel_pasid_alloc_id(svm, + !!cap_caching_mode(iommu->cap), + pasid_max - 1, GFP_KERNEL); if (ret < 0) { kfree(svm); kfree(sdev); @@ -410,7 +410,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ if (mm) { ret = mmu_notifier_register(&svm->notifier, mm); if (ret) { - idr_remove(&svm->iommu->pasid_idr, svm->pasid); + intel_pasid_free_id(svm->pasid); kfree(svm); kfree(sdev); goto out; @@ -460,7 +460,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) if (!iommu || !iommu->pasid_table) goto out; - svm = idr_find(&iommu->pasid_idr, pasid); + svm = intel_pasid_lookup_id(pasid); if (!svm) goto out; @@ -485,7 +485,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) svm->iommu->pasid_table[svm->pasid].val = 0; wmb(); - idr_remove(&svm->iommu->pasid_idr, svm->pasid); + intel_pasid_free_id(svm->pasid); if (svm->mm) mmu_notifier_unregister(&svm->notifier, svm->mm); @@ -520,7 +520,7 @@ int intel_svm_is_pasid_valid(struct device *dev, int pasid) if (!iommu || !iommu->pasid_table) goto out; - svm = idr_find(&iommu->pasid_idr, pasid); + svm = intel_pasid_lookup_id(pasid); if (!svm) goto out; @@ -618,7 +618,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) if (!svm || svm->pasid != req->pasid) { rcu_read_lock(); - svm = idr_find(&iommu->pasid_idr, req->pasid); + svm = intel_pasid_lookup_id(req->pasid); /* It *can't* go away, because the driver is not permitted * to unbind the mm while any page faults are outstanding. * So we only need RCU to protect the internal idr code. */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index e1e193855581..2ff15195f73d 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -420,7 +420,6 @@ struct intel_iommu { struct pasid_state_entry *pasid_state_table; struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ - struct idr pasid_idr; u32 pasid_max; #endif struct q_inval *qi; /* Queued invalidation info */ -- cgit v1.2.3-70-g09d2 From 9ddbfb42138d84bb326023616c40a3dc30ea2837 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 14 Jul 2018 15:46:57 +0800 Subject: iommu/vt-d: Move device_domain_info to header This allows the per device iommu data and some helpers to be used in other files. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Liu Yi L Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 63 +++------------------------------------------ include/linux/intel-iommu.h | 61 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 59 deletions(-) (limited to 'include/linux/intel-iommu.h') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index fa15ed036ddc..bb7f2b2a37ee 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -381,61 +381,6 @@ static int hw_pass_through = 1; for (idx = 0; idx < g_num_of_iommus; idx++) \ if (domain->iommu_refcnt[idx]) -struct dmar_domain { - int nid; /* node id */ - - unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED]; - /* Refcount of devices per iommu */ - - - u16 iommu_did[DMAR_UNITS_SUPPORTED]; - /* Domain ids per IOMMU. Use u16 since - * domain ids are 16 bit wide according - * to VT-d spec, section 9.3 */ - - bool has_iotlb_device; - struct list_head devices; /* all devices' list */ - struct iova_domain iovad; /* iova's that belong to this domain */ - - struct dma_pte *pgd; /* virtual address */ - int gaw; /* max guest address width */ - - /* adjusted guest address width, 0 is level 2 30-bit */ - int agaw; - - int flags; /* flags to find out type of domain */ - - int iommu_coherency;/* indicate coherency of iommu access */ - int iommu_snooping; /* indicate snooping control feature*/ - int iommu_count; /* reference count of iommu */ - int iommu_superpage;/* Level of superpages supported: - 0 == 4KiB (no superpages), 1 == 2MiB, - 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ - u64 max_addr; /* maximum mapped address */ - - struct iommu_domain domain; /* generic domain data structure for - iommu core */ -}; - -/* PCI domain-device relationship */ -struct device_domain_info { - struct list_head link; /* link to domain siblings */ - struct list_head global; /* link to global list */ - u8 bus; /* PCI bus number */ - u8 devfn; /* PCI devfn number */ - u16 pfsid; /* SRIOV physical function source ID */ - u8 pasid_supported:3; - u8 pasid_enabled:1; - u8 pri_supported:1; - u8 pri_enabled:1; - u8 ats_supported:1; - u8 ats_enabled:1; - u8 ats_qdep; - struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ - struct intel_iommu *iommu; /* IOMMU used by this device */ - struct dmar_domain *domain; /* pointer to domain */ -}; - struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -604,7 +549,7 @@ static void set_iommu_domain(struct intel_iommu *iommu, u16 did, domains[did & 0xff] = domain; } -static inline void *alloc_pgtable_page(int node) +void *alloc_pgtable_page(int node) { struct page *page; void *vaddr = NULL; @@ -615,7 +560,7 @@ static inline void *alloc_pgtable_page(int node) return vaddr; } -static inline void free_pgtable_page(void *vaddr) +void free_pgtable_page(void *vaddr) { free_page((unsigned long)vaddr); } @@ -698,7 +643,7 @@ int iommu_calculate_agaw(struct intel_iommu *iommu) } /* This functionin only returns single iommu in a domain */ -static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) +struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) { int iommu_id; @@ -3528,7 +3473,7 @@ static unsigned long intel_alloc_iova(struct device *dev, return iova_pfn; } -static struct dmar_domain *get_valid_domain_for_dev(struct device *dev) +struct dmar_domain *get_valid_domain_for_dev(struct device *dev) { struct dmar_domain *domain, *tmp; struct dmar_rmrr_unit *rmrr; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 2ff15195f73d..2e1fbde020ca 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -387,6 +388,42 @@ struct pasid_entry; struct pasid_state_entry; struct page_req_dsc; +struct dmar_domain { + int nid; /* node id */ + + unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED]; + /* Refcount of devices per iommu */ + + + u16 iommu_did[DMAR_UNITS_SUPPORTED]; + /* Domain ids per IOMMU. Use u16 since + * domain ids are 16 bit wide according + * to VT-d spec, section 9.3 */ + + bool has_iotlb_device; + struct list_head devices; /* all devices' list */ + struct iova_domain iovad; /* iova's that belong to this domain */ + + struct dma_pte *pgd; /* virtual address */ + int gaw; /* max guest address width */ + + /* adjusted guest address width, 0 is level 2 30-bit */ + int agaw; + + int flags; /* flags to find out type of domain */ + + int iommu_coherency;/* indicate coherency of iommu access */ + int iommu_snooping; /* indicate snooping control feature*/ + int iommu_count; /* reference count of iommu */ + int iommu_superpage;/* Level of superpages supported: + 0 == 4KiB (no superpages), 1 == 2MiB, + 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ + u64 max_addr; /* maximum mapped address */ + + struct iommu_domain domain; /* generic domain data structure for + iommu core */ +}; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 reg_phys; /* physical address of hw register set */ @@ -435,6 +472,25 @@ struct intel_iommu { u32 flags; /* Software defined flags */ }; +/* PCI domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct list_head global; /* link to global list */ + u8 bus; /* PCI bus number */ + u8 devfn; /* PCI devfn number */ + u16 pfsid; /* SRIOV physical function source ID */ + u8 pasid_supported:3; + u8 pasid_enabled:1; + u8 pri_supported:1; + u8 pri_enabled:1; + u8 ats_supported:1; + u8 ats_enabled:1; + u8 ats_qdep; + struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ + struct intel_iommu *iommu; /* IOMMU used by this device */ + struct dmar_domain *domain; /* pointer to domain */ +}; + static inline void __iommu_flush_cache( struct intel_iommu *iommu, void *addr, int size) { @@ -460,6 +516,11 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); +struct dmar_domain *get_valid_domain_for_dev(struct device *dev); +void *alloc_pgtable_page(int node); +void free_pgtable_page(void *vaddr); +struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); + #ifdef CONFIG_INTEL_IOMMU_SVM extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu); extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); -- cgit v1.2.3-70-g09d2 From 85319dcc8955f8f31828dc8bafff29f6aa011d93 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 14 Jul 2018 15:46:58 +0800 Subject: iommu/vt-d: Add for_each_device_domain() helper This adds a helper named for_each_device_domain() to iterate over the elements in device_domain_list and invoke a callback against each element. This allows to search the device_domain list in other source files. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Liu Yi L Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 21 +++++++++++++++++++++ include/linux/intel-iommu.h | 2 ++ 2 files changed, 23 insertions(+) (limited to 'include/linux/intel-iommu.h') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index bb7f2b2a37ee..f020da439ace 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -448,6 +448,27 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); static DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); +/* + * Iterate over elements in device_domain_list and call the specified + * callback @fn against each element. This helper should only be used + * in the context where the device_domain_lock has already been holden. + */ +int for_each_device_domain(int (*fn)(struct device_domain_info *info, + void *data), void *data) +{ + int ret = 0; + struct device_domain_info *info; + + assert_spin_locked(&device_domain_lock); + list_for_each_entry(info, &device_domain_list, global) { + ret = fn(info, data); + if (ret) + return ret; + } + + return 0; +} + const struct iommu_ops intel_iommu_ops; static bool translation_pre_enabled(struct intel_iommu *iommu) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 2e1fbde020ca..4fd4c6fee93e 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -520,6 +520,8 @@ struct dmar_domain *get_valid_domain_for_dev(struct device *dev); void *alloc_pgtable_page(int node); void free_pgtable_page(void *vaddr); struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); +int for_each_device_domain(int (*fn)(struct device_domain_info *info, + void *data), void *data); #ifdef CONFIG_INTEL_IOMMU_SVM extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu); -- cgit v1.2.3-70-g09d2 From cc580e41260dbf1a46269235f1f2b572137d9d03 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 14 Jul 2018 15:46:59 +0800 Subject: iommu/vt-d: Per PCI device pasid table interfaces This patch adds the interfaces for per PCI device pasid table management. Currently we allocate one pasid table for all PCI devices under the scope of an IOMMU. It's insecure in some cases where multiple devices under one single IOMMU unit support PASID features. With per PCI device pasid table, we can achieve finer protection and isolation granularity. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Suggested-by: Ashok Raj Signed-off-by: Lu Baolu Reviewed-by: Liu Yi L Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 1 + drivers/iommu/intel-pasid.c | 179 ++++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/intel-pasid.h | 18 +++++ drivers/iommu/intel-svm.c | 4 - include/linux/intel-iommu.h | 2 + 5 files changed, 200 insertions(+), 4 deletions(-) (limited to 'include/linux/intel-iommu.h') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f020da439ace..211925a75fb4 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2451,6 +2451,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, info->dev = dev; info->domain = domain; info->iommu = iommu; + info->pasid_table = NULL; if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index e918fe01ce7f..fe95c9bd4d33 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include "intel-pasid.h" @@ -58,3 +60,180 @@ void *intel_pasid_lookup_id(int pasid) return p; } + +/* + * Per device pasid table management: + */ +static inline void +device_attach_pasid_table(struct device_domain_info *info, + struct pasid_table *pasid_table) +{ + info->pasid_table = pasid_table; + list_add(&info->table, &pasid_table->dev); +} + +static inline void +device_detach_pasid_table(struct device_domain_info *info, + struct pasid_table *pasid_table) +{ + info->pasid_table = NULL; + list_del(&info->table); +} + +struct pasid_table_opaque { + struct pasid_table **pasid_table; + int segment; + int bus; + int devfn; +}; + +static int search_pasid_table(struct device_domain_info *info, void *opaque) +{ + struct pasid_table_opaque *data = opaque; + + if (info->iommu->segment == data->segment && + info->bus == data->bus && + info->devfn == data->devfn && + info->pasid_table) { + *data->pasid_table = info->pasid_table; + return 1; + } + + return 0; +} + +static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque) +{ + struct pasid_table_opaque *data = opaque; + + data->segment = pci_domain_nr(pdev->bus); + data->bus = PCI_BUS_NUM(alias); + data->devfn = alias & 0xff; + + return for_each_device_domain(&search_pasid_table, data); +} + +/* + * Allocate a pasid table for @dev. It should be called in a + * single-thread context. + */ +int intel_pasid_alloc_table(struct device *dev) +{ + struct device_domain_info *info; + struct pasid_table *pasid_table; + struct pasid_table_opaque data; + struct page *pages; + size_t size, count; + int ret, order; + + info = dev->archdata.iommu; + if (WARN_ON(!info || !dev_is_pci(dev) || + !info->pasid_supported || info->pasid_table)) + return -EINVAL; + + /* DMA alias device already has a pasid table, use it: */ + data.pasid_table = &pasid_table; + ret = pci_for_each_dma_alias(to_pci_dev(dev), + &get_alias_pasid_table, &data); + if (ret) + goto attach_out; + + pasid_table = kzalloc(sizeof(*pasid_table), GFP_ATOMIC); + if (!pasid_table) + return -ENOMEM; + INIT_LIST_HEAD(&pasid_table->dev); + + size = sizeof(struct pasid_entry); + count = min_t(int, pci_max_pasids(to_pci_dev(dev)), intel_pasid_max_id); + order = get_order(size * count); + pages = alloc_pages_node(info->iommu->node, + GFP_ATOMIC | __GFP_ZERO, + order); + if (!pages) + return -ENOMEM; + + pasid_table->table = page_address(pages); + pasid_table->order = order; + pasid_table->max_pasid = count; + +attach_out: + device_attach_pasid_table(info, pasid_table); + + return 0; +} + +void intel_pasid_free_table(struct device *dev) +{ + struct device_domain_info *info; + struct pasid_table *pasid_table; + + info = dev->archdata.iommu; + if (!info || !dev_is_pci(dev) || + !info->pasid_supported || !info->pasid_table) + return; + + pasid_table = info->pasid_table; + device_detach_pasid_table(info, pasid_table); + + if (!list_empty(&pasid_table->dev)) + return; + + free_pages((unsigned long)pasid_table->table, pasid_table->order); + kfree(pasid_table); +} + +struct pasid_table *intel_pasid_get_table(struct device *dev) +{ + struct device_domain_info *info; + + info = dev->archdata.iommu; + if (!info) + return NULL; + + return info->pasid_table; +} + +int intel_pasid_get_dev_max_id(struct device *dev) +{ + struct device_domain_info *info; + + info = dev->archdata.iommu; + if (!info || !info->pasid_table) + return 0; + + return info->pasid_table->max_pasid; +} + +struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) +{ + struct pasid_table *pasid_table; + struct pasid_entry *entries; + + pasid_table = intel_pasid_get_table(dev); + if (WARN_ON(!pasid_table || pasid < 0 || + pasid >= intel_pasid_get_dev_max_id(dev))) + return NULL; + + entries = pasid_table->table; + + return &entries[pasid]; +} + +/* + * Interfaces for PASID table entry manipulation: + */ +static inline void pasid_clear_entry(struct pasid_entry *pe) +{ + WRITE_ONCE(pe->val, 0); +} + +void intel_pasid_clear_entry(struct device *dev, int pasid) +{ + struct pasid_entry *pe; + + pe = intel_pasid_get_entry(dev, pasid); + if (WARN_ON(!pe)) + return; + + pasid_clear_entry(pe); +} diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index b1c5296290e5..1c05ed6fc5a5 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -13,9 +13,27 @@ #define PASID_MIN 0x1 #define PASID_MAX 0x100000 +struct pasid_entry { + u64 val; +}; + +/* The representative of a PASID table */ +struct pasid_table { + void *table; /* pasid table pointer */ + int order; /* page order of pasid table */ + int max_pasid; /* max pasid */ + struct list_head dev; /* device list */ +}; + extern u32 intel_pasid_max_id; int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp); void intel_pasid_free_id(int pasid); void *intel_pasid_lookup_id(int pasid); +int intel_pasid_alloc_table(struct device *dev); +void intel_pasid_free_table(struct device *dev); +struct pasid_table *intel_pasid_get_table(struct device *dev); +int intel_pasid_get_dev_max_id(struct device *dev); +struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid); +void intel_pasid_clear_entry(struct device *dev, int pasid); #endif /* __INTEL_PASID_H */ diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 56e65d0b1871..9b5dc7262677 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -34,10 +34,6 @@ static irqreturn_t prq_event_thread(int irq, void *d); -struct pasid_entry { - u64 val; -}; - struct pasid_state_entry { u64 val; }; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4fd4c6fee93e..e7901d402337 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -476,6 +476,7 @@ struct intel_iommu { struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ + struct list_head table; /* link to pasid table */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ u16 pfsid; /* SRIOV physical function source ID */ @@ -489,6 +490,7 @@ struct device_domain_info { struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ + struct pasid_table *pasid_table; /* pasid table */ }; static inline void __iommu_flush_cache( -- cgit v1.2.3-70-g09d2 From d9737953d85131436b09668b5e8d3389c37c1f28 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 14 Jul 2018 15:47:02 +0800 Subject: iommu/vt-d: Remove the obsolete per iommu pasid tables The obsolete per iommu pasid tables are no longer used. Hence, clean up them. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Liu Yi L Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 6 +++--- drivers/iommu/intel-svm.c | 17 ++--------------- include/linux/intel-iommu.h | 5 ++--- 3 files changed, 7 insertions(+), 21 deletions(-) (limited to 'include/linux/intel-iommu.h') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f1a3c5e5aff0..7ed0221dccf8 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1756,7 +1756,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu) if (pasid_enabled(iommu)) { if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); - intel_svm_free_pasid_tables(iommu); + intel_svm_exit(iommu); } #endif } @@ -3337,7 +3337,7 @@ static int __init init_dmars(void) hw_pass_through = 0; #ifdef CONFIG_INTEL_IOMMU_SVM if (pasid_enabled(iommu)) - intel_svm_alloc_pasid_tables(iommu); + intel_svm_init(iommu); #endif } @@ -4300,7 +4300,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) #ifdef CONFIG_INTEL_IOMMU_SVM if (pasid_enabled(iommu)) - intel_svm_alloc_pasid_tables(iommu); + intel_svm_init(iommu); #endif if (dmaru->ignored) { diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index a253cdeabd61..eb308363e541 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -38,7 +38,7 @@ struct pasid_state_entry { u64 val; }; -int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) +int intel_svm_init(struct intel_iommu *iommu) { struct page *pages; int order; @@ -63,15 +63,6 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) iommu->pasid_max = 0x20000; order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!pages) { - pr_warn("IOMMU: %s: Failed to allocate PASID table\n", - iommu->name); - return -ENOMEM; - } - iommu->pasid_table = page_address(pages); - pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order); - if (ecap_dis(iommu->ecap)) { /* Just making it explicit... */ BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry)); @@ -86,14 +77,10 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) return 0; } -int intel_svm_free_pasid_tables(struct intel_iommu *iommu) +int intel_svm_exit(struct intel_iommu *iommu) { int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); - if (iommu->pasid_table) { - free_pages((unsigned long)iommu->pasid_table, order); - iommu->pasid_table = NULL; - } if (iommu->pasid_state_table) { free_pages((unsigned long)iommu->pasid_state_table, order); iommu->pasid_state_table = NULL; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index e7901d402337..3c43882d3b77 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -453,7 +453,6 @@ struct intel_iommu { * devices away to userspace processes (e.g. for DPDK) and don't * want to trust that userspace will use *only* the PASID it was * told to. But while it's all driver-arbitrated, we're fine. */ - struct pasid_entry *pasid_table; struct pasid_state_entry *pasid_state_table; struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ @@ -526,8 +525,8 @@ int for_each_device_domain(int (*fn)(struct device_domain_info *info, void *data), void *data); #ifdef CONFIG_INTEL_IOMMU_SVM -extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu); -extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); +int intel_svm_init(struct intel_iommu *iommu); +int intel_svm_exit(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); -- cgit v1.2.3-70-g09d2