summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/acpi/arm64/iort.c13
-rw-r--r--drivers/iommu/Kconfig9
-rw-r--r--drivers/iommu/amd/amd_iommu.h3
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h23
-rw-r--r--drivers/iommu/amd/init.c45
-rw-r--r--drivers/iommu/amd/io_pgtable.c11
-rw-r--r--drivers/iommu/amd/io_pgtable_v2.c3
-rw-r--r--drivers/iommu/amd/iommu.c364
-rw-r--r--drivers/iommu/amd/pasid.c6
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/Makefile1
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c31
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c5
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c93
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h45
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.c16
-rw-r--r--drivers/iommu/intel/Kconfig2
-rw-r--r--drivers/iommu/intel/Makefile2
-rw-r--r--drivers/iommu/intel/dmar.c15
-rw-r--r--drivers/iommu/intel/iommu.c576
-rw-r--r--drivers/iommu/intel/iommu.h56
-rw-r--r--drivers/iommu/intel/irq_remapping.c4
-rw-r--r--drivers/iommu/intel/nested.c53
-rw-r--r--drivers/iommu/intel/pasid.c409
-rw-r--r--drivers/iommu/intel/pasid.h22
-rw-r--r--drivers/iommu/intel/prq.c396
-rw-r--r--drivers/iommu/intel/svm.c433
-rw-r--r--drivers/iommu/iommu.c13
-rw-r--r--drivers/iommu/iommufd/hw_pagetable.c3
-rw-r--r--drivers/iommu/iommufd/vfio_compat.c7
-rw-r--r--drivers/vfio/vfio_iommu_type1.c12
-rw-r--r--include/acpi/actbl2.h3
-rw-r--r--include/linux/dmar.h1
-rw-r--r--include/linux/iommu.h10
-rw-r--r--include/uapi/linux/iommufd.h35
-rw-r--r--include/uapi/linux/vfio.h2
35 files changed, 1470 insertions, 1252 deletions
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 4c745a26226b..1f7e4c691d9e 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1218,6 +1218,17 @@ static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node)
return pci_rc->ats_attribute & ACPI_IORT_ATS_SUPPORTED;
}
+static bool iort_pci_rc_supports_canwbs(struct acpi_iort_node *node)
+{
+ struct acpi_iort_memory_access *memory_access;
+ struct acpi_iort_root_complex *pci_rc;
+
+ pci_rc = (struct acpi_iort_root_complex *)node->node_data;
+ memory_access =
+ (struct acpi_iort_memory_access *)&pci_rc->memory_properties;
+ return memory_access->memory_flags & ACPI_IORT_MF_CANWBS;
+}
+
static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node,
u32 streamid)
{
@@ -1335,6 +1346,8 @@ int iort_iommu_configure_id(struct device *dev, const u32 *id_in)
fwspec = dev_iommu_fwspec_get(dev);
if (fwspec && iort_pci_rc_supports_ats(node))
fwspec->flags |= IOMMU_FWSPEC_PCI_RC_ATS;
+ if (fwspec && iort_pci_rc_supports_canwbs(node))
+ fwspec->flags |= IOMMU_FWSPEC_PCI_RC_CANWBS;
} else {
node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
iort_match_node_callback, dev);
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index ade4e8cf2a3e..47c46e4b739e 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -416,6 +416,15 @@ config ARM_SMMU_V3_SVA
Say Y here if your system supports SVA extensions such as PCIe PASID
and PRI.
+config ARM_SMMU_V3_IOMMUFD
+ bool "Enable IOMMUFD features for ARM SMMUv3 (EXPERIMENTAL)"
+ depends on IOMMUFD
+ help
+ Support for IOMMUFD features intended to support virtual machines
+ with accelerated virtual IOMMUs.
+
+ Say Y here if you are doing development and testing on this feature.
+
config ARM_SMMU_V3_KUNIT_TEST
tristate "KUnit tests for arm-smmu-v3 driver" if !KUNIT_ALL_TESTS
depends on KUNIT
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 38509e1019e9..1bef5d55b2f9 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -53,7 +53,8 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct mm_struct *mm);
void amd_iommu_domain_free(struct iommu_domain *dom);
int iommu_sva_set_dev_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid);
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old);
void amd_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
struct iommu_domain *domain);
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 601fb4ee6900..fdb0357e0bb9 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -565,6 +565,12 @@ struct pdom_dev_data {
struct list_head list;
};
+/* Keeps track of the IOMMUs attached to protection domain */
+struct pdom_iommu_info {
+ struct amd_iommu *iommu; /* IOMMUs attach to protection domain */
+ u32 refcnt; /* Count of attached dev/pasid per domain/IOMMU */
+};
+
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
@@ -578,8 +584,7 @@ struct protection_domain {
u16 id; /* the domain id written to the device table */
enum protection_domain_mode pd_mode; /* Track page table type */
bool dirty_tracking; /* dirty tracking is enabled in the domain */
- unsigned dev_cnt; /* devices assigned to this domain */
- unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
+ struct xarray iommu_array; /* per-IOMMU reference count */
struct mmu_notifier mn; /* mmu notifier for the SVA domain */
struct list_head dev_data_list; /* List of pdom_dev_data */
@@ -831,7 +836,7 @@ struct devid_map {
*/
struct iommu_dev_data {
/*Protect against attach/detach races */
- spinlock_t lock;
+ struct mutex mutex;
struct list_head list; /* For domain->dev_list */
struct llist_node dev_data_list; /* For global dev_data_list */
@@ -873,12 +878,6 @@ extern struct list_head amd_iommu_pci_seg_list;
extern struct list_head amd_iommu_list;
/*
- * Array with pointers to each IOMMU struct
- * The indices are referenced in the protection domains
- */
-extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
-
-/*
* Structure defining one entry in the device table
*/
struct dev_table_entry {
@@ -912,14 +911,14 @@ struct unity_map_entry {
/* size of the dma_ops aperture as power of 2 */
extern unsigned amd_iommu_aperture_order;
-/* allocation bitmap for domain ids */
-extern unsigned long *amd_iommu_pd_alloc_bitmap;
-
extern bool amd_iommu_force_isolation;
/* Max levels of glxval supported */
extern int amd_iommu_max_glx_val;
+/* IDA to track protection domain IDs */
+extern struct ida pdom_ids;
+
/* Global EFR and EFR2 registers */
extern u64 amd_iommu_efr;
extern u64 amd_iommu_efr2;
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 45fdba48d54d..0e0a531042ac 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -177,9 +177,6 @@ LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
system */
-/* Array to assign indices to IOMMUs*/
-struct amd_iommu *amd_iommus[MAX_IOMMUS];
-
/* Number of IOMMUs present in the system */
static int amd_iommus_present;
@@ -194,12 +191,6 @@ bool amd_iommu_force_isolation __read_mostly;
unsigned long amd_iommu_pgsize_bitmap __ro_after_init = AMD_IOMMU_PGSIZES;
-/*
- * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
- * to know which ones are already in use.
- */
-unsigned long *amd_iommu_pd_alloc_bitmap;
-
enum iommu_init_state {
IOMMU_START_STATE,
IOMMU_IVRS_DETECTED,
@@ -1082,7 +1073,12 @@ static bool __copy_device_table(struct amd_iommu *iommu)
if (dte_v && dom_id) {
pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
- __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+ /* Reserve the Domain IDs used by previous kernel */
+ if (ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC) != dom_id) {
+ pr_err("Failed to reserve domain ID 0x%x\n", dom_id);
+ memunmap(old_devtb);
+ return false;
+ }
/* If gcr3 table existed, mask it out */
if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
@@ -1744,9 +1740,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
return -ENOSYS;
}
- /* Index is fine - add IOMMU to the array */
- amd_iommus[iommu->index] = iommu;
-
/*
* Copy data from ACPI table entry to the iommu struct
*/
@@ -2877,11 +2870,6 @@ static void enable_iommus_vapic(void)
#endif
}
-static void enable_iommus(void)
-{
- early_enable_iommus();
-}
-
static void disable_iommus(void)
{
struct amd_iommu *iommu;
@@ -2908,7 +2896,8 @@ static void amd_iommu_resume(void)
iommu_apply_resume_quirks(iommu);
/* re-load the hardware */
- enable_iommus();
+ for_each_iommu(iommu)
+ early_enable_iommu(iommu);
amd_iommu_enable_interrupts();
}
@@ -2989,9 +2978,7 @@ static bool __init check_ioapic_information(void)
static void __init free_dma_resources(void)
{
- iommu_free_pages(amd_iommu_pd_alloc_bitmap,
- get_order(MAX_DOMAIN_ID / 8));
- amd_iommu_pd_alloc_bitmap = NULL;
+ ida_destroy(&pdom_ids);
free_unity_maps();
}
@@ -3059,20 +3046,6 @@ static int __init early_amd_iommu_init(void)
amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
- /* Device table - directly used by all IOMMUs */
- ret = -ENOMEM;
-
- amd_iommu_pd_alloc_bitmap = iommu_alloc_pages(GFP_KERNEL,
- get_order(MAX_DOMAIN_ID / 8));
- if (amd_iommu_pd_alloc_bitmap == NULL)
- goto out;
-
- /*
- * never allocate domain 0 because its used as the non-allocated and
- * error value placeholder
- */
- __set_bit(0, amd_iommu_pd_alloc_bitmap);
-
/*
* now the data structures are allocated and basically initialized
* start the real acpi table scan
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 804b788f3f16..f3399087859f 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -118,6 +118,7 @@ static void free_sub_pt(u64 *root, int mode, struct list_head *freelist)
*/
static bool increase_address_space(struct amd_io_pgtable *pgtable,
unsigned long address,
+ unsigned int page_size_level,
gfp_t gfp)
{
struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
@@ -133,7 +134,8 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable,
spin_lock_irqsave(&domain->lock, flags);
- if (address <= PM_LEVEL_SIZE(pgtable->mode))
+ if (address <= PM_LEVEL_SIZE(pgtable->mode) &&
+ pgtable->mode - 1 >= page_size_level)
goto out;
ret = false;
@@ -163,18 +165,21 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
gfp_t gfp,
bool *updated)
{
+ unsigned long last_addr = address + (page_size - 1);
struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
int level, end_lvl;
u64 *pte, *page;
BUG_ON(!is_power_of_2(page_size));
- while (address > PM_LEVEL_SIZE(pgtable->mode)) {
+ while (last_addr > PM_LEVEL_SIZE(pgtable->mode) ||
+ pgtable->mode - 1 < PAGE_SIZE_LEVEL(page_size)) {
/*
* Return an error if there is no memory to update the
* page-table.
*/
- if (!increase_address_space(pgtable, address, gfp))
+ if (!increase_address_space(pgtable, last_addr,
+ PAGE_SIZE_LEVEL(page_size), gfp))
return NULL;
}
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index 25b9042fa453..c616de2c5926 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -268,8 +268,11 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
out:
if (updated) {
struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
+ unsigned long flags;
+ spin_lock_irqsave(&pdom->lock, flags);
amd_iommu_domain_flush_pages(pdom, o_iova, size);
+ spin_unlock_irqrestore(&pdom->lock, flags);
}
if (mapped)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 477aaf76b7ad..5ce8e6504ba7 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -18,6 +18,7 @@
#include <linux/scatterlist.h>
#include <linux/dma-map-ops.h>
#include <linux/dma-direct.h>
+#include <linux/idr.h>
#include <linux/iommu-helper.h>
#include <linux/delay.h>
#include <linux/amd-iommu.h>
@@ -52,8 +53,6 @@
#define HT_RANGE_START (0xfd00000000ULL)
#define HT_RANGE_END (0xffffffffffULL)
-static DEFINE_SPINLOCK(pd_bitmap_lock);
-
LIST_HEAD(ioapic_map);
LIST_HEAD(hpet_map);
LIST_HEAD(acpihid_map);
@@ -70,9 +69,13 @@ struct iommu_cmd {
u32 data[4];
};
-struct kmem_cache *amd_iommu_irq_cache;
+/*
+ * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
+ * to know which ones are already in use.
+ */
+DEFINE_IDA(pdom_ids);
-static void detach_device(struct device *dev);
+struct kmem_cache *amd_iommu_irq_cache;
static int amd_iommu_attach_device(struct iommu_domain *dom,
struct device *dev);
@@ -205,7 +208,7 @@ static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid)
if (!dev_data)
return NULL;
- spin_lock_init(&dev_data->lock);
+ mutex_init(&dev_data->mutex);
dev_data->devid = devid;
ratelimit_default_init(&dev_data->rs);
@@ -558,22 +561,6 @@ static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev)
setup_aliases(iommu, dev);
}
-static void amd_iommu_uninit_device(struct device *dev)
-{
- struct iommu_dev_data *dev_data;
-
- dev_data = dev_iommu_priv_get(dev);
- if (!dev_data)
- return;
-
- if (dev_data->domain)
- detach_device(dev);
-
- /*
- * We keep dev_data around for unplugged devices and reuse it when the
- * device is re-plugged - not doing so would introduce a ton of races.
- */
-}
/****************************************************************************
*
@@ -1233,7 +1220,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
if (!iommu->need_sync)
return 0;
- data = atomic64_add_return(1, &iommu->cmd_sem_val);
+ data = atomic64_inc_return(&iommu->cmd_sem_val);
build_completion_wait(&cmd, iommu, data);
raw_spin_lock_irqsave(&iommu->lock, flags);
@@ -1252,18 +1239,17 @@ out_unlock:
static void domain_flush_complete(struct protection_domain *domain)
{
- int i;
+ struct pdom_iommu_info *pdom_iommu_info;
+ unsigned long i;
- for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
- if (domain && !domain->dev_iommu[i])
- continue;
+ lockdep_assert_held(&domain->lock);
- /*
- * Devices of this domain are behind this IOMMU
- * We need to wait for completion of all commands.
- */
- iommu_completion_wait(amd_iommus[i]);
- }
+ /*
+ * Devices of this domain are behind this IOMMU
+ * We need to wait for completion of all commands.
+ */
+ xa_for_each(&domain->iommu_array, i, pdom_iommu_info)
+ iommu_completion_wait(pdom_iommu_info->iommu);
}
static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
@@ -1445,21 +1431,22 @@ static int domain_flush_pages_v2(struct protection_domain *pdom,
static int domain_flush_pages_v1(struct protection_domain *pdom,
u64 address, size_t size)
{
+ struct pdom_iommu_info *pdom_iommu_info;
struct iommu_cmd cmd;
- int ret = 0, i;
+ int ret = 0;
+ unsigned long i;
+
+ lockdep_assert_held(&pdom->lock);
build_inv_iommu_pages(&cmd, address, size,
pdom->id, IOMMU_NO_PASID, false);
- for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
- if (!pdom->dev_iommu[i])
- continue;
-
+ xa_for_each(&pdom->iommu_array, i, pdom_iommu_info) {
/*
* Devices of this domain are behind this IOMMU
* We need a TLB flush
*/
- ret |= iommu_queue_command(amd_iommus[i], &cmd);
+ ret |= iommu_queue_command(pdom_iommu_info->iommu, &cmd);
}
return ret;
@@ -1498,6 +1485,8 @@ static void __domain_flush_pages(struct protection_domain *domain,
void amd_iommu_domain_flush_pages(struct protection_domain *domain,
u64 address, size_t size)
{
+ lockdep_assert_held(&domain->lock);
+
if (likely(!amd_iommu_np_cache)) {
__domain_flush_pages(domain, address, size);
@@ -1643,31 +1632,14 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag)
*
****************************************************************************/
-static u16 domain_id_alloc(void)
+static int pdom_id_alloc(void)
{
- unsigned long flags;
- int id;
-
- spin_lock_irqsave(&pd_bitmap_lock, flags);
- id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
- BUG_ON(id == 0);
- if (id > 0 && id < MAX_DOMAIN_ID)
- __set_bit(id, amd_iommu_pd_alloc_bitmap);
- else
- id = 0;
- spin_unlock_irqrestore(&pd_bitmap_lock, flags);
-
- return id;
+ return ida_alloc_range(&pdom_ids, 1, MAX_DOMAIN_ID - 1, GFP_ATOMIC);
}
-static void domain_id_free(int id)
+static void pdom_id_free(int id)
{
- unsigned long flags;
-
- spin_lock_irqsave(&pd_bitmap_lock, flags);
- if (id > 0 && id < MAX_DOMAIN_ID)
- __clear_bit(id, amd_iommu_pd_alloc_bitmap);
- spin_unlock_irqrestore(&pd_bitmap_lock, flags);
+ ida_free(&pdom_ids, id);
}
static void free_gcr3_tbl_level1(u64 *tbl)
@@ -1712,7 +1684,7 @@ static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info)
gcr3_info->glx = 0;
/* Free per device domain ID */
- domain_id_free(gcr3_info->domid);
+ pdom_id_free(gcr3_info->domid);
iommu_free_page(gcr3_info->gcr3_tbl);
gcr3_info->gcr3_tbl = NULL;
@@ -1739,6 +1711,7 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info,
{
int levels = get_gcr3_levels(pasids);
int nid = iommu ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
+ int domid;
if (levels > amd_iommu_max_glx_val)
return -EINVAL;
@@ -1747,11 +1720,14 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info,
return -EBUSY;
/* Allocate per device domain ID */
- gcr3_info->domid = domain_id_alloc();
+ domid = pdom_id_alloc();
+ if (domid <= 0)
+ return -ENOSPC;
+ gcr3_info->domid = domid;
gcr3_info->gcr3_tbl = iommu_alloc_page_node(nid, GFP_ATOMIC);
if (gcr3_info->gcr3_tbl == NULL) {
- domain_id_free(gcr3_info->domid);
+ pdom_id_free(domid);
return -ENOMEM;
}
@@ -2022,57 +1998,69 @@ static void destroy_gcr3_table(struct iommu_dev_data *dev_data,
free_gcr3_table(gcr3_info);
}
-static int do_attach(struct iommu_dev_data *dev_data,
- struct protection_domain *domain)
+static int pdom_attach_iommu(struct amd_iommu *iommu,
+ struct protection_domain *pdom)
{
- struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
- struct io_pgtable_cfg *cfg = &domain->iop.pgtbl.cfg;
+ struct pdom_iommu_info *pdom_iommu_info, *curr;
+ struct io_pgtable_cfg *cfg = &pdom->iop.pgtbl.cfg;
+ unsigned long flags;
int ret = 0;
- /* Update data structures */
- dev_data->domain = domain;
- list_add(&dev_data->list, &domain->dev_list);
+ spin_lock_irqsave(&pdom->lock, flags);
- /* Update NUMA Node ID */
- if (cfg->amd.nid == NUMA_NO_NODE)
- cfg->amd.nid = dev_to_node(dev_data->dev);
+ pdom_iommu_info = xa_load(&pdom->iommu_array, iommu->index);
+ if (pdom_iommu_info) {
+ pdom_iommu_info->refcnt++;
+ goto out_unlock;
+ }
- /* Do reference counting */
- domain->dev_iommu[iommu->index] += 1;
- domain->dev_cnt += 1;
+ pdom_iommu_info = kzalloc(sizeof(*pdom_iommu_info), GFP_ATOMIC);
+ if (!pdom_iommu_info) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
- /* Setup GCR3 table */
- if (pdom_is_sva_capable(domain)) {
- ret = init_gcr3_table(dev_data, domain);
- if (ret)
- return ret;
+ pdom_iommu_info->iommu = iommu;
+ pdom_iommu_info->refcnt = 1;
+
+ curr = xa_cmpxchg(&pdom->iommu_array, iommu->index,
+ NULL, pdom_iommu_info, GFP_ATOMIC);
+ if (curr) {
+ kfree(pdom_iommu_info);
+ ret = -ENOSPC;
+ goto out_unlock;
}
+ /* Update NUMA Node ID */
+ if (cfg->amd.nid == NUMA_NO_NODE)
+ cfg->amd.nid = dev_to_node(&iommu->dev->dev);
+
+out_unlock:
+ spin_unlock_irqrestore(&pdom->lock, flags);
return ret;
}
-static void do_detach(struct iommu_dev_data *dev_data)
+static void pdom_detach_iommu(struct amd_iommu *iommu,
+ struct protection_domain *pdom)
{
- struct protection_domain *domain = dev_data->domain;
- struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
-
- /* Clear DTE and flush the entry */
- dev_update_dte(dev_data, false);
+ struct pdom_iommu_info *pdom_iommu_info;
+ unsigned long flags;
- /* Flush IOTLB and wait for the flushes to finish */
- amd_iommu_domain_flush_all(domain);
+ spin_lock_irqsave(&pdom->lock, flags);
- /* Clear GCR3 table */
- if (pdom_is_sva_capable(domain))
- destroy_gcr3_table(dev_data, domain);
+ pdom_iommu_info = xa_load(&pdom->iommu_array, iommu->index);
+ if (!pdom_iommu_info) {
+ spin_unlock_irqrestore(&pdom->lock, flags);
+ return;
+ }
- /* Update data structures */
- dev_data->domain = NULL;
- list_del(&dev_data->list);
+ pdom_iommu_info->refcnt--;
+ if (pdom_iommu_info->refcnt == 0) {
+ xa_erase(&pdom->iommu_array, iommu->index);
+ kfree(pdom_iommu_info);
+ }
- /* decrease reference counters - needs to happen after the flushes */
- domain->dev_iommu[iommu->index] -= 1;
- domain->dev_cnt -= 1;
+ spin_unlock_irqrestore(&pdom->lock, flags);
}
/*
@@ -2082,27 +2070,56 @@ static void do_detach(struct iommu_dev_data *dev_data)
static int attach_device(struct device *dev,
struct protection_domain *domain)
{
- struct iommu_dev_data *dev_data;
- unsigned long flags;
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
+ struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
+ struct pci_dev *pdev;
int ret = 0;
- spin_lock_irqsave(&domain->lock, flags);
-
- dev_data = dev_iommu_priv_get(dev);
-
- spin_lock(&dev_data->lock);
+ mutex_lock(&dev_data->mutex);
if (dev_data->domain != NULL) {
ret = -EBUSY;
goto out;
}
- ret = do_attach(dev_data, domain);
+ /* Do reference counting */
+ ret = pdom_attach_iommu(iommu, domain);
+ if (ret)
+ goto out;
-out:
- spin_unlock(&dev_data->lock);
+ /* Setup GCR3 table */
+ if (pdom_is_sva_capable(domain)) {
+ ret = init_gcr3_table(dev_data, domain);
+ if (ret) {
+ pdom_detach_iommu(iommu, domain);
+ goto out;
+ }
+ }
- spin_unlock_irqrestore(&domain->lock, flags);
+ pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL;
+ if (pdev && pdom_is_sva_capable(domain)) {
+ pdev_enable_caps(pdev);
+
+ /*
+ * Device can continue to function even if IOPF
+ * enablement failed. Hence in error path just
+ * disable device PRI support.
+ */
+ if (amd_iommu_iopf_add_device(iommu, dev_data))
+ pdev_disable_cap_pri(pdev);
+ } else if (pdev) {
+ pdev_enable_cap_ats(pdev);
+ }
+
+ /* Update data structures */
+ dev_data->domain = domain;
+ list_add(&dev_data->list, &domain->dev_list);
+
+ /* Update device table */
+ dev_update_dte(dev_data, true);
+
+out:
+ mutex_unlock(&dev_data->mutex);
return ret;
}
@@ -2113,14 +2130,11 @@ out:
static void detach_device(struct device *dev)
{
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
- struct protection_domain *domain = dev_data->domain;
struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
+ struct protection_domain *domain = dev_data->domain;
unsigned long flags;
- bool ppr = dev_data->ppr;
-
- spin_lock_irqsave(&domain->lock, flags);
- spin_lock(&dev_data->lock);
+ mutex_lock(&dev_data->mutex);
/*
* First check if the device is still attached. It might already
@@ -2131,27 +2145,36 @@ static void detach_device(struct device *dev)
if (WARN_ON(!dev_data->domain))
goto out;
- if (ppr) {
+ /* Remove IOPF handler */
+ if (dev_data->ppr) {
iopf_queue_flush_dev(dev);
-
- /* Updated here so that it gets reflected in DTE */
- dev_data->ppr = false;
+ amd_iommu_iopf_remove_device(iommu, dev_data);
}
- do_detach(dev_data);
+ if (dev_is_pci(dev))
+ pdev_disable_caps(to_pci_dev(dev));
-out:
- spin_unlock(&dev_data->lock);
+ /* Clear DTE and flush the entry */
+ dev_update_dte(dev_data, false);
+ /* Flush IOTLB and wait for the flushes to finish */
+ spin_lock_irqsave(&domain->lock, flags);
+ amd_iommu_domain_flush_all(domain);
spin_unlock_irqrestore(&domain->lock, flags);
- /* Remove IOPF handler */
- if (ppr)
- amd_iommu_iopf_remove_device(iommu, dev_data);
+ /* Clear GCR3 table */
+ if (pdom_is_sva_capable(domain))
+ destroy_gcr3_table(dev_data, domain);
- if (dev_is_pci(dev))
- pdev_disable_caps(to_pci_dev(dev));
+ /* Update data structures */
+ dev_data->domain = NULL;
+ list_del(&dev_data->list);
+ /* decrease reference counters - needs to happen after the flushes */
+ pdom_detach_iommu(iommu, domain);
+
+out:
+ mutex_unlock(&dev_data->mutex);
}
static struct iommu_device *amd_iommu_probe_device(struct device *dev)
@@ -2208,17 +2231,14 @@ out_err:
static void amd_iommu_release_device(struct device *dev)
{
- struct amd_iommu *iommu;
-
- if (!check_device(dev))
- return;
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
- iommu = rlookup_amd_iommu(dev);
- if (!iommu)
- return;
+ WARN_ON(dev_data->domain);
- amd_iommu_uninit_device(dev);
- iommu_completion_wait(iommu);
+ /*
+ * We keep dev_data around for unplugged devices and reuse it when the
+ * device is re-plugged - not doing so would introduce a ton of races.
+ */
}
static struct iommu_group *amd_iommu_device_group(struct device *dev)
@@ -2239,30 +2259,12 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
*
*****************************************************************************/
-static void cleanup_domain(struct protection_domain *domain)
-{
- struct iommu_dev_data *entry;
-
- lockdep_assert_held(&domain->lock);
-
- if (!domain->dev_cnt)
- return;
-
- while (!list_empty(&domain->dev_list)) {
- entry = list_first_entry(&domain->dev_list,
- struct iommu_dev_data, list);
- BUG_ON(!entry->domain);
- do_detach(entry);
- }
- WARN_ON(domain->dev_cnt != 0);
-}
-
void protection_domain_free(struct protection_domain *domain)
{
WARN_ON(!list_empty(&domain->dev_list));
if (domain->domain.type & __IOMMU_DOMAIN_PAGING)
free_io_pgtable_ops(&domain->iop.pgtbl.ops);
- domain_id_free(domain->id);
+ pdom_id_free(domain->id);
kfree(domain);
}
@@ -2271,22 +2273,25 @@ static void protection_domain_init(struct protection_domain *domain, int nid)
spin_lock_init(&domain->lock);
INIT_LIST_HEAD(&domain->dev_list);
INIT_LIST_HEAD(&domain->dev_data_list);
+ xa_init(&domain->iommu_array);
domain->iop.pgtbl.cfg.amd.nid = nid;
}
struct protection_domain *protection_domain_alloc(unsigned int type, int nid)
{
struct protection_domain *domain;
+ int domid;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
- domain->id = domain_id_alloc();
- if (!domain->id) {
+ domid = pdom_id_alloc();
+ if (domid <= 0) {
kfree(domain);
return NULL;
}
+ domain->id = domid;
protection_domain_init(domain, nid);
@@ -2361,7 +2366,7 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
ret = pdom_setup_pgtable(domain, type, pgtable);
if (ret) {
- domain_id_free(domain->id);
+ pdom_id_free(domain->id);
kfree(domain);
return ERR_PTR(ret);
}
@@ -2442,16 +2447,7 @@ amd_iommu_domain_alloc_user(struct device *dev, u32 flags,
void amd_iommu_domain_free(struct iommu_domain *dom)
{
- struct protection_domain *domain;
- unsigned long flags;
-
- domain = to_pdomain(dom);
-
- spin_lock_irqsave(&domain->lock, flags);
-
- cleanup_domain(domain);
-
- spin_unlock_irqrestore(&domain->lock, flags);
+ struct protection_domain *domain = to_pdomain(dom);
protection_domain_free(domain);
}
@@ -2465,9 +2461,9 @@ static int blocked_domain_attach_device(struct iommu_domain *domain,
detach_device(dev);
/* Clear DTE and flush the entry */
- spin_lock(&dev_data->lock);
+ mutex_lock(&dev_data->mutex);
dev_update_dte(dev_data, false);
- spin_unlock(&dev_data->lock);
+ mutex_unlock(&dev_data->mutex);
return 0;
}
@@ -2493,18 +2489,25 @@ void amd_iommu_init_identity_domain(void)
domain->ops = &identity_domain_ops;
domain->owner = &amd_iommu_ops;
- identity_domain.id = domain_id_alloc();
+ identity_domain.id = pdom_id_alloc();
protection_domain_init(&identity_domain, NUMA_NO_NODE);
}
+/* Same as blocked domain except it supports only ops->attach_dev() */
+static struct iommu_domain release_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
+ .ops = &(const struct iommu_domain_ops) {
+ .attach_dev = blocked_domain_attach_device,
+ }
+};
+
static int amd_iommu_attach_device(struct iommu_domain *dom,
struct device *dev)
{
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
struct protection_domain *domain = to_pdomain(dom);
struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
- struct pci_dev *pdev;
int ret;
/*
@@ -2537,24 +2540,6 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
}
#endif
- pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL;
- if (pdev && pdom_is_sva_capable(domain)) {
- pdev_enable_caps(pdev);
-
- /*
- * Device can continue to function even if IOPF
- * enablement failed. Hence in error path just
- * disable device PRI support.
- */
- if (amd_iommu_iopf_add_device(iommu, dev_data))
- pdev_disable_cap_pri(pdev);
- } else if (pdev) {
- pdev_enable_cap_ats(pdev);
- }
-
- /* Update device table */
- dev_update_dte(dev_data, true);
-
return ret;
}
@@ -2896,6 +2881,7 @@ static int amd_iommu_dev_disable_feature(struct device *dev,
const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.blocked_domain = &blocked_domain,
+ .release_domain = &release_domain,
.identity_domain = &identity_domain.domain,
.domain_alloc = amd_iommu_domain_alloc,
.domain_alloc_user = amd_iommu_domain_alloc_user,
@@ -2945,7 +2931,7 @@ static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
return;
build_inv_irt(&cmd, devid);
- data = atomic64_add_return(1, &iommu->cmd_sem_val);
+ data = atomic64_inc_return(&iommu->cmd_sem_val);
build_completion_wait(&cmd2, iommu, data);
raw_spin_lock_irqsave(&iommu->lock, flags);
diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c
index 0657b9373be5..8c73a30c2800 100644
--- a/drivers/iommu/amd/pasid.c
+++ b/drivers/iommu/amd/pasid.c
@@ -100,7 +100,8 @@ static const struct mmu_notifier_ops sva_mn = {
};
int iommu_sva_set_dev_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid)
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
{
struct pdom_dev_data *pdom_dev_data;
struct protection_domain *sva_pdom = to_pdomain(domain);
@@ -108,6 +109,9 @@ int iommu_sva_set_dev_pasid(struct iommu_domain *domain,
unsigned long flags;
int ret = -EINVAL;
+ if (old)
+ return -EOPNOTSUPP;
+
/* PASID zero is used for requests from the I/O device without PASID */
if (!is_pasid_valid(dev_data, pasid))
return ret;
diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile
index dc98c88b48c8..493a659cc66b 100644
--- a/drivers/iommu/arm/arm-smmu-v3/Makefile
+++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
arm_smmu_v3-y := arm-smmu-v3.o
+arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_IOMMUFD) += arm-smmu-v3-iommufd.o
arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
arm_smmu_v3-$(CONFIG_TEGRA241_CMDQV) += tegra241-cmdqv.o
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
new file mode 100644
index 000000000000..3d2671031c9b
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+
+#include <uapi/linux/iommufd.h>
+
+#include "arm-smmu-v3.h"
+
+void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type)
+{
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ struct iommu_hw_info_arm_smmuv3 *info;
+ u32 __iomem *base_idr;
+ unsigned int i;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return ERR_PTR(-ENOMEM);
+
+ base_idr = master->smmu->base + ARM_SMMU_IDR0;
+ for (i = 0; i <= 5; i++)
+ info->idr[i] = readl_relaxed(base_idr + i);
+ info->iidr = readl_relaxed(master->smmu->base + ARM_SMMU_IIDR);
+ info->aidr = readl_relaxed(master->smmu->base + ARM_SMMU_AIDR);
+
+ *length = sizeof(*info);
+ *type = IOMMU_HW_INFO_TYPE_ARM_SMMUV3;
+
+ return info;
+}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index a7c36654dee5..1d3e71569775 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -332,7 +332,8 @@ void arm_smmu_sva_notifier_synchronize(void)
}
static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t id)
+ struct device *dev, ioasid_t id,
+ struct iommu_domain *old)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
@@ -348,7 +349,7 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
* get reassigned
*/
arm_smmu_make_sva_cd(&target, master, domain->mm, smmu_domain->cd.asid);
- ret = arm_smmu_set_pasid(master, smmu_domain, id, &target);
+ ret = arm_smmu_set_pasid(master, smmu_domain, id, &target, old);
mmput(domain->mm);
return ret;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 826db8894fb7..7ee3cbbe3744 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1549,7 +1549,6 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
}
}
-VISIBLE_IF_KUNIT
void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
{
memset(target, 0, sizeof(*target));
@@ -1632,7 +1631,6 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);
-VISIBLE_IF_KUNIT
void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
struct arm_smmu_master *master,
struct arm_smmu_domain *smmu_domain,
@@ -2293,6 +2291,8 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
case IOMMU_CAP_CACHE_COHERENCY:
/* Assume that a coherent TCU implies coherent TBUs */
return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
+ case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
+ return arm_smmu_master_canwbs(master);
case IOMMU_CAP_NOEXEC:
case IOMMU_CAP_DEFERRED_FLUSH:
return true;
@@ -2303,6 +2303,26 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
}
}
+static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_master_domain *master_domain;
+ unsigned long flags;
+ bool ret = true;
+
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master_domain, &smmu_domain->devices,
+ devices_elm) {
+ if (!arm_smmu_master_canwbs(master_domain->master)) {
+ ret = false;
+ break;
+ }
+ }
+ smmu_domain->enforce_cache_coherency = ret;
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+ return ret;
+}
+
struct arm_smmu_domain *arm_smmu_domain_alloc(void)
{
struct arm_smmu_domain *smmu_domain;
@@ -2483,8 +2503,8 @@ arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
}
}
-static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
- const struct arm_smmu_ste *target)
+void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
+ const struct arm_smmu_ste *target)
{
int i, j;
struct arm_smmu_device *smmu = master->smmu;
@@ -2649,16 +2669,6 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
}
-struct arm_smmu_attach_state {
- /* Inputs */
- struct iommu_domain *old_domain;
- struct arm_smmu_master *master;
- bool cd_needs_ats;
- ioasid_t ssid;
- /* Resulting state */
- bool ats_enabled;
-};
-
/*
* Start the sequence to attach a domain to a master. The sequence contains three
* steps:
@@ -2679,8 +2689,8 @@ struct arm_smmu_attach_state {
* new_domain can be a non-paging domain. In this case ATS will not be enabled,
* and invalidations won't be tracked.
*/
-static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
- struct iommu_domain *new_domain)
+int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
+ struct iommu_domain *new_domain)
{
struct arm_smmu_master *master = state->master;
struct arm_smmu_master_domain *master_domain;
@@ -2731,6 +2741,14 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
* one of them.
*/
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ if (smmu_domain->enforce_cache_coherency &&
+ !arm_smmu_master_canwbs(master)) {
+ spin_unlock_irqrestore(&smmu_domain->devices_lock,
+ flags);
+ kfree(master_domain);
+ return -EINVAL;
+ }
+
if (state->ats_enabled)
atomic_inc(&smmu_domain->nr_ats_masters);
list_add(&master_domain->devices_elm, &smmu_domain->devices);
@@ -2754,7 +2772,7 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
* completes synchronizing the PCI device's ATC and finishes manipulating the
* smmu_domain->devices list.
*/
-static void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
+void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
{
struct arm_smmu_master *master = state->master;
@@ -2856,7 +2874,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
}
static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t id)
+ struct device *dev, ioasid_t id,
+ struct iommu_domain *old)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
@@ -2882,7 +2901,7 @@ static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
*/
arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
return arm_smmu_set_pasid(master, to_smmu_domain(domain), id,
- &target_cd);
+ &target_cd, old);
}
static void arm_smmu_update_ste(struct arm_smmu_master *master,
@@ -2912,16 +2931,13 @@ static void arm_smmu_update_ste(struct arm_smmu_master *master,
int arm_smmu_set_pasid(struct arm_smmu_master *master,
struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
- struct arm_smmu_cd *cd)
+ struct arm_smmu_cd *cd, struct iommu_domain *old)
{
struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev);
struct arm_smmu_attach_state state = {
.master = master,
- /*
- * For now the core code prevents calling this when a domain is
- * already attached, no need to set old_domain.
- */
.ssid = pasid,
+ .old_domain = old,
};
struct arm_smmu_cd *cdptr;
int ret;
@@ -3085,7 +3101,8 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
- IOMMU_HWPT_ALLOC_PASID;
+ IOMMU_HWPT_ALLOC_PASID |
+ IOMMU_HWPT_ALLOC_NEST_PARENT;
struct arm_smmu_domain *smmu_domain;
int ret;
@@ -3101,6 +3118,14 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
if (IS_ERR(smmu_domain))
return ERR_CAST(smmu_domain);
+ if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT) {
+ if (!(master->smmu->features & ARM_SMMU_FEAT_NESTING)) {
+ ret = -EOPNOTSUPP;
+ goto err_free;
+ }
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+ }
+
smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags);
@@ -3382,21 +3407,6 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
return group;
}
-static int arm_smmu_enable_nesting(struct iommu_domain *domain)
-{
- struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- int ret = 0;
-
- mutex_lock(&smmu_domain->init_mutex);
- if (smmu_domain->smmu)
- ret = -EPERM;
- else
- smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
- mutex_unlock(&smmu_domain->init_mutex);
-
- return ret;
-}
-
static int arm_smmu_of_xlate(struct device *dev,
const struct of_phandle_args *args)
{
@@ -3495,6 +3505,7 @@ static struct iommu_ops arm_smmu_ops = {
.identity_domain = &arm_smmu_identity_domain,
.blocked_domain = &arm_smmu_blocked_domain,
.capable = arm_smmu_capable,
+ .hw_info = arm_smmu_hw_info,
.domain_alloc_paging = arm_smmu_domain_alloc_paging,
.domain_alloc_sva = arm_smmu_sva_domain_alloc,
.domain_alloc_user = arm_smmu_domain_alloc_user,
@@ -3512,13 +3523,13 @@ static struct iommu_ops arm_smmu_ops = {
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = arm_smmu_attach_dev,
+ .enforce_cache_coherency = arm_smmu_enforce_cache_coherency,
.set_dev_pasid = arm_smmu_s1_set_dev_pasid,
.map_pages = arm_smmu_map_pages,
.unmap_pages = arm_smmu_unmap_pages,
.flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
- .enable_nesting = arm_smmu_enable_nesting,
.free = arm_smmu_domain_free_paging,
}
};
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 1e9952ca989f..1e96d4af03f8 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -81,6 +81,8 @@ struct arm_smmu_device;
#define IIDR_REVISION GENMASK(15, 12)
#define IIDR_IMPLEMENTER GENMASK(11, 0)
+#define ARM_SMMU_AIDR 0x1C
+
#define ARM_SMMU_CR0 0x20
#define CR0_ATSCHK (1 << 4)
#define CR0_CMDQEN (1 << 3)
@@ -811,6 +813,7 @@ struct arm_smmu_domain {
/* List of struct arm_smmu_master_domain */
struct list_head devices;
spinlock_t devices_lock;
+ bool enforce_cache_coherency : 1;
struct mmu_notifier mmu_notifier;
};
@@ -827,21 +830,22 @@ struct arm_smmu_entry_writer_ops {
void (*sync)(struct arm_smmu_entry_writer *writer);
};
+void arm_smmu_make_abort_ste(struct arm_smmu_ste *target);
+void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
+ struct arm_smmu_master *master,
+ struct arm_smmu_domain *smmu_domain,
+ bool ats_enabled);
+
#if IS_ENABLED(CONFIG_KUNIT)
void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits);
void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *cur,
const __le64 *target);
void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits);
-void arm_smmu_make_abort_ste(struct arm_smmu_ste *target);
void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
struct arm_smmu_ste *target);
void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
struct arm_smmu_master *master, bool ats_enabled,
unsigned int s1dss);
-void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
- struct arm_smmu_master *master,
- struct arm_smmu_domain *smmu_domain,
- bool ats_enabled);
void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
struct arm_smmu_master *master, struct mm_struct *mm,
u16 asid);
@@ -875,7 +879,7 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
int arm_smmu_set_pasid(struct arm_smmu_master *master,
struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
- struct arm_smmu_cd *cd);
+ struct arm_smmu_cd *cd, struct iommu_domain *old);
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
@@ -893,6 +897,28 @@ int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq);
+static inline bool arm_smmu_master_canwbs(struct arm_smmu_master *master)
+{
+ return dev_iommu_fwspec_get(master->dev)->flags &
+ IOMMU_FWSPEC_PCI_RC_CANWBS;
+}
+
+struct arm_smmu_attach_state {
+ /* Inputs */
+ struct iommu_domain *old_domain;
+ struct arm_smmu_master *master;
+ bool cd_needs_ats;
+ ioasid_t ssid;
+ /* Resulting state */
+ bool ats_enabled;
+};
+
+int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
+ struct iommu_domain *new_domain);
+void arm_smmu_attach_commit(struct arm_smmu_attach_state *state);
+void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
+ const struct arm_smmu_ste *target);
+
#ifdef CONFIG_ARM_SMMU_V3_SVA
bool arm_smmu_sva_supported(struct arm_smmu_device *smmu);
bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);
@@ -949,4 +975,11 @@ tegra241_cmdqv_probe(struct arm_smmu_device *smmu)
return ERR_PTR(-ENODEV);
}
#endif /* CONFIG_TEGRA241_CMDQV */
+
+#if IS_ENABLED(CONFIG_ARM_SMMU_V3_IOMMUFD)
+void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type);
+#else
+#define arm_smmu_hw_info NULL
+#endif /* CONFIG_ARM_SMMU_V3_IOMMUFD */
+
#endif /* _ARM_SMMU_V3_H */
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 14618772a3d6..ade4684c14c9 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1569,21 +1569,6 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
return group;
}
-static int arm_smmu_enable_nesting(struct iommu_domain *domain)
-{
- struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- int ret = 0;
-
- mutex_lock(&smmu_domain->init_mutex);
- if (smmu_domain->smmu)
- ret = -EPERM;
- else
- smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
- mutex_unlock(&smmu_domain->init_mutex);
-
- return ret;
-}
-
static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
unsigned long quirks)
{
@@ -1667,7 +1652,6 @@ static struct iommu_ops arm_smmu_ops = {
.flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
- .enable_nesting = arm_smmu_enable_nesting,
.set_pgtable_quirks = arm_smmu_set_pgtable_quirks,
.free = arm_smmu_domain_free,
}
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 88fd32a9323c..f2f538c70650 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -14,6 +14,7 @@ config INTEL_IOMMU
depends on PCI_MSI && ACPI && X86
select IOMMU_API
select IOMMU_IOVA
+ select IOMMU_IOPF
select IOMMUFD_DRIVER if IOMMUFD
select NEED_DMA_MAP_STATE
select DMAR_TABLE
@@ -50,7 +51,6 @@ config INTEL_IOMMU_SVM
depends on X86_64
select MMU_NOTIFIER
select IOMMU_SVA
- select IOMMU_IOPF
help
Shared Virtual Memory (SVM) provides a facility for devices
to access DMA resources through process address space by
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index c8beb0281559..d3bb0798092d 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DMAR_TABLE) += dmar.o
-obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o
+obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o prq.o
obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index eaf862e8dea1..9f424acf474e 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1060,7 +1060,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
err = iommu->seq_id;
goto error;
}
- sprintf(iommu->name, "dmar%d", iommu->seq_id);
+ snprintf(iommu->name, sizeof(iommu->name), "dmar%d", iommu->seq_id);
err = map_iommu(iommu, drhd);
if (err) {
@@ -1895,19 +1895,6 @@ void dmar_msi_write(int irq, struct msi_msg *msg)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
-void dmar_msi_read(int irq, struct msi_msg *msg)
-{
- struct intel_iommu *iommu = irq_get_handler_data(irq);
- int reg = dmar_msi_reg(iommu, irq);
- unsigned long flag;
-
- raw_spin_lock_irqsave(&iommu->register_lock, flag);
- msg->data = readl(iommu->reg + reg + 4);
- msg->address_lo = readl(iommu->reg + reg + 8);
- msg->address_hi = readl(iommu->reg + reg + 12);
- raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
u8 fault_reason, u32 pasid, u16 source_id,
unsigned long long addr)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index e860bc9439a2..527f6f89d8a1 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -352,89 +352,6 @@ static bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
}
-static void domain_update_iommu_coherency(struct dmar_domain *domain)
-{
- struct iommu_domain_info *info;
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
- bool found = false;
- unsigned long i;
-
- domain->iommu_coherency = true;
- xa_for_each(&domain->iommu_array, i, info) {
- found = true;
- if (!iommu_paging_structure_coherency(info->iommu)) {
- domain->iommu_coherency = false;
- break;
- }
- }
- if (found)
- return;
-
- /* No hardware attached; use lowest common denominator */
- rcu_read_lock();
- for_each_active_iommu(iommu, drhd) {
- if (!iommu_paging_structure_coherency(iommu)) {
- domain->iommu_coherency = false;
- break;
- }
- }
- rcu_read_unlock();
-}
-
-static int domain_update_iommu_superpage(struct dmar_domain *domain,
- struct intel_iommu *skip)
-{
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
- int mask = 0x3;
-
- if (!intel_iommu_superpage)
- return 0;
-
- /* set iommu_superpage to the smallest common denominator */
- rcu_read_lock();
- for_each_active_iommu(iommu, drhd) {
- if (iommu != skip) {
- if (domain && domain->use_first_level) {
- if (!cap_fl1gp_support(iommu->cap))
- mask = 0x1;
- } else {
- mask &= cap_super_page_val(iommu->cap);
- }
-
- if (!mask)
- break;
- }
- }
- rcu_read_unlock();
-
- return fls(mask);
-}
-
-static int domain_update_device_node(struct dmar_domain *domain)
-{
- struct device_domain_info *info;
- int nid = NUMA_NO_NODE;
- unsigned long flags;
-
- spin_lock_irqsave(&domain->lock, flags);
- list_for_each_entry(info, &domain->devices, link) {
- /*
- * There could possibly be multiple device numa nodes as devices
- * within the same domain may sit behind different IOMMUs. There
- * isn't perfect answer in such situation, so we select first
- * come first served policy.
- */
- nid = dev_to_node(info->dev);
- if (nid != NUMA_NO_NODE)
- break;
- }
- spin_unlock_irqrestore(&domain->lock, flags);
-
- return nid;
-}
-
/* Return the super pagesize bitmap if supported. */
static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
{
@@ -452,34 +369,6 @@ static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
return bitmap;
}
-/* Some capabilities may be different across iommus */
-void domain_update_iommu_cap(struct dmar_domain *domain)
-{
- domain_update_iommu_coherency(domain);
- domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
-
- /*
- * If RHSA is missing, we should default to the device numa domain
- * as fall back.
- */
- if (domain->nid == NUMA_NO_NODE)
- domain->nid = domain_update_device_node(domain);
-
- /*
- * First-level translation restricts the input-address to a
- * canonical address (i.e., address bits 63:N have the same
- * value as address bit [N-1], where N is 48-bits with 4-level
- * paging and 57-bits with 5-level paging). Hence, skip bit
- * [N-1].
- */
- if (domain->use_first_level)
- domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
- else
- domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
-
- domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
-}
-
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
u8 devfn, int alloc)
{
@@ -707,14 +596,15 @@ static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
while (1) {
offset = pfn_level_offset(pfn, level);
pte = &parent[offset];
- if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) {
- pr_info("PTE not present at level %d\n", level);
- break;
- }
pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
- if (level == 1)
+ if (!dma_pte_present(pte)) {
+ pr_info("page table not present at level %d\n", level - 1);
+ break;
+ }
+
+ if (level == 1 || dma_pte_superpage(pte))
break;
parent = phys_to_virt(dma_pte_addr(pte));
@@ -737,11 +627,11 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
/* root entry dump */
- rt_entry = &iommu->root_entry[bus];
- if (!rt_entry) {
- pr_info("root table entry is not present\n");
+ if (!iommu->root_entry) {
+ pr_info("root table is not present\n");
return;
}
+ rt_entry = &iommu->root_entry[bus];
if (sm_supported(iommu))
pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
@@ -752,7 +642,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
/* context entry dump */
ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
if (!ctx_entry) {
- pr_info("context table entry is not present\n");
+ pr_info("context table is not present\n");
return;
}
@@ -761,17 +651,23 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
/* legacy mode does not require PASID entries */
if (!sm_supported(iommu)) {
+ if (!context_present(ctx_entry)) {
+ pr_info("legacy mode page table is not present\n");
+ return;
+ }
level = agaw_to_level(ctx_entry->hi & 7);
pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
goto pgtable_walk;
}
- /* get the pointer to pasid directory entry */
- dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
- if (!dir) {
- pr_info("pasid directory entry is not present\n");
+ if (!context_present(ctx_entry)) {
+ pr_info("pasid directory table is not present\n");
return;
}
+
+ /* get the pointer to pasid directory entry */
+ dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
+
/* For request-without-pasid, get the pasid from context entry */
if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
pasid = IOMMU_NO_PASID;
@@ -783,7 +679,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
/* get the pointer to the pasid table entry */
entries = get_pasid_table_from_pde(pde);
if (!entries) {
- pr_info("pasid table entry is not present\n");
+ pr_info("pasid table is not present\n");
return;
}
index = pasid & PASID_PTE_MASK;
@@ -791,6 +687,11 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
for (i = 0; i < ARRAY_SIZE(pte->val); i++)
pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
+ if (!pasid_pte_is_present(pte)) {
+ pr_info("scalable mode page table is not present\n");
+ return;
+ }
+
if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
@@ -1428,51 +1329,25 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
/* free context mapping */
free_context_table(iommu);
-#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_supported(iommu)) {
- if (ecap_prs(iommu->ecap))
- intel_svm_finish_prq(iommu);
- }
-#endif
+ if (ecap_prs(iommu->ecap))
+ intel_iommu_finish_prq(iommu);
}
/*
* Check and return whether first level is used by default for
* DMA translation.
*/
-static bool first_level_by_default(unsigned int type)
+static bool first_level_by_default(struct intel_iommu *iommu)
{
/* Only SL is available in legacy mode */
- if (!scalable_mode_support())
+ if (!sm_supported(iommu))
return false;
/* Only level (either FL or SL) is available, just use it */
- if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity())
- return intel_cap_flts_sanity();
-
- /* Both levels are available, decide it based on domain type */
- return type != IOMMU_DOMAIN_UNMANAGED;
-}
+ if (ecap_flts(iommu->ecap) ^ ecap_slts(iommu->ecap))
+ return ecap_flts(iommu->ecap);
-static struct dmar_domain *alloc_domain(unsigned int type)
-{
- struct dmar_domain *domain;
-
- domain = kzalloc(sizeof(*domain), GFP_KERNEL);
- if (!domain)
- return NULL;
-
- domain->nid = NUMA_NO_NODE;
- if (first_level_by_default(type))
- domain->use_first_level = true;
- INIT_LIST_HEAD(&domain->devices);
- INIT_LIST_HEAD(&domain->dev_pasids);
- INIT_LIST_HEAD(&domain->cache_tags);
- spin_lock_init(&domain->lock);
- spin_lock_init(&domain->cache_lock);
- xa_init(&domain->iommu_array);
-
- return domain;
+ return true;
}
int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
@@ -1514,7 +1389,6 @@ int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
ret = xa_err(curr) ? : -EBUSY;
goto err_clear;
}
- domain_update_iommu_cap(domain);
spin_unlock(&iommu->lock);
return 0;
@@ -1540,26 +1414,11 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
clear_bit(info->did, iommu->domain_ids);
xa_erase(&domain->iommu_array, iommu->seq_id);
domain->nid = NUMA_NO_NODE;
- domain_update_iommu_cap(domain);
kfree(info);
}
spin_unlock(&iommu->lock);
}
-static int guestwidth_to_adjustwidth(int gaw)
-{
- int agaw;
- int r = (gaw - 12) % 9;
-
- if (r == 0)
- agaw = gaw;
- else
- agaw = gaw + 9 - r;
- if (agaw > 64)
- agaw = 64;
- return agaw;
-}
-
static void domain_exit(struct dmar_domain *domain)
{
if (domain->pgd) {
@@ -1601,7 +1460,7 @@ static void copied_context_tear_down(struct intel_iommu *iommu,
if (did_old < cap_ndoms(iommu->cap)) {
iommu->flush.flush_context(iommu, did_old,
- (((u16)bus) << 8) | devfn,
+ PCI_DEVID(bus, devfn),
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
@@ -1622,7 +1481,7 @@ static void context_present_cache_flush(struct intel_iommu *iommu, u16 did,
{
if (cap_caching_mode(iommu->cap)) {
iommu->flush.flush_context(iommu, 0,
- (((u16)bus) << 8) | devfn,
+ PCI_DEVID(bus, devfn),
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
@@ -1641,7 +1500,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
int translation = CONTEXT_TT_MULTI_LEVEL;
struct dma_pte *pgd = domain->pgd;
struct context_entry *context;
- int agaw, ret;
+ int ret;
pr_debug("Set context mapping for %02x:%02x.%d\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1658,27 +1517,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
copied_context_tear_down(iommu, context, bus, devfn);
context_clear_entry(context);
-
context_set_domain_id(context, did);
- /*
- * Skip top levels of page tables for iommu which has
- * less agaw than default. Unnecessary for PT mode.
- */
- for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
- ret = -ENOMEM;
- pgd = phys_to_virt(dma_pte_addr(pgd));
- if (!dma_pte_present(pgd))
- goto out_unlock;
- }
-
if (info && info->ats_supported)
translation = CONTEXT_TT_DEV_IOTLB;
else
translation = CONTEXT_TT_MULTI_LEVEL;
context_set_address_root(context, virt_to_phys(pgd));
- context_set_address_width(context, agaw);
+ context_set_address_width(context, domain->agaw);
context_set_translation_type(context, translation);
context_set_fault_enable(context);
context_set_present(context);
@@ -1905,26 +1752,52 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
intel_context_flush_present(info, context, did, true);
}
+int __domain_setup_first_level(struct intel_iommu *iommu,
+ struct device *dev, ioasid_t pasid,
+ u16 did, pgd_t *pgd, int flags,
+ struct iommu_domain *old)
+{
+ if (!old)
+ return intel_pasid_setup_first_level(iommu, dev, pgd,
+ pasid, did, flags);
+ return intel_pasid_replace_first_level(iommu, dev, pgd, pasid, did,
+ iommu_domain_did(old, iommu),
+ flags);
+}
+
+static int domain_setup_second_level(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
+{
+ if (!old)
+ return intel_pasid_setup_second_level(iommu, domain,
+ dev, pasid);
+ return intel_pasid_replace_second_level(iommu, domain, dev,
+ iommu_domain_did(old, iommu),
+ pasid);
+}
+
+static int domain_setup_passthrough(struct intel_iommu *iommu,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
+{
+ if (!old)
+ return intel_pasid_setup_pass_through(iommu, dev, pasid);
+ return intel_pasid_replace_pass_through(iommu, dev,
+ iommu_domain_did(old, iommu),
+ pasid);
+}
+
static int domain_setup_first_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev,
- u32 pasid)
+ u32 pasid, struct iommu_domain *old)
{
struct dma_pte *pgd = domain->pgd;
- int agaw, level;
- int flags = 0;
-
- /*
- * Skip top levels of page tables for iommu which has
- * less agaw than default. Unnecessary for PT mode.
- */
- for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
- pgd = phys_to_virt(dma_pte_addr(pgd));
- if (!dma_pte_present(pgd))
- return -ENOMEM;
- }
+ int level, flags = 0;
- level = agaw_to_level(agaw);
+ level = agaw_to_level(domain->agaw);
if (level != 4 && level != 5)
return -EINVAL;
@@ -1934,15 +1807,9 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
if (domain->force_snooping)
flags |= PASID_FLAG_PAGE_SNOOP;
- return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
- domain_id_iommu(domain, iommu),
- flags);
-}
-
-static bool dev_is_real_dma_subdevice(struct device *dev)
-{
- return dev && dev_is_pci(dev) &&
- pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
+ return __domain_setup_first_level(iommu, dev, pasid,
+ domain_id_iommu(domain, iommu),
+ (pgd_t *)pgd, flags, old);
}
static int dmar_domain_attach_device(struct dmar_domain *domain,
@@ -1968,9 +1835,11 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
if (!sm_supported(iommu))
ret = domain_context_mapping(domain, dev);
else if (domain->use_first_level)
- ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID);
+ ret = domain_setup_first_level(iommu, domain, dev,
+ IOMMU_NO_PASID, NULL);
else
- ret = intel_pasid_setup_second_level(iommu, domain, dev, IOMMU_NO_PASID);
+ ret = domain_setup_second_level(iommu, domain, dev,
+ IOMMU_NO_PASID, NULL);
if (ret)
goto out_block_translation;
@@ -2354,19 +2223,18 @@ static int __init init_dmars(void)
iommu_flush_write_buffer(iommu);
-#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
+ if (ecap_prs(iommu->ecap)) {
/*
* Call dmar_alloc_hwirq() with dmar_global_lock held,
* could cause possible lock race condition.
*/
up_write(&dmar_global_lock);
- ret = intel_svm_enable_prq(iommu);
+ ret = intel_iommu_enable_prq(iommu);
down_write(&dmar_global_lock);
if (ret)
goto free_iommu;
}
-#endif
+
ret = dmar_set_interrupt(iommu);
if (ret)
goto free_iommu;
@@ -2746,20 +2614,13 @@ int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
{
- int sp, ret;
struct intel_iommu *iommu = dmaru->iommu;
+ int ret;
ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
if (ret)
goto out;
- sp = domain_update_iommu_superpage(NULL, iommu) - 1;
- if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
- pr_warn("%s: Doesn't support large page.\n",
- iommu->name);
- return -ENXIO;
- }
-
/*
* Disable translation if already enabled prior to OS handover.
*/
@@ -2786,13 +2647,12 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
intel_iommu_init_qi(iommu);
iommu_flush_write_buffer(iommu);
-#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
- ret = intel_svm_enable_prq(iommu);
+ if (ecap_prs(iommu->ecap)) {
+ ret = intel_iommu_enable_prq(iommu);
if (ret)
goto disable_iommu;
}
-#endif
+
ret = dmar_set_interrupt(iommu);
if (ret)
goto disable_iommu;
@@ -3288,7 +3148,7 @@ int __init intel_iommu_init(void)
* the virtual and physical IOMMU page-tables.
*/
if (cap_caching_mode(iommu->cap) &&
- !first_level_by_default(IOMMU_DOMAIN_DMA)) {
+ !first_level_by_default(iommu)) {
pr_info_once("IOMMU batching disallowed due to virtualization\n");
iommu_set_dma_strict();
}
@@ -3381,27 +3241,6 @@ void device_block_translation(struct device *dev)
info->domain = NULL;
}
-static int md_domain_init(struct dmar_domain *domain, int guest_width)
-{
- int adjust_width;
-
- /* calculate AGAW */
- domain->gaw = guest_width;
- adjust_width = guestwidth_to_adjustwidth(guest_width);
- domain->agaw = width_to_agaw(adjust_width);
-
- domain->iommu_coherency = false;
- domain->iommu_superpage = 0;
- domain->max_addr = 0;
-
- /* always allocate the top pgd */
- domain->pgd = iommu_alloc_page_node(domain->nid, GFP_ATOMIC);
- if (!domain->pgd)
- return -ENOMEM;
- domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
- return 0;
-}
-
static int blocking_domain_attach_dev(struct iommu_domain *domain,
struct device *dev)
{
@@ -3488,39 +3327,6 @@ static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_st
return domain;
}
-static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
-{
- struct dmar_domain *dmar_domain;
- struct iommu_domain *domain;
-
- switch (type) {
- case IOMMU_DOMAIN_DMA:
- case IOMMU_DOMAIN_UNMANAGED:
- dmar_domain = alloc_domain(type);
- if (!dmar_domain) {
- pr_err("Can't allocate dmar_domain\n");
- return NULL;
- }
- if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
- pr_err("Domain initialization failed\n");
- domain_exit(dmar_domain);
- return NULL;
- }
-
- domain = &dmar_domain->domain;
- domain->geometry.aperture_start = 0;
- domain->geometry.aperture_end =
- __DOMAIN_MAX_ADDR(dmar_domain->gaw);
- domain->geometry.force_aperture = true;
-
- return domain;
- default:
- return NULL;
- }
-
- return NULL;
-}
-
static struct iommu_domain *
intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
struct iommu_domain *parent,
@@ -3532,6 +3338,7 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
struct intel_iommu *iommu = info->iommu;
struct dmar_domain *dmar_domain;
struct iommu_domain *domain;
+ bool first_stage;
/* Must be NESTING domain */
if (parent) {
@@ -3541,15 +3348,28 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
}
if (flags &
- (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING)))
+ (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING
+ | IOMMU_HWPT_FAULT_ID_VALID)))
return ERR_PTR(-EOPNOTSUPP);
if (nested_parent && !nested_supported(iommu))
return ERR_PTR(-EOPNOTSUPP);
if (user_data || (dirty_tracking && !ssads_supported(iommu)))
return ERR_PTR(-EOPNOTSUPP);
- /* Do not use first stage for user domain translation. */
- dmar_domain = paging_domain_alloc(dev, false);
+ /*
+ * Always allocate the guest compatible page table unless
+ * IOMMU_HWPT_ALLOC_NEST_PARENT or IOMMU_HWPT_ALLOC_DIRTY_TRACKING
+ * is specified.
+ */
+ if (nested_parent || dirty_tracking) {
+ if (!sm_supported(iommu) || !ecap_slts(iommu->ecap))
+ return ERR_PTR(-EOPNOTSUPP);
+ first_stage = false;
+ } else {
+ first_stage = first_level_by_default(iommu);
+ }
+
+ dmar_domain = paging_domain_alloc(dev, first_stage);
if (IS_ERR(dmar_domain))
return ERR_CAST(dmar_domain);
domain = &dmar_domain->domain;
@@ -3583,42 +3403,41 @@ static void intel_iommu_domain_free(struct iommu_domain *domain)
domain_exit(dmar_domain);
}
-int prepare_domain_attach_device(struct iommu_domain *domain,
- struct device *dev)
+int paging_domain_compatible(struct iommu_domain *domain, struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu = info->iommu;
int addr_width;
+ if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING)))
+ return -EPERM;
+
if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
return -EINVAL;
if (domain->dirty_ops && !ssads_supported(iommu))
return -EINVAL;
+ if (dmar_domain->iommu_coherency !=
+ iommu_paging_structure_coherency(iommu))
+ return -EINVAL;
+
+ if (dmar_domain->iommu_superpage !=
+ iommu_superpage_capability(iommu, dmar_domain->use_first_level))
+ return -EINVAL;
+
+ if (dmar_domain->use_first_level &&
+ (!sm_supported(iommu) || !ecap_flts(iommu->ecap)))
+ return -EINVAL;
+
/* check if this iommu agaw is sufficient for max mapped address */
addr_width = agaw_to_width(iommu->agaw);
if (addr_width > cap_mgaw(iommu->cap))
addr_width = cap_mgaw(iommu->cap);
- if (dmar_domain->max_addr > (1LL << addr_width))
+ if (dmar_domain->gaw > addr_width || dmar_domain->agaw > iommu->agaw)
return -EINVAL;
- dmar_domain->gaw = addr_width;
-
- /*
- * Knock out extra levels of page tables if necessary
- */
- while (iommu->agaw < dmar_domain->agaw) {
- struct dma_pte *pte;
-
- pte = dmar_domain->pgd;
- if (dma_pte_present(pte)) {
- dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
- iommu_free_page(pte);
- }
- dmar_domain->agaw--;
- }
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
context_copied(iommu, info->bus, info->devfn))
@@ -3634,7 +3453,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
device_block_translation(dev);
- ret = prepare_domain_attach_device(domain, dev);
+ ret = paging_domain_compatible(domain, dev);
if (ret)
return ret;
@@ -4252,8 +4071,8 @@ static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
return 0;
}
-static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
- struct iommu_domain *domain)
+void domain_remove_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dev_pasid_info *curr, *dev_pasid = NULL;
@@ -4261,10 +4080,12 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
struct dmar_domain *dmar_domain;
unsigned long flags;
- if (domain->type == IOMMU_DOMAIN_IDENTITY) {
- intel_pasid_tear_down_entry(iommu, dev, pasid, false);
+ if (!domain)
+ return;
+
+ /* Identity domain has no meta data for pasid. */
+ if (domain->type == IOMMU_DOMAIN_IDENTITY)
return;
- }
dmar_domain = to_dmar_domain(domain);
spin_lock_irqsave(&dmar_domain->lock, flags);
@@ -4282,12 +4103,20 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
domain_detach_iommu(dmar_domain, iommu);
intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
kfree(dev_pasid);
- intel_pasid_tear_down_entry(iommu, dev, pasid, false);
- intel_drain_pasid_prq(dev, pasid);
}
-static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid)
+static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
+ struct iommu_domain *domain)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+ intel_pasid_tear_down_entry(info->iommu, dev, pasid, false);
+ domain_remove_dev_pasid(domain, dev, pasid);
+}
+
+struct dev_pasid_info *
+domain_add_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
@@ -4296,22 +4125,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
unsigned long flags;
int ret;
- if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
- return -EOPNOTSUPP;
-
- if (domain->dirty_ops)
- return -EINVAL;
-
- if (context_copied(iommu, info->bus, info->devfn))
- return -EBUSY;
-
- ret = prepare_domain_attach_device(domain, dev);
- if (ret)
- return ret;
-
dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
if (!dev_pasid)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
ret = domain_attach_iommu(dmar_domain, iommu);
if (ret)
@@ -4321,31 +4137,67 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
if (ret)
goto out_detach_iommu;
- if (dmar_domain->use_first_level)
- ret = domain_setup_first_level(iommu, dmar_domain,
- dev, pasid);
- else
- ret = intel_pasid_setup_second_level(iommu, dmar_domain,
- dev, pasid);
- if (ret)
- goto out_unassign_tag;
-
dev_pasid->dev = dev;
dev_pasid->pasid = pasid;
spin_lock_irqsave(&dmar_domain->lock, flags);
list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
- if (domain->type & __IOMMU_DOMAIN_PAGING)
- intel_iommu_debugfs_create_dev_pasid(dev_pasid);
-
- return 0;
-out_unassign_tag:
- cache_tag_unassign_domain(dmar_domain, dev, pasid);
+ return dev_pasid;
out_detach_iommu:
domain_detach_iommu(dmar_domain, iommu);
out_free:
kfree(dev_pasid);
+ return ERR_PTR(ret);
+}
+
+static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ struct intel_iommu *iommu = info->iommu;
+ struct dev_pasid_info *dev_pasid;
+ int ret;
+
+ if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING)))
+ return -EINVAL;
+
+ if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
+ return -EOPNOTSUPP;
+
+ if (domain->dirty_ops)
+ return -EINVAL;
+
+ if (context_copied(iommu, info->bus, info->devfn))
+ return -EBUSY;
+
+ ret = paging_domain_compatible(domain, dev);
+ if (ret)
+ return ret;
+
+ dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
+ if (IS_ERR(dev_pasid))
+ return PTR_ERR(dev_pasid);
+
+ if (dmar_domain->use_first_level)
+ ret = domain_setup_first_level(iommu, dmar_domain,
+ dev, pasid, old);
+ else
+ ret = domain_setup_second_level(iommu, dmar_domain,
+ dev, pasid, old);
+ if (ret)
+ goto out_remove_dev_pasid;
+
+ domain_remove_dev_pasid(old, dev, pasid);
+
+ intel_iommu_debugfs_create_dev_pasid(dev_pasid);
+
+ return 0;
+
+out_remove_dev_pasid:
+ domain_remove_dev_pasid(domain, dev, pasid);
return ret;
}
@@ -4573,15 +4425,22 @@ static int identity_domain_attach_dev(struct iommu_domain *domain, struct device
}
static int identity_domain_set_dev_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid)
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
+ int ret;
if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
return -EOPNOTSUPP;
- return intel_pasid_setup_pass_through(iommu, dev, pasid);
+ ret = domain_setup_passthrough(iommu, dev, pasid, old);
+ if (ret)
+ return ret;
+
+ domain_remove_dev_pasid(old, dev, pasid);
+ return 0;
}
static struct iommu_domain identity_domain = {
@@ -4592,15 +4451,30 @@ static struct iommu_domain identity_domain = {
},
};
+static struct iommu_domain *intel_iommu_domain_alloc_paging(struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ struct dmar_domain *dmar_domain;
+ bool first_stage;
+
+ first_stage = first_level_by_default(iommu);
+ dmar_domain = paging_domain_alloc(dev, first_stage);
+ if (IS_ERR(dmar_domain))
+ return ERR_CAST(dmar_domain);
+
+ return &dmar_domain->domain;
+}
+
const struct iommu_ops intel_iommu_ops = {
.blocked_domain = &blocking_domain,
.release_domain = &blocking_domain,
.identity_domain = &identity_domain,
.capable = intel_iommu_capable,
.hw_info = intel_iommu_hw_info,
- .domain_alloc = intel_iommu_domain_alloc,
.domain_alloc_user = intel_iommu_domain_alloc_user,
.domain_alloc_sva = intel_svm_domain_alloc,
+ .domain_alloc_paging = intel_iommu_domain_alloc_paging,
.probe_device = intel_iommu_probe_device,
.release_device = intel_iommu_release_device,
.get_resv_regions = intel_iommu_get_resv_regions,
@@ -4611,9 +4485,7 @@ const struct iommu_ops intel_iommu_ops = {
.def_domain_type = device_def_domain_type,
.remove_dev_pasid = intel_iommu_remove_dev_pasid,
.pgsize_bitmap = SZ_4K,
-#ifdef CONFIG_INTEL_IOMMU_SVM
- .page_response = intel_svm_page_response,
-#endif
+ .page_response = intel_iommu_page_response,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = intel_iommu_attach_device,
.set_dev_pasid = intel_iommu_set_dev_pasid,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 1497f3112b12..2cca094c259d 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -22,6 +22,7 @@
#include <linux/bitfield.h>
#include <linux/xarray.h>
#include <linux/perf_event.h>
+#include <linux/pci.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
@@ -653,8 +654,6 @@ struct dmar_domain {
struct {
/* parent page table which the user domain is nested on */
struct dmar_domain *s2_domain;
- /* user page table pointer (in GPA) */
- unsigned long s1_pgtbl;
/* page table attributes */
struct iommu_hwpt_vtd_s1 s1_cfg;
/* link to parent domain siblings */
@@ -720,7 +719,7 @@ struct intel_iommu {
int msagaw; /* max sagaw of this iommu */
unsigned int irq, pr_irq, perf_irq;
u16 segment; /* PCI segment# */
- unsigned char name[13]; /* Device Name */
+ unsigned char name[16]; /* Device Name */
#ifdef CONFIG_INTEL_IOMMU
unsigned long *domain_ids; /* bitmap of domains */
@@ -730,12 +729,10 @@ struct intel_iommu {
struct iommu_flush flush;
#endif
-#ifdef CONFIG_INTEL_IOMMU_SVM
struct page_req_dsc *prq;
unsigned char prq_name[16]; /* Name for PRQ interrupt */
unsigned long prq_seq_number;
struct completion prq_complete;
-#endif
struct iopf_queue *iopf_queue;
unsigned char iopfq_name[16];
/* Synchronization between fault report and iommu device release. */
@@ -810,6 +807,13 @@ static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
return container_of(dom, struct dmar_domain, domain);
}
+/*
+ * Domain ID reserved for pasid entries programmed for first-level
+ * only and pass-through transfer modes.
+ */
+#define FLPT_DEFAULT_DID 1
+#define NUM_RESERVED_DID 2
+
/* Retrieve the domain ID which has allocated to the domain */
static inline u16
domain_id_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
@@ -820,6 +824,21 @@ domain_id_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
return info->did;
}
+static inline u16
+iommu_domain_did(struct iommu_domain *domain, struct intel_iommu *iommu)
+{
+ if (domain->type == IOMMU_DOMAIN_SVA ||
+ domain->type == IOMMU_DOMAIN_IDENTITY)
+ return FLPT_DEFAULT_DID;
+ return domain_id_iommu(to_dmar_domain(domain), iommu);
+}
+
+static inline bool dev_is_real_dma_subdevice(struct device *dev)
+{
+ return dev && dev_is_pci(dev) &&
+ pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
+}
+
/*
* 0: readable
* 1: writable
@@ -1230,9 +1249,18 @@ void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
void device_block_translation(struct device *dev);
-int prepare_domain_attach_device(struct iommu_domain *domain,
- struct device *dev);
-void domain_update_iommu_cap(struct dmar_domain *domain);
+int paging_domain_compatible(struct iommu_domain *domain, struct device *dev);
+
+struct dev_pasid_info *
+domain_add_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid);
+void domain_remove_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid);
+
+int __domain_setup_first_level(struct intel_iommu *iommu,
+ struct device *dev, ioasid_t pasid,
+ u16 did, pgd_t *pgd, int flags,
+ struct iommu_domain *old);
int dmar_ir_support(void);
@@ -1278,18 +1306,18 @@ void intel_context_flush_present(struct device_domain_info *info,
struct context_entry *context,
u16 did, bool affect_domains);
+int intel_iommu_enable_prq(struct intel_iommu *iommu);
+int intel_iommu_finish_prq(struct intel_iommu *iommu);
+void intel_iommu_page_response(struct device *dev, struct iopf_fault *evt,
+ struct iommu_page_response *msg);
+void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid);
+
#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
-int intel_svm_enable_prq(struct intel_iommu *iommu);
-int intel_svm_finish_prq(struct intel_iommu *iommu);
-void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
- struct iommu_page_response *msg);
struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
struct mm_struct *mm);
-void intel_drain_pasid_prq(struct device *dev, u32 pasid);
#else
static inline void intel_svm_check(struct intel_iommu *iommu) {}
-static inline void intel_drain_pasid_prq(struct device *dev, u32 pasid) {}
static inline struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
struct mm_struct *mm)
{
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 7a6d188e3bea..466c1412dd45 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -312,7 +312,7 @@ static int set_ioapic_sid(struct irte *irte, int apic)
for (i = 0; i < MAX_IO_APICS; i++) {
if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) {
- sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
+ sid = PCI_DEVID(ir_ioapic[i].bus, ir_ioapic[i].devfn);
break;
}
}
@@ -337,7 +337,7 @@ static int set_hpet_sid(struct irte *irte, u8 id)
for (i = 0; i < MAX_HPET_TBS; i++) {
if (ir_hpet[i].iommu && ir_hpet[i].id == id) {
- sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn;
+ sid = PCI_DEVID(ir_hpet[i].bus, ir_hpet[i].devfn);
break;
}
}
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index 433c58944401..42c4533a6ea2 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -40,7 +40,7 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
* The s2_domain will be used in nested translation, hence needs
* to ensure the s2_domain is compatible with this IOMMU.
*/
- ret = prepare_domain_attach_device(&dmar_domain->s2_domain->domain, dev);
+ ret = paging_domain_compatible(&dmar_domain->s2_domain->domain, dev);
if (ret) {
dev_err_ratelimited(dev, "s2 domain is not compatible\n");
return ret;
@@ -130,8 +130,58 @@ out:
return ret;
}
+static int domain_setup_nested(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
+{
+ if (!old)
+ return intel_pasid_setup_nested(iommu, dev, pasid, domain);
+ return intel_pasid_replace_nested(iommu, dev, pasid,
+ iommu_domain_did(old, iommu),
+ domain);
+}
+
+static int intel_nested_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ struct intel_iommu *iommu = info->iommu;
+ struct dev_pasid_info *dev_pasid;
+ int ret;
+
+ if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
+ return -EOPNOTSUPP;
+
+ if (context_copied(iommu, info->bus, info->devfn))
+ return -EBUSY;
+
+ ret = paging_domain_compatible(&dmar_domain->s2_domain->domain, dev);
+ if (ret)
+ return ret;
+
+ dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
+ if (IS_ERR(dev_pasid))
+ return PTR_ERR(dev_pasid);
+
+ ret = domain_setup_nested(iommu, dmar_domain, dev, pasid, old);
+ if (ret)
+ goto out_remove_dev_pasid;
+
+ domain_remove_dev_pasid(old, dev, pasid);
+
+ return 0;
+
+out_remove_dev_pasid:
+ domain_remove_dev_pasid(domain, dev, pasid);
+ return ret;
+}
+
static const struct iommu_domain_ops intel_nested_domain_ops = {
.attach_dev = intel_nested_attach_dev,
+ .set_dev_pasid = intel_nested_set_dev_pasid,
.free = intel_nested_domain_free,
.cache_invalidate_user = intel_nested_cache_invalidate_user,
};
@@ -162,7 +212,6 @@ struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
domain->use_first_level = true;
domain->s2_domain = s2_domain;
- domain->s1_pgtbl = vtd.pgtbl_addr;
domain->s1_cfg = vtd;
domain->domain.ops = &intel_nested_domain_ops;
domain->domain.type = IOMMU_DOMAIN_NESTED;
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 2e5fa0a23299..0f2a926d3bd5 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -220,7 +220,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
if (pci_dev_is_disconnected(to_pci_dev(dev)))
return;
- sid = info->bus << 8 | info->devfn;
+ sid = PCI_DEVID(info->bus, info->devfn);
qdep = info->ats_qdep;
pfsid = info->pfsid;
@@ -265,6 +265,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
devtlb_invalidation_with_pasid(iommu, dev, pasid);
+ intel_iommu_drain_pasid_prq(dev, pasid);
}
/*
@@ -287,9 +288,68 @@ static void pasid_flush_caches(struct intel_iommu *iommu,
}
/*
+ * This function is supposed to be used after caller updates the fields
+ * except for the SSADE and P bit of a pasid table entry. It does the
+ * below:
+ * - Flush cacheline if needed
+ * - Flush the caches per Table 28 ”Guidance to Software for Invalidations“
+ * of VT-d spec 5.0.
+ */
+static void intel_pasid_flush_present(struct intel_iommu *iommu,
+ struct device *dev,
+ u32 pasid, u16 did,
+ struct pasid_entry *pte)
+{
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(pte, sizeof(*pte));
+
+ /*
+ * VT-d spec 5.0 table28 states guides for cache invalidation:
+ *
+ * - PASID-selective-within-Domain PASID-cache invalidation
+ * - PASID-selective PASID-based IOTLB invalidation
+ * - If (pasid is RID_PASID)
+ * - Global Device-TLB invalidation to affected functions
+ * Else
+ * - PASID-based Device-TLB invalidation (with S=1 and
+ * Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
+ */
+ pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+ qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
+
+ devtlb_invalidation_with_pasid(iommu, dev, pasid);
+}
+
+/*
* Set up the scalable mode pasid table entry for first only
* translation type.
*/
+static void pasid_pte_config_first_level(struct intel_iommu *iommu,
+ struct pasid_entry *pte,
+ pgd_t *pgd, u16 did, int flags)
+{
+ lockdep_assert_held(&iommu->lock);
+
+ pasid_clear_entry(pte);
+
+ /* Setup the first level page table pointer: */
+ pasid_set_flptr(pte, (u64)__pa(pgd));
+
+ if (flags & PASID_FLAG_FL5LP)
+ pasid_set_flpm(pte, 1);
+
+ if (flags & PASID_FLAG_PAGE_SNOOP)
+ pasid_set_pgsnp(pte);
+
+ pasid_set_domain_id(pte, did);
+ pasid_set_address_width(pte, iommu->agaw);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+
+ /* Setup Present and PASID Granular Transfer Type: */
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
+ pasid_set_present(pte);
+}
+
int intel_pasid_setup_first_level(struct intel_iommu *iommu,
struct device *dev, pgd_t *pgd,
u32 pasid, u16 did, int flags)
@@ -320,53 +380,82 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
return -EBUSY;
}
- pasid_clear_entry(pte);
+ pasid_pte_config_first_level(iommu, pte, pgd, did, flags);
- /* Setup the first level page table pointer: */
- pasid_set_flptr(pte, (u64)__pa(pgd));
+ spin_unlock(&iommu->lock);
- if (flags & PASID_FLAG_FL5LP)
- pasid_set_flpm(pte, 1);
+ pasid_flush_caches(iommu, pte, pasid, did);
- if (flags & PASID_FLAG_PAGE_SNOOP)
- pasid_set_pgsnp(pte);
+ return 0;
+}
- pasid_set_domain_id(pte, did);
- pasid_set_address_width(pte, iommu->agaw);
- pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+int intel_pasid_replace_first_level(struct intel_iommu *iommu,
+ struct device *dev, pgd_t *pgd,
+ u32 pasid, u16 did, u16 old_did,
+ int flags)
+{
+ struct pasid_entry *pte, new_pte;
- /* Setup Present and PASID Granular Transfer Type: */
- pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
- pasid_set_present(pte);
+ if (!ecap_flts(iommu->ecap)) {
+ pr_err("No first level translation support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
+ if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
+ pr_err("No 5-level paging support for first-level on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
+ pasid_pte_config_first_level(iommu, &new_pte, pgd, did, flags);
+
+ spin_lock(&iommu->lock);
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (!pte) {
+ spin_unlock(&iommu->lock);
+ return -ENODEV;
+ }
+
+ if (!pasid_pte_is_present(pte)) {
+ spin_unlock(&iommu->lock);
+ return -EINVAL;
+ }
+
+ WARN_ON(old_did != pasid_get_domain_id(pte));
+
+ *pte = new_pte;
spin_unlock(&iommu->lock);
- pasid_flush_caches(iommu, pte, pasid, did);
+ intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
+ intel_iommu_drain_pasid_prq(dev, pasid);
return 0;
}
/*
- * Skip top levels of page tables for iommu which has less agaw
- * than default. Unnecessary for PT mode.
+ * Set up the scalable mode pasid entry for second only translation type.
*/
-static int iommu_skip_agaw(struct dmar_domain *domain,
- struct intel_iommu *iommu,
- struct dma_pte **pgd)
+static void pasid_pte_config_second_level(struct intel_iommu *iommu,
+ struct pasid_entry *pte,
+ u64 pgd_val, int agaw, u16 did,
+ bool dirty_tracking)
{
- int agaw;
+ lockdep_assert_held(&iommu->lock);
- for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
- *pgd = phys_to_virt(dma_pte_addr(*pgd));
- if (!dma_pte_present(*pgd))
- return -EINVAL;
- }
+ pasid_clear_entry(pte);
+ pasid_set_domain_id(pte, did);
+ pasid_set_slptr(pte, pgd_val);
+ pasid_set_address_width(pte, agaw);
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
+ pasid_set_fault_enable(pte);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+ if (dirty_tracking)
+ pasid_set_ssade(pte);
- return agaw;
+ pasid_set_present(pte);
}
-/*
- * Set up the scalable mode pasid entry for second only translation type.
- */
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, u32 pasid)
@@ -374,7 +463,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
struct pasid_entry *pte;
struct dma_pte *pgd;
u64 pgd_val;
- int agaw;
u16 did;
/*
@@ -388,15 +476,58 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
}
pgd = domain->pgd;
- agaw = iommu_skip_agaw(domain, iommu, &pgd);
- if (agaw < 0) {
- dev_err(dev, "Invalid domain page table\n");
+ pgd_val = virt_to_phys(pgd);
+ did = domain_id_iommu(domain, iommu);
+
+ spin_lock(&iommu->lock);
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (!pte) {
+ spin_unlock(&iommu->lock);
+ return -ENODEV;
+ }
+
+ if (pasid_pte_is_present(pte)) {
+ spin_unlock(&iommu->lock);
+ return -EBUSY;
+ }
+
+ pasid_pte_config_second_level(iommu, pte, pgd_val, domain->agaw,
+ did, domain->dirty_tracking);
+ spin_unlock(&iommu->lock);
+
+ pasid_flush_caches(iommu, pte, pasid, did);
+
+ return 0;
+}
+
+int intel_pasid_replace_second_level(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, u16 old_did,
+ u32 pasid)
+{
+ struct pasid_entry *pte, new_pte;
+ struct dma_pte *pgd;
+ u64 pgd_val;
+ u16 did;
+
+ /*
+ * If hardware advertises no support for second level
+ * translation, return directly.
+ */
+ if (!ecap_slts(iommu->ecap)) {
+ pr_err("No second level translation support on %s\n",
+ iommu->name);
return -EINVAL;
}
+ pgd = domain->pgd;
pgd_val = virt_to_phys(pgd);
did = domain_id_iommu(domain, iommu);
+ pasid_pte_config_second_level(iommu, &new_pte, pgd_val,
+ domain->agaw, did,
+ domain->dirty_tracking);
+
spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
@@ -404,25 +535,18 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
return -ENODEV;
}
- if (pasid_pte_is_present(pte)) {
+ if (!pasid_pte_is_present(pte)) {
spin_unlock(&iommu->lock);
- return -EBUSY;
+ return -EINVAL;
}
- pasid_clear_entry(pte);
- pasid_set_domain_id(pte, did);
- pasid_set_slptr(pte, pgd_val);
- pasid_set_address_width(pte, agaw);
- pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
- pasid_set_fault_enable(pte);
- pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
- if (domain->dirty_tracking)
- pasid_set_ssade(pte);
+ WARN_ON(old_did != pasid_get_domain_id(pte));
- pasid_set_present(pte);
+ *pte = new_pte;
spin_unlock(&iommu->lock);
- pasid_flush_caches(iommu, pte, pasid, did);
+ intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
+ intel_iommu_drain_pasid_prq(dev, pasid);
return 0;
}
@@ -499,6 +623,20 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
/*
* Set up the scalable mode pasid entry for passthrough translation type.
*/
+static void pasid_pte_config_pass_through(struct intel_iommu *iommu,
+ struct pasid_entry *pte, u16 did)
+{
+ lockdep_assert_held(&iommu->lock);
+
+ pasid_clear_entry(pte);
+ pasid_set_domain_id(pte, did);
+ pasid_set_address_width(pte, iommu->agaw);
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
+ pasid_set_fault_enable(pte);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+ pasid_set_present(pte);
+}
+
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
struct device *dev, u32 pasid)
{
@@ -517,13 +655,7 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
return -EBUSY;
}
- pasid_clear_entry(pte);
- pasid_set_domain_id(pte, did);
- pasid_set_address_width(pte, iommu->agaw);
- pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
- pasid_set_fault_enable(pte);
- pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
- pasid_set_present(pte);
+ pasid_pte_config_pass_through(iommu, pte, did);
spin_unlock(&iommu->lock);
pasid_flush_caches(iommu, pte, pasid, did);
@@ -531,6 +663,38 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
return 0;
}
+int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
+ struct device *dev, u16 old_did,
+ u32 pasid)
+{
+ struct pasid_entry *pte, new_pte;
+ u16 did = FLPT_DEFAULT_DID;
+
+ pasid_pte_config_pass_through(iommu, &new_pte, did);
+
+ spin_lock(&iommu->lock);
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (!pte) {
+ spin_unlock(&iommu->lock);
+ return -ENODEV;
+ }
+
+ if (!pasid_pte_is_present(pte)) {
+ spin_unlock(&iommu->lock);
+ return -EINVAL;
+ }
+
+ WARN_ON(old_did != pasid_get_domain_id(pte));
+
+ *pte = new_pte;
+ spin_unlock(&iommu->lock);
+
+ intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
+ intel_iommu_drain_pasid_prq(dev, pasid);
+
+ return 0;
+}
+
/*
* Set the page snoop control for a pasid entry which has been set up.
*/
@@ -551,24 +715,47 @@ void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
did = pasid_get_domain_id(pte);
spin_unlock(&iommu->lock);
- if (!ecap_coherent(iommu->ecap))
- clflush_cache_range(pte, sizeof(*pte));
+ intel_pasid_flush_present(iommu, dev, pasid, did, pte);
+}
- /*
- * VT-d spec 3.4 table23 states guides for cache invalidation:
- *
- * - PASID-selective-within-Domain PASID-cache invalidation
- * - PASID-selective PASID-based IOTLB invalidation
- * - If (pasid is RID_PASID)
- * - Global Device-TLB invalidation to affected functions
- * Else
- * - PASID-based Device-TLB invalidation (with S=1 and
- * Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
- */
- pasid_cache_invalidation_with_pasid(iommu, did, pasid);
- qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
+static void pasid_pte_config_nestd(struct intel_iommu *iommu,
+ struct pasid_entry *pte,
+ struct iommu_hwpt_vtd_s1 *s1_cfg,
+ struct dmar_domain *s2_domain,
+ u16 did)
+{
+ struct dma_pte *pgd = s2_domain->pgd;
- devtlb_invalidation_with_pasid(iommu, dev, pasid);
+ lockdep_assert_held(&iommu->lock);
+
+ pasid_clear_entry(pte);
+
+ if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
+ pasid_set_flpm(pte, 1);
+
+ pasid_set_flptr(pte, s1_cfg->pgtbl_addr);
+
+ if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
+ pasid_set_sre(pte);
+ if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
+ pasid_set_wpe(pte);
+ }
+
+ if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
+ pasid_set_eafe(pte);
+
+ if (s2_domain->force_snooping)
+ pasid_set_pgsnp(pte);
+
+ pasid_set_slptr(pte, virt_to_phys(pgd));
+ pasid_set_fault_enable(pte);
+ pasid_set_domain_id(pte, did);
+ pasid_set_address_width(pte, s2_domain->agaw);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+ if (s2_domain->dirty_tracking)
+ pasid_set_ssade(pte);
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
+ pasid_set_present(pte);
}
/**
@@ -586,10 +773,8 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
u32 pasid, struct dmar_domain *domain)
{
struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
- pgd_t *s1_gpgd = (pgd_t *)(uintptr_t)domain->s1_pgtbl;
struct dmar_domain *s2_domain = domain->s2_domain;
u16 did = domain_id_iommu(domain, iommu);
- struct dma_pte *pgd = s2_domain->pgd;
struct pasid_entry *pte;
/* Address width should match the address width supported by hardware */
@@ -632,37 +817,73 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
return -EBUSY;
}
- pasid_clear_entry(pte);
+ pasid_pte_config_nestd(iommu, pte, s1_cfg, s2_domain, did);
+ spin_unlock(&iommu->lock);
- if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
- pasid_set_flpm(pte, 1);
+ pasid_flush_caches(iommu, pte, pasid, did);
- pasid_set_flptr(pte, (uintptr_t)s1_gpgd);
+ return 0;
+}
- if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
- pasid_set_sre(pte);
- if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
- pasid_set_wpe(pte);
+int intel_pasid_replace_nested(struct intel_iommu *iommu,
+ struct device *dev, u32 pasid,
+ u16 old_did, struct dmar_domain *domain)
+{
+ struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
+ struct dmar_domain *s2_domain = domain->s2_domain;
+ u16 did = domain_id_iommu(domain, iommu);
+ struct pasid_entry *pte, new_pte;
+
+ /* Address width should match the address width supported by hardware */
+ switch (s1_cfg->addr_width) {
+ case ADDR_WIDTH_4LEVEL:
+ break;
+ case ADDR_WIDTH_5LEVEL:
+ if (!cap_fl5lp_support(iommu->cap)) {
+ dev_err_ratelimited(dev,
+ "5-level paging not supported\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
+ s1_cfg->addr_width);
+ return -EINVAL;
}
- if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
- pasid_set_eafe(pte);
+ if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
+ pr_err_ratelimited("No supervisor request support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
- if (s2_domain->force_snooping)
- pasid_set_pgsnp(pte);
+ if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
+ pr_err_ratelimited("No extended access flag support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
- pasid_set_slptr(pte, virt_to_phys(pgd));
- pasid_set_fault_enable(pte);
- pasid_set_domain_id(pte, did);
- pasid_set_address_width(pte, s2_domain->agaw);
- pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
- if (s2_domain->dirty_tracking)
- pasid_set_ssade(pte);
- pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
- pasid_set_present(pte);
+ pasid_pte_config_nestd(iommu, &new_pte, s1_cfg, s2_domain, did);
+
+ spin_lock(&iommu->lock);
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (!pte) {
+ spin_unlock(&iommu->lock);
+ return -ENODEV;
+ }
+
+ if (!pasid_pte_is_present(pte)) {
+ spin_unlock(&iommu->lock);
+ return -EINVAL;
+ }
+
+ WARN_ON(old_did != pasid_get_domain_id(pte));
+
+ *pte = new_pte;
spin_unlock(&iommu->lock);
- pasid_flush_caches(iommu, pte, pasid, did);
+ intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
+ intel_iommu_drain_pasid_prq(dev, pasid);
return 0;
}
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index dde6d3ba5ae0..082f4fe20216 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -22,13 +22,6 @@
#define is_pasid_enabled(entry) (((entry)->lo >> 3) & 0x1)
#define get_pasid_dir_size(entry) (1 << ((((entry)->lo >> 9) & 0x7) + 7))
-/*
- * Domain ID reserved for pasid entries programmed for first-level
- * only and pass-through transfer modes.
- */
-#define FLPT_DEFAULT_DID 1
-#define NUM_RESERVED_DID 2
-
#define PASID_FLAG_NESTED BIT(1)
#define PASID_FLAG_PAGE_SNOOP BIT(2)
@@ -303,6 +296,21 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
struct device *dev, u32 pasid);
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
u32 pasid, struct dmar_domain *domain);
+int intel_pasid_replace_first_level(struct intel_iommu *iommu,
+ struct device *dev, pgd_t *pgd,
+ u32 pasid, u16 did, u16 old_did,
+ int flags);
+int intel_pasid_replace_second_level(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, u16 old_did,
+ u32 pasid);
+int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
+ struct device *dev, u16 old_did,
+ u32 pasid);
+int intel_pasid_replace_nested(struct intel_iommu *iommu,
+ struct device *dev, u32 pasid,
+ u16 old_did, struct dmar_domain *domain);
+
void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
struct device *dev, u32 pasid,
bool fault_ignore);
diff --git a/drivers/iommu/intel/prq.c b/drivers/iommu/intel/prq.c
new file mode 100644
index 000000000000..c2d792db52c3
--- /dev/null
+++ b/drivers/iommu/intel/prq.c
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 Intel Corporation
+ *
+ * Originally split from drivers/iommu/intel/svm.c
+ */
+
+#include <linux/pci.h>
+#include <linux/pci-ats.h>
+
+#include "iommu.h"
+#include "pasid.h"
+#include "../iommu-pages.h"
+#include "trace.h"
+
+/* Page request queue descriptor */
+struct page_req_dsc {
+ union {
+ struct {
+ u64 type:8;
+ u64 pasid_present:1;
+ u64 rsvd:7;
+ u64 rid:16;
+ u64 pasid:20;
+ u64 exe_req:1;
+ u64 pm_req:1;
+ u64 rsvd2:10;
+ };
+ u64 qw_0;
+ };
+ union {
+ struct {
+ u64 rd_req:1;
+ u64 wr_req:1;
+ u64 lpig:1;
+ u64 prg_index:9;
+ u64 addr:52;
+ };
+ u64 qw_1;
+ };
+ u64 qw_2;
+ u64 qw_3;
+};
+
+/**
+ * intel_iommu_drain_pasid_prq - Drain page requests and responses for a pasid
+ * @dev: target device
+ * @pasid: pasid for draining
+ *
+ * Drain all pending page requests and responses related to @pasid in both
+ * software and hardware. This is supposed to be called after the device
+ * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
+ * and DevTLB have been invalidated.
+ *
+ * It waits until all pending page requests for @pasid in the page fault
+ * queue are completed by the prq handling thread. Then follow the steps
+ * described in VT-d spec CH7.10 to drain all page requests and page
+ * responses pending in the hardware.
+ */
+void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid)
+{
+ struct device_domain_info *info;
+ struct dmar_domain *domain;
+ struct intel_iommu *iommu;
+ struct qi_desc desc[3];
+ int head, tail;
+ u16 sid, did;
+
+ info = dev_iommu_priv_get(dev);
+ if (!info->pri_enabled)
+ return;
+
+ iommu = info->iommu;
+ domain = info->domain;
+ sid = PCI_DEVID(info->bus, info->devfn);
+ did = domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID;
+
+ /*
+ * Check and wait until all pending page requests in the queue are
+ * handled by the prq handling thread.
+ */
+prq_retry:
+ reinit_completion(&iommu->prq_complete);
+ tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
+ head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+ while (head != tail) {
+ struct page_req_dsc *req;
+
+ req = &iommu->prq[head / sizeof(*req)];
+ if (!req->pasid_present || req->pasid != pasid) {
+ head = (head + sizeof(*req)) & PRQ_RING_MASK;
+ continue;
+ }
+
+ wait_for_completion(&iommu->prq_complete);
+ goto prq_retry;
+ }
+
+ iopf_queue_flush_dev(dev);
+
+ /*
+ * Perform steps described in VT-d spec CH7.10 to drain page
+ * requests and responses in hardware.
+ */
+ memset(desc, 0, sizeof(desc));
+ desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
+ QI_IWD_FENCE |
+ QI_IWD_TYPE;
+ if (pasid == IOMMU_NO_PASID) {
+ qi_desc_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, &desc[1]);
+ qi_desc_dev_iotlb(sid, info->pfsid, info->ats_qdep, 0,
+ MAX_AGAW_PFN_WIDTH, &desc[2]);
+ } else {
+ qi_desc_piotlb(did, pasid, 0, -1, 0, &desc[1]);
+ qi_desc_dev_iotlb_pasid(sid, info->pfsid, pasid, info->ats_qdep,
+ 0, MAX_AGAW_PFN_WIDTH, &desc[2]);
+ }
+qi_retry:
+ reinit_completion(&iommu->prq_complete);
+ qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
+ if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
+ wait_for_completion(&iommu->prq_complete);
+ goto qi_retry;
+ }
+}
+
+static bool is_canonical_address(u64 addr)
+{
+ int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
+ long saddr = (long)addr;
+
+ return (((saddr << shift) >> shift) == saddr);
+}
+
+static void handle_bad_prq_event(struct intel_iommu *iommu,
+ struct page_req_dsc *req, int result)
+{
+ struct qi_desc desc = { };
+
+ pr_err("%s: Invalid page request: %08llx %08llx\n",
+ iommu->name, ((unsigned long long *)req)[0],
+ ((unsigned long long *)req)[1]);
+
+ if (!req->lpig)
+ return;
+
+ desc.qw0 = QI_PGRP_PASID(req->pasid) |
+ QI_PGRP_DID(req->rid) |
+ QI_PGRP_PASID_P(req->pasid_present) |
+ QI_PGRP_RESP_CODE(result) |
+ QI_PGRP_RESP_TYPE;
+ desc.qw1 = QI_PGRP_IDX(req->prg_index) |
+ QI_PGRP_LPIG(req->lpig);
+
+ qi_submit_sync(iommu, &desc, 1, 0);
+}
+
+static int prq_to_iommu_prot(struct page_req_dsc *req)
+{
+ int prot = 0;
+
+ if (req->rd_req)
+ prot |= IOMMU_FAULT_PERM_READ;
+ if (req->wr_req)
+ prot |= IOMMU_FAULT_PERM_WRITE;
+ if (req->exe_req)
+ prot |= IOMMU_FAULT_PERM_EXEC;
+ if (req->pm_req)
+ prot |= IOMMU_FAULT_PERM_PRIV;
+
+ return prot;
+}
+
+static void intel_prq_report(struct intel_iommu *iommu, struct device *dev,
+ struct page_req_dsc *desc)
+{
+ struct iopf_fault event = { };
+
+ /* Fill in event data for device specific processing */
+ event.fault.type = IOMMU_FAULT_PAGE_REQ;
+ event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
+ event.fault.prm.pasid = desc->pasid;
+ event.fault.prm.grpid = desc->prg_index;
+ event.fault.prm.perm = prq_to_iommu_prot(desc);
+
+ if (desc->lpig)
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+ if (desc->pasid_present) {
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
+ }
+
+ iommu_report_device_fault(dev, &event);
+}
+
+static irqreturn_t prq_event_thread(int irq, void *d)
+{
+ struct intel_iommu *iommu = d;
+ struct page_req_dsc *req;
+ int head, tail, handled;
+ struct device *dev;
+ u64 address;
+
+ /*
+ * Clear PPR bit before reading head/tail registers, to ensure that
+ * we get a new interrupt if needed.
+ */
+ writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
+
+ tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
+ head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+ handled = (head != tail);
+ while (head != tail) {
+ req = &iommu->prq[head / sizeof(*req)];
+ address = (u64)req->addr << VTD_PAGE_SHIFT;
+
+ if (unlikely(!is_canonical_address(address))) {
+ pr_err("IOMMU: %s: Address is not canonical\n",
+ iommu->name);
+bad_req:
+ handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
+ goto prq_advance;
+ }
+
+ if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
+ pr_err("IOMMU: %s: Page request in Privilege Mode\n",
+ iommu->name);
+ goto bad_req;
+ }
+
+ if (unlikely(req->exe_req && req->rd_req)) {
+ pr_err("IOMMU: %s: Execution request not supported\n",
+ iommu->name);
+ goto bad_req;
+ }
+
+ /* Drop Stop Marker message. No need for a response. */
+ if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
+ goto prq_advance;
+
+ /*
+ * If prq is to be handled outside iommu driver via receiver of
+ * the fault notifiers, we skip the page response here.
+ */
+ mutex_lock(&iommu->iopf_lock);
+ dev = device_rbtree_find(iommu, req->rid);
+ if (!dev) {
+ mutex_unlock(&iommu->iopf_lock);
+ goto bad_req;
+ }
+
+ intel_prq_report(iommu, dev, req);
+ trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
+ req->qw_2, req->qw_3,
+ iommu->prq_seq_number++);
+ mutex_unlock(&iommu->iopf_lock);
+prq_advance:
+ head = (head + sizeof(*req)) & PRQ_RING_MASK;
+ }
+
+ dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
+
+ /*
+ * Clear the page request overflow bit and wake up all threads that
+ * are waiting for the completion of this handling.
+ */
+ if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
+ pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
+ iommu->name);
+ head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+ tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
+ if (head == tail) {
+ iopf_queue_discard_partial(iommu->iopf_queue);
+ writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
+ pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
+ iommu->name);
+ }
+ }
+
+ if (!completion_done(&iommu->prq_complete))
+ complete(&iommu->prq_complete);
+
+ return IRQ_RETVAL(handled);
+}
+
+int intel_iommu_enable_prq(struct intel_iommu *iommu)
+{
+ struct iopf_queue *iopfq;
+ int irq, ret;
+
+ iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER);
+ if (!iommu->prq) {
+ pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
+ iommu->name);
+ return -ENOMEM;
+ }
+
+ irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
+ if (irq <= 0) {
+ pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
+ iommu->name);
+ ret = -EINVAL;
+ goto free_prq;
+ }
+ iommu->pr_irq = irq;
+
+ snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
+ "dmar%d-iopfq", iommu->seq_id);
+ iopfq = iopf_queue_alloc(iommu->iopfq_name);
+ if (!iopfq) {
+ pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
+ ret = -ENOMEM;
+ goto free_hwirq;
+ }
+ iommu->iopf_queue = iopfq;
+
+ snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
+
+ ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
+ iommu->prq_name, iommu);
+ if (ret) {
+ pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
+ iommu->name);
+ goto free_iopfq;
+ }
+ dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
+ dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
+ dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
+
+ init_completion(&iommu->prq_complete);
+
+ return 0;
+
+free_iopfq:
+ iopf_queue_free(iommu->iopf_queue);
+ iommu->iopf_queue = NULL;
+free_hwirq:
+ dmar_free_hwirq(irq);
+ iommu->pr_irq = 0;
+free_prq:
+ iommu_free_pages(iommu->prq, PRQ_ORDER);
+ iommu->prq = NULL;
+
+ return ret;
+}
+
+int intel_iommu_finish_prq(struct intel_iommu *iommu)
+{
+ dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
+ dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
+ dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
+
+ if (iommu->pr_irq) {
+ free_irq(iommu->pr_irq, iommu);
+ dmar_free_hwirq(iommu->pr_irq);
+ iommu->pr_irq = 0;
+ }
+
+ if (iommu->iopf_queue) {
+ iopf_queue_free(iommu->iopf_queue);
+ iommu->iopf_queue = NULL;
+ }
+
+ iommu_free_pages(iommu->prq, PRQ_ORDER);
+ iommu->prq = NULL;
+
+ return 0;
+}
+
+void intel_iommu_page_response(struct device *dev, struct iopf_fault *evt,
+ struct iommu_page_response *msg)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ u8 bus = info->bus, devfn = info->devfn;
+ struct iommu_fault_page_request *prm;
+ struct qi_desc desc;
+ bool pasid_present;
+ bool last_page;
+ u16 sid;
+
+ prm = &evt->fault.prm;
+ sid = PCI_DEVID(bus, devfn);
+ pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+
+ desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
+ QI_PGRP_PASID_P(pasid_present) |
+ QI_PGRP_RESP_CODE(msg->code) |
+ QI_PGRP_RESP_TYPE;
+ desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
+ desc.qw2 = 0;
+ desc.qw3 = 0;
+
+ qi_submit_sync(iommu, &desc, 1, 0);
+}
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 078d1e32a24e..f5569347591f 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -25,92 +25,6 @@
#include "../iommu-pages.h"
#include "trace.h"
-static irqreturn_t prq_event_thread(int irq, void *d);
-
-int intel_svm_enable_prq(struct intel_iommu *iommu)
-{
- struct iopf_queue *iopfq;
- int irq, ret;
-
- iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER);
- if (!iommu->prq) {
- pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
- iommu->name);
- return -ENOMEM;
- }
-
- irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
- if (irq <= 0) {
- pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
- iommu->name);
- ret = -EINVAL;
- goto free_prq;
- }
- iommu->pr_irq = irq;
-
- snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
- "dmar%d-iopfq", iommu->seq_id);
- iopfq = iopf_queue_alloc(iommu->iopfq_name);
- if (!iopfq) {
- pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
- ret = -ENOMEM;
- goto free_hwirq;
- }
- iommu->iopf_queue = iopfq;
-
- snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
-
- ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
- iommu->prq_name, iommu);
- if (ret) {
- pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
- iommu->name);
- goto free_iopfq;
- }
- dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
- dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
- dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
-
- init_completion(&iommu->prq_complete);
-
- return 0;
-
-free_iopfq:
- iopf_queue_free(iommu->iopf_queue);
- iommu->iopf_queue = NULL;
-free_hwirq:
- dmar_free_hwirq(irq);
- iommu->pr_irq = 0;
-free_prq:
- iommu_free_pages(iommu->prq, PRQ_ORDER);
- iommu->prq = NULL;
-
- return ret;
-}
-
-int intel_svm_finish_prq(struct intel_iommu *iommu)
-{
- dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
- dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
- dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
-
- if (iommu->pr_irq) {
- free_irq(iommu->pr_irq, iommu);
- dmar_free_hwirq(iommu->pr_irq);
- iommu->pr_irq = 0;
- }
-
- if (iommu->iopf_queue) {
- iopf_queue_free(iommu->iopf_queue);
- iommu->iopf_queue = NULL;
- }
-
- iommu_free_pages(iommu->prq, PRQ_ORDER);
- iommu->prq = NULL;
-
- return 0;
-}
-
void intel_svm_check(struct intel_iommu *iommu)
{
if (!pasid_supported(iommu))
@@ -197,360 +111,37 @@ static const struct mmu_notifier_ops intel_mmuops = {
};
static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid)
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu = info->iommu;
struct mm_struct *mm = domain->mm;
struct dev_pasid_info *dev_pasid;
unsigned long sflags;
- unsigned long flags;
int ret = 0;
- dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
- if (!dev_pasid)
- return -ENOMEM;
-
- dev_pasid->dev = dev;
- dev_pasid->pasid = pasid;
-
- ret = cache_tag_assign_domain(to_dmar_domain(domain), dev, pasid);
- if (ret)
- goto free_dev_pasid;
+ dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
+ if (IS_ERR(dev_pasid))
+ return PTR_ERR(dev_pasid);
/* Setup the pasid table: */
sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
- ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
- FLPT_DEFAULT_DID, sflags);
+ ret = __domain_setup_first_level(iommu, dev, pasid,
+ FLPT_DEFAULT_DID, mm->pgd,
+ sflags, old);
if (ret)
- goto unassign_tag;
+ goto out_remove_dev_pasid;
- spin_lock_irqsave(&dmar_domain->lock, flags);
- list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
- spin_unlock_irqrestore(&dmar_domain->lock, flags);
+ domain_remove_dev_pasid(old, dev, pasid);
return 0;
-unassign_tag:
- cache_tag_unassign_domain(to_dmar_domain(domain), dev, pasid);
-free_dev_pasid:
- kfree(dev_pasid);
-
+out_remove_dev_pasid:
+ domain_remove_dev_pasid(domain, dev, pasid);
return ret;
}
-/* Page request queue descriptor */
-struct page_req_dsc {
- union {
- struct {
- u64 type:8;
- u64 pasid_present:1;
- u64 rsvd:7;
- u64 rid:16;
- u64 pasid:20;
- u64 exe_req:1;
- u64 pm_req:1;
- u64 rsvd2:10;
- };
- u64 qw_0;
- };
- union {
- struct {
- u64 rd_req:1;
- u64 wr_req:1;
- u64 lpig:1;
- u64 prg_index:9;
- u64 addr:52;
- };
- u64 qw_1;
- };
- u64 qw_2;
- u64 qw_3;
-};
-
-static bool is_canonical_address(u64 addr)
-{
- int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
- long saddr = (long) addr;
-
- return (((saddr << shift) >> shift) == saddr);
-}
-
-/**
- * intel_drain_pasid_prq - Drain page requests and responses for a pasid
- * @dev: target device
- * @pasid: pasid for draining
- *
- * Drain all pending page requests and responses related to @pasid in both
- * software and hardware. This is supposed to be called after the device
- * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
- * and DevTLB have been invalidated.
- *
- * It waits until all pending page requests for @pasid in the page fault
- * queue are completed by the prq handling thread. Then follow the steps
- * described in VT-d spec CH7.10 to drain all page requests and page
- * responses pending in the hardware.
- */
-void intel_drain_pasid_prq(struct device *dev, u32 pasid)
-{
- struct device_domain_info *info;
- struct dmar_domain *domain;
- struct intel_iommu *iommu;
- struct qi_desc desc[3];
- struct pci_dev *pdev;
- int head, tail;
- u16 sid, did;
- int qdep;
-
- info = dev_iommu_priv_get(dev);
- if (WARN_ON(!info || !dev_is_pci(dev)))
- return;
-
- if (!info->pri_enabled)
- return;
-
- iommu = info->iommu;
- domain = info->domain;
- pdev = to_pci_dev(dev);
- sid = PCI_DEVID(info->bus, info->devfn);
- did = domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID;
- qdep = pci_ats_queue_depth(pdev);
-
- /*
- * Check and wait until all pending page requests in the queue are
- * handled by the prq handling thread.
- */
-prq_retry:
- reinit_completion(&iommu->prq_complete);
- tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
- head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
- while (head != tail) {
- struct page_req_dsc *req;
-
- req = &iommu->prq[head / sizeof(*req)];
- if (!req->pasid_present || req->pasid != pasid) {
- head = (head + sizeof(*req)) & PRQ_RING_MASK;
- continue;
- }
-
- wait_for_completion(&iommu->prq_complete);
- goto prq_retry;
- }
-
- iopf_queue_flush_dev(dev);
-
- /*
- * Perform steps described in VT-d spec CH7.10 to drain page
- * requests and responses in hardware.
- */
- memset(desc, 0, sizeof(desc));
- desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
- QI_IWD_FENCE |
- QI_IWD_TYPE;
- desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
- QI_EIOTLB_DID(did) |
- QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
- QI_EIOTLB_TYPE;
- desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
- QI_DEV_EIOTLB_SID(sid) |
- QI_DEV_EIOTLB_QDEP(qdep) |
- QI_DEIOTLB_TYPE |
- QI_DEV_IOTLB_PFSID(info->pfsid);
-qi_retry:
- reinit_completion(&iommu->prq_complete);
- qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
- if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
- wait_for_completion(&iommu->prq_complete);
- goto qi_retry;
- }
-}
-
-static int prq_to_iommu_prot(struct page_req_dsc *req)
-{
- int prot = 0;
-
- if (req->rd_req)
- prot |= IOMMU_FAULT_PERM_READ;
- if (req->wr_req)
- prot |= IOMMU_FAULT_PERM_WRITE;
- if (req->exe_req)
- prot |= IOMMU_FAULT_PERM_EXEC;
- if (req->pm_req)
- prot |= IOMMU_FAULT_PERM_PRIV;
-
- return prot;
-}
-
-static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
- struct page_req_dsc *desc)
-{
- struct iopf_fault event = { };
-
- /* Fill in event data for device specific processing */
- event.fault.type = IOMMU_FAULT_PAGE_REQ;
- event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
- event.fault.prm.pasid = desc->pasid;
- event.fault.prm.grpid = desc->prg_index;
- event.fault.prm.perm = prq_to_iommu_prot(desc);
-
- if (desc->lpig)
- event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
- if (desc->pasid_present) {
- event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
- event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
- }
-
- iommu_report_device_fault(dev, &event);
-}
-
-static void handle_bad_prq_event(struct intel_iommu *iommu,
- struct page_req_dsc *req, int result)
-{
- struct qi_desc desc = { };
-
- pr_err("%s: Invalid page request: %08llx %08llx\n",
- iommu->name, ((unsigned long long *)req)[0],
- ((unsigned long long *)req)[1]);
-
- if (!req->lpig)
- return;
-
- desc.qw0 = QI_PGRP_PASID(req->pasid) |
- QI_PGRP_DID(req->rid) |
- QI_PGRP_PASID_P(req->pasid_present) |
- QI_PGRP_RESP_CODE(result) |
- QI_PGRP_RESP_TYPE;
- desc.qw1 = QI_PGRP_IDX(req->prg_index) |
- QI_PGRP_LPIG(req->lpig);
-
- qi_submit_sync(iommu, &desc, 1, 0);
-}
-
-static irqreturn_t prq_event_thread(int irq, void *d)
-{
- struct intel_iommu *iommu = d;
- struct page_req_dsc *req;
- int head, tail, handled;
- struct device *dev;
- u64 address;
-
- /*
- * Clear PPR bit before reading head/tail registers, to ensure that
- * we get a new interrupt if needed.
- */
- writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
-
- tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
- head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
- handled = (head != tail);
- while (head != tail) {
- req = &iommu->prq[head / sizeof(*req)];
- address = (u64)req->addr << VTD_PAGE_SHIFT;
-
- if (unlikely(!req->pasid_present)) {
- pr_err("IOMMU: %s: Page request without PASID\n",
- iommu->name);
-bad_req:
- handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
- goto prq_advance;
- }
-
- if (unlikely(!is_canonical_address(address))) {
- pr_err("IOMMU: %s: Address is not canonical\n",
- iommu->name);
- goto bad_req;
- }
-
- if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
- pr_err("IOMMU: %s: Page request in Privilege Mode\n",
- iommu->name);
- goto bad_req;
- }
-
- if (unlikely(req->exe_req && req->rd_req)) {
- pr_err("IOMMU: %s: Execution request not supported\n",
- iommu->name);
- goto bad_req;
- }
-
- /* Drop Stop Marker message. No need for a response. */
- if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
- goto prq_advance;
-
- /*
- * If prq is to be handled outside iommu driver via receiver of
- * the fault notifiers, we skip the page response here.
- */
- mutex_lock(&iommu->iopf_lock);
- dev = device_rbtree_find(iommu, req->rid);
- if (!dev) {
- mutex_unlock(&iommu->iopf_lock);
- goto bad_req;
- }
-
- intel_svm_prq_report(iommu, dev, req);
- trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
- req->qw_2, req->qw_3,
- iommu->prq_seq_number++);
- mutex_unlock(&iommu->iopf_lock);
-prq_advance:
- head = (head + sizeof(*req)) & PRQ_RING_MASK;
- }
-
- dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
-
- /*
- * Clear the page request overflow bit and wake up all threads that
- * are waiting for the completion of this handling.
- */
- if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
- pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
- iommu->name);
- head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
- tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
- if (head == tail) {
- iopf_queue_discard_partial(iommu->iopf_queue);
- writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
- pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
- iommu->name);
- }
- }
-
- if (!completion_done(&iommu->prq_complete))
- complete(&iommu->prq_complete);
-
- return IRQ_RETVAL(handled);
-}
-
-void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
- struct iommu_page_response *msg)
-{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct intel_iommu *iommu = info->iommu;
- u8 bus = info->bus, devfn = info->devfn;
- struct iommu_fault_page_request *prm;
- struct qi_desc desc;
- bool pasid_present;
- bool last_page;
- u16 sid;
-
- prm = &evt->fault.prm;
- sid = PCI_DEVID(bus, devfn);
- pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
- last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
-
- desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
- QI_PGRP_PASID_P(pasid_present) |
- QI_PGRP_RESP_CODE(msg->code) |
- QI_PGRP_RESP_TYPE;
- desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
- desc.qw2 = 0;
- desc.qw3 = 0;
-
- qi_submit_sync(iommu, &desc, 1, 0);
-}
-
static void intel_svm_domain_free(struct iommu_domain *domain)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 87a0721c7b6d..7618e9c65d3f 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2710,16 +2710,6 @@ static int __init iommu_init(void)
}
core_initcall(iommu_init);
-int iommu_enable_nesting(struct iommu_domain *domain)
-{
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
- if (!domain->ops->enable_nesting)
- return -EINVAL;
- return domain->ops->enable_nesting(domain);
-}
-EXPORT_SYMBOL_GPL(iommu_enable_nesting);
-
int iommu_set_pgtable_quirks(struct iommu_domain *domain,
unsigned long quirk)
{
@@ -3329,7 +3319,8 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
int ret;
for_each_group_device(group, device) {
- ret = domain->ops->set_dev_pasid(domain, device->dev, pasid);
+ ret = domain->ops->set_dev_pasid(domain, device->dev,
+ pasid, NULL);
if (ret)
goto err_revert;
}
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index d06bf6e6c19f..8f020bc0815f 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -107,7 +107,8 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
const struct iommu_user_data *user_data)
{
const u32 valid_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
- IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
+ IOMMU_HWPT_FAULT_ID_VALID;
const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
struct iommufd_hwpt_paging *hwpt_paging;
struct iommufd_hw_pagetable *hwpt;
diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c
index a3ad5f0b6c59..514aacd64009 100644
--- a/drivers/iommu/iommufd/vfio_compat.c
+++ b/drivers/iommu/iommufd/vfio_compat.c
@@ -291,12 +291,7 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx,
case VFIO_DMA_CC_IOMMU:
return iommufd_vfio_cc_iommu(ictx);
- /*
- * This is obsolete, and to be removed from VFIO. It was an incomplete
- * idea that got merged.
- * https://lore.kernel.org/kvm/0-v1-0093c9b0e345+19-vfio_no_nesting_jgg@nvidia.com/
- */
- case VFIO_TYPE1_NESTING_IOMMU:
+ case __VFIO_RESERVED_TYPE1_NESTING_IOMMU:
return 0;
/*
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index bf391b40e576..50ebc9593c9d 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -72,7 +72,6 @@ struct vfio_iommu {
uint64_t pgsize_bitmap;
uint64_t num_non_pinned_groups;
bool v2;
- bool nesting;
bool dirty_page_tracking;
struct list_head emulated_iommu_groups;
};
@@ -2195,12 +2194,6 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
goto out_free_domain;
}
- if (iommu->nesting) {
- ret = iommu_enable_nesting(domain->domain);
- if (ret)
- goto out_domain;
- }
-
ret = iommu_attach_group(domain->domain, group->iommu_group);
if (ret)
goto out_domain;
@@ -2541,9 +2534,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
switch (arg) {
case VFIO_TYPE1_IOMMU:
break;
- case VFIO_TYPE1_NESTING_IOMMU:
- iommu->nesting = true;
- fallthrough;
+ case __VFIO_RESERVED_TYPE1_NESTING_IOMMU:
case VFIO_TYPE1v2_IOMMU:
iommu->v2 = true;
break;
@@ -2638,7 +2629,6 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
switch (arg) {
case VFIO_TYPE1_IOMMU:
case VFIO_TYPE1v2_IOMMU:
- case VFIO_TYPE1_NESTING_IOMMU:
case VFIO_UNMAP_ALL:
return 1;
case VFIO_UPDATE_VADDR:
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index d3858eebc255..2e917a8f8bca 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -453,7 +453,7 @@ struct acpi_table_ccel {
* IORT - IO Remapping Table
*
* Conforms to "IO Remapping Table System Software on ARM Platforms",
- * Document number: ARM DEN 0049E.e, Sep 2022
+ * Document number: ARM DEN 0049E.f, Apr 2024
*
******************************************************************************/
@@ -524,6 +524,7 @@ struct acpi_iort_memory_access {
#define ACPI_IORT_MF_COHERENCY (1)
#define ACPI_IORT_MF_ATTRIBUTES (1<<1)
+#define ACPI_IORT_MF_CANWBS (1<<2)
/*
* IORT node specific subtables
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 499bb2c63483..692b2b445761 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -292,7 +292,6 @@ static inline void dmar_copy_shared_irte(struct irte *dst, struct irte *src)
struct irq_data;
extern void dmar_msi_unmask(struct irq_data *data);
extern void dmar_msi_mask(struct irq_data *data);
-extern void dmar_msi_read(int irq, struct msi_msg *msg);
extern void dmar_msi_write(int irq, struct msi_msg *msg);
extern int dmar_set_interrupt(struct intel_iommu *iommu);
extern irqreturn_t dmar_fault(int irq, void *dev_id);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 9a3215b5c1e5..0c3bfb66dc7c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -614,7 +614,8 @@ struct iommu_ops {
* * EBUSY - device is attached to a domain and cannot be changed
* * ENODEV - device specific errors, not able to be attached
* * <others> - treated as ENODEV by the caller. Use is discouraged
- * @set_dev_pasid: set an iommu domain to a pasid of device
+ * @set_dev_pasid: set or replace an iommu domain to a pasid of device. The pasid of
+ * the device should be left in the old config in error case.
* @map_pages: map a physically contiguous set of pages of the same size to
* an iommu domain.
* @unmap_pages: unmap a number of pages of the same size from an iommu domain
@@ -633,14 +634,13 @@ struct iommu_ops {
* @enforce_cache_coherency: Prevent any kind of DMA from bypassing IOMMU_CACHE,
* including no-snoop TLPs on PCIe or other platform
* specific mechanisms.
- * @enable_nesting: Enable nesting
* @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*)
* @free: Release the domain after use.
*/
struct iommu_domain_ops {
int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev,
- ioasid_t pasid);
+ ioasid_t pasid, struct iommu_domain *old);
int (*map_pages)(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
@@ -661,7 +661,6 @@ struct iommu_domain_ops {
dma_addr_t iova);
bool (*enforce_cache_coherency)(struct iommu_domain *domain);
- int (*enable_nesting)(struct iommu_domain *domain);
int (*set_pgtable_quirks)(struct iommu_domain *domain,
unsigned long quirks);
@@ -844,7 +843,6 @@ extern void iommu_group_put(struct iommu_group *group);
extern int iommu_group_id(struct iommu_group *group);
extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *);
-int iommu_enable_nesting(struct iommu_domain *domain);
int iommu_set_pgtable_quirks(struct iommu_domain *domain,
unsigned long quirks);
@@ -994,6 +992,8 @@ struct iommu_fwspec {
/* ATS is supported */
#define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0)
+/* CANWBS is supported */
+#define IOMMU_FWSPEC_PCI_RC_CANWBS (1 << 1)
/*
* An iommu attach handle represents a relationship between an iommu domain
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 0c0ed28ee113..cc1b94f43538 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -493,14 +493,49 @@ struct iommu_hw_info_vtd {
};
/**
+ * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
+ * (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
+ *
+ * @flags: Must be set to 0
+ * @__reserved: Must be 0
+ * @idr: Implemented features for ARM SMMU Non-secure programming interface
+ * @iidr: Information about the implementation and implementer of ARM SMMU,
+ * and architecture version supported
+ * @aidr: ARM SMMU architecture version
+ *
+ * For the details of @idr, @iidr and @aidr, please refer to the chapters
+ * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
+ *
+ * User space should read the underlying ARM SMMUv3 hardware information for
+ * the list of supported features.
+ *
+ * Note that these values reflect the raw HW capability, without any insight if
+ * any required kernel driver support is present. Bits may be set indicating the
+ * HW has functionality that is lacking kernel software support, such as BTM. If
+ * a VMM is using this information to construct emulated copies of these
+ * registers it should only forward bits that it knows it can support.
+ *
+ * In future, presence of required kernel support will be indicated in flags.
+ */
+struct iommu_hw_info_arm_smmuv3 {
+ __u32 flags;
+ __u32 __reserved;
+ __u32 idr[6];
+ __u32 iidr;
+ __u32 aidr;
+};
+
+/**
* enum iommu_hw_info_type - IOMMU Hardware Info Types
* @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware
* info
* @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
+ * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
*/
enum iommu_hw_info_type {
IOMMU_HW_INFO_TYPE_NONE = 0,
IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
+ IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,
};
/**
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 2b68e6cdf190..c8dbf8219c4f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -35,7 +35,7 @@
#define VFIO_EEH 5
/* Two-stage IOMMU */
-#define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */
+#define __VFIO_RESERVED_TYPE1_NESTING_IOMMU 6 /* Implies v2 */
#define VFIO_SPAPR_TCE_v2_IOMMU 7