summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/arm64/iort.c360
-rw-r--r--drivers/char/agp/intel-gtt.c17
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c1
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c1
-rw-r--r--drivers/iommu/Kconfig36
-rw-r--r--drivers/iommu/amd/amd_iommu.h18
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h186
-rw-r--r--drivers/iommu/amd/init.c942
-rw-r--r--drivers/iommu/amd/io_pgtable.c6
-rw-r--r--drivers/iommu/amd/iommu.c585
-rw-r--r--drivers/iommu/amd/iommu_v2.c67
-rw-r--r--drivers/iommu/amd/quirks.c4
-rw-r--r--drivers/iommu/apple-dart.c4
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c144
-rw-r--r--drivers/iommu/arm/arm-smmu/Makefile1
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c142
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c34
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h28
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.c73
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.h1
-rw-r--r--drivers/iommu/arm/arm-smmu/qcom_iommu.c18
-rw-r--r--drivers/iommu/dma-iommu.c21
-rw-r--r--drivers/iommu/exynos-iommu.c182
-rw-r--r--drivers/iommu/fsl_pamu_domain.c5
-rw-r--r--drivers/iommu/intel/cap_audit.c2
-rw-r--r--drivers/iommu/intel/debugfs.c51
-rw-r--r--drivers/iommu/intel/dmar.c39
-rw-r--r--drivers/iommu/intel/iommu.c447
-rw-r--r--drivers/iommu/intel/iommu.h839
-rw-r--r--drivers/iommu/intel/irq_remapping.c2
-rw-r--r--drivers/iommu/intel/pasid.c107
-rw-r--r--drivers/iommu/intel/pasid.h1
-rw-r--r--drivers/iommu/intel/perf.c2
-rw-r--r--drivers/iommu/intel/svm.c11
-rw-r--r--drivers/iommu/intel/trace.c2
-rw-r--r--drivers/iommu/intel/trace.h99
-rw-r--r--drivers/iommu/io-pgtable-arm-v7s.c75
-rw-r--r--drivers/iommu/iommu.c55
-rw-r--r--drivers/iommu/iova.c7
-rw-r--r--drivers/iommu/msm_iommu.c7
-rw-r--r--drivers/iommu/mtk_iommu.c71
-rw-r--r--drivers/iommu/mtk_iommu_v1.c5
-rw-r--r--drivers/iommu/sprd-iommu.c11
-rw-r--r--drivers/iommu/sun50i-iommu.c3
-rw-r--r--drivers/iommu/tegra-gart.c5
-rw-r--r--drivers/iommu/tegra-smmu.c3
-rw-r--r--drivers/iommu/virtio-iommu.c31
47 files changed, 3340 insertions, 1411 deletions
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index f2f8f05662de..ca2aed86b540 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -788,6 +788,294 @@ void acpi_configure_pmsi_domain(struct device *dev)
}
#ifdef CONFIG_IOMMU_API
+static void iort_rmr_free(struct device *dev,
+ struct iommu_resv_region *region)
+{
+ struct iommu_iort_rmr_data *rmr_data;
+
+ rmr_data = container_of(region, struct iommu_iort_rmr_data, rr);
+ kfree(rmr_data->sids);
+ kfree(rmr_data);
+}
+
+static struct iommu_iort_rmr_data *iort_rmr_alloc(
+ struct acpi_iort_rmr_desc *rmr_desc,
+ int prot, enum iommu_resv_type type,
+ u32 *sids, u32 num_sids)
+{
+ struct iommu_iort_rmr_data *rmr_data;
+ struct iommu_resv_region *region;
+ u32 *sids_copy;
+ u64 addr = rmr_desc->base_address, size = rmr_desc->length;
+
+ rmr_data = kmalloc(sizeof(*rmr_data), GFP_KERNEL);
+ if (!rmr_data)
+ return NULL;
+
+ /* Create a copy of SIDs array to associate with this rmr_data */
+ sids_copy = kmemdup(sids, num_sids * sizeof(*sids), GFP_KERNEL);
+ if (!sids_copy) {
+ kfree(rmr_data);
+ return NULL;
+ }
+ rmr_data->sids = sids_copy;
+ rmr_data->num_sids = num_sids;
+
+ if (!IS_ALIGNED(addr, SZ_64K) || !IS_ALIGNED(size, SZ_64K)) {
+ /* PAGE align base addr and size */
+ addr &= PAGE_MASK;
+ size = PAGE_ALIGN(size + offset_in_page(rmr_desc->base_address));
+
+ pr_err(FW_BUG "RMR descriptor[0x%llx - 0x%llx] not aligned to 64K, continue with [0x%llx - 0x%llx]\n",
+ rmr_desc->base_address,
+ rmr_desc->base_address + rmr_desc->length - 1,
+ addr, addr + size - 1);
+ }
+
+ region = &rmr_data->rr;
+ INIT_LIST_HEAD(&region->list);
+ region->start = addr;
+ region->length = size;
+ region->prot = prot;
+ region->type = type;
+ region->free = iort_rmr_free;
+
+ return rmr_data;
+}
+
+static void iort_rmr_desc_check_overlap(struct acpi_iort_rmr_desc *desc,
+ u32 count)
+{
+ int i, j;
+
+ for (i = 0; i < count; i++) {
+ u64 end, start = desc[i].base_address, length = desc[i].length;
+
+ if (!length) {
+ pr_err(FW_BUG "RMR descriptor[0x%llx] with zero length, continue anyway\n",
+ start);
+ continue;
+ }
+
+ end = start + length - 1;
+
+ /* Check for address overlap */
+ for (j = i + 1; j < count; j++) {
+ u64 e_start = desc[j].base_address;
+ u64 e_end = e_start + desc[j].length - 1;
+
+ if (start <= e_end && end >= e_start)
+ pr_err(FW_BUG "RMR descriptor[0x%llx - 0x%llx] overlaps, continue anyway\n",
+ start, end);
+ }
+ }
+}
+
+/*
+ * Please note, we will keep the already allocated RMR reserve
+ * regions in case of a memory allocation failure.
+ */
+static void iort_get_rmrs(struct acpi_iort_node *node,
+ struct acpi_iort_node *smmu,
+ u32 *sids, u32 num_sids,
+ struct list_head *head)
+{
+ struct acpi_iort_rmr *rmr = (struct acpi_iort_rmr *)node->node_data;
+ struct acpi_iort_rmr_desc *rmr_desc;
+ int i;
+
+ rmr_desc = ACPI_ADD_PTR(struct acpi_iort_rmr_desc, node,
+ rmr->rmr_offset);
+
+ iort_rmr_desc_check_overlap(rmr_desc, rmr->rmr_count);
+
+ for (i = 0; i < rmr->rmr_count; i++, rmr_desc++) {
+ struct iommu_iort_rmr_data *rmr_data;
+ enum iommu_resv_type type;
+ int prot = IOMMU_READ | IOMMU_WRITE;
+
+ if (rmr->flags & ACPI_IORT_RMR_REMAP_PERMITTED)
+ type = IOMMU_RESV_DIRECT_RELAXABLE;
+ else
+ type = IOMMU_RESV_DIRECT;
+
+ if (rmr->flags & ACPI_IORT_RMR_ACCESS_PRIVILEGE)
+ prot |= IOMMU_PRIV;
+
+ /* Attributes 0x00 - 0x03 represents device memory */
+ if (ACPI_IORT_RMR_ACCESS_ATTRIBUTES(rmr->flags) <=
+ ACPI_IORT_RMR_ATTR_DEVICE_GRE)
+ prot |= IOMMU_MMIO;
+ else if (ACPI_IORT_RMR_ACCESS_ATTRIBUTES(rmr->flags) ==
+ ACPI_IORT_RMR_ATTR_NORMAL_IWB_OWB)
+ prot |= IOMMU_CACHE;
+
+ rmr_data = iort_rmr_alloc(rmr_desc, prot, type,
+ sids, num_sids);
+ if (!rmr_data)
+ return;
+
+ list_add_tail(&rmr_data->rr.list, head);
+ }
+}
+
+static u32 *iort_rmr_alloc_sids(u32 *sids, u32 count, u32 id_start,
+ u32 new_count)
+{
+ u32 *new_sids;
+ u32 total_count = count + new_count;
+ int i;
+
+ new_sids = krealloc_array(sids, count + new_count,
+ sizeof(*new_sids), GFP_KERNEL);
+ if (!new_sids)
+ return NULL;
+
+ for (i = count; i < total_count; i++)
+ new_sids[i] = id_start++;
+
+ return new_sids;
+}
+
+static bool iort_rmr_has_dev(struct device *dev, u32 id_start,
+ u32 id_count)
+{
+ int i;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
+ /*
+ * Make sure the kernel has preserved the boot firmware PCIe
+ * configuration. This is required to ensure that the RMR PCIe
+ * StreamIDs are still valid (Refer: ARM DEN 0049E.d Section 3.1.1.5).
+ */
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct pci_host_bridge *host = pci_find_host_bridge(pdev->bus);
+
+ if (!host->preserve_config)
+ return false;
+ }
+
+ for (i = 0; i < fwspec->num_ids; i++) {
+ if (fwspec->ids[i] >= id_start &&
+ fwspec->ids[i] <= id_start + id_count)
+ return true;
+ }
+
+ return false;
+}
+
+static void iort_node_get_rmr_info(struct acpi_iort_node *node,
+ struct acpi_iort_node *iommu,
+ struct device *dev, struct list_head *head)
+{
+ struct acpi_iort_node *smmu = NULL;
+ struct acpi_iort_rmr *rmr;
+ struct acpi_iort_id_mapping *map;
+ u32 *sids = NULL;
+ u32 num_sids = 0;
+ int i;
+
+ if (!node->mapping_offset || !node->mapping_count) {
+ pr_err(FW_BUG "Invalid ID mapping, skipping RMR node %p\n",
+ node);
+ return;
+ }
+
+ rmr = (struct acpi_iort_rmr *)node->node_data;
+ if (!rmr->rmr_offset || !rmr->rmr_count)
+ return;
+
+ map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node,
+ node->mapping_offset);
+
+ /*
+ * Go through the ID mappings and see if we have a match for SMMU
+ * and dev(if !NULL). If found, get the sids for the Node.
+ * Please note, id_count is equal to the number of IDs in the
+ * range minus one.
+ */
+ for (i = 0; i < node->mapping_count; i++, map++) {
+ struct acpi_iort_node *parent;
+
+ if (!map->id_count)
+ continue;
+
+ parent = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
+ map->output_reference);
+ if (parent != iommu)
+ continue;
+
+ /* If dev is valid, check RMR node corresponds to the dev SID */
+ if (dev && !iort_rmr_has_dev(dev, map->output_base,
+ map->id_count))
+ continue;
+
+ /* Retrieve SIDs associated with the Node. */
+ sids = iort_rmr_alloc_sids(sids, num_sids, map->output_base,
+ map->id_count + 1);
+ if (!sids)
+ return;
+
+ num_sids += map->id_count + 1;
+ }
+
+ if (!sids)
+ return;
+
+ iort_get_rmrs(node, smmu, sids, num_sids, head);
+ kfree(sids);
+}
+
+static void iort_find_rmrs(struct acpi_iort_node *iommu, struct device *dev,
+ struct list_head *head)
+{
+ struct acpi_table_iort *iort;
+ struct acpi_iort_node *iort_node, *iort_end;
+ int i;
+
+ /* Only supports ARM DEN 0049E.d onwards */
+ if (iort_table->revision < 5)
+ return;
+
+ iort = (struct acpi_table_iort *)iort_table;
+
+ iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort,
+ iort->node_offset);
+ iort_end = ACPI_ADD_PTR(struct acpi_iort_node, iort,
+ iort_table->length);
+
+ for (i = 0; i < iort->node_count; i++) {
+ if (WARN_TAINT(iort_node >= iort_end, TAINT_FIRMWARE_WORKAROUND,
+ "IORT node pointer overflows, bad table!\n"))
+ return;
+
+ if (iort_node->type == ACPI_IORT_NODE_RMR)
+ iort_node_get_rmr_info(iort_node, iommu, dev, head);
+
+ iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort_node,
+ iort_node->length);
+ }
+}
+
+/*
+ * Populate the RMR list associated with a given IOMMU and dev(if provided).
+ * If dev is NULL, the function populates all the RMRs associated with the
+ * given IOMMU.
+ */
+static void iort_iommu_rmr_get_resv_regions(struct fwnode_handle *iommu_fwnode,
+ struct device *dev,
+ struct list_head *head)
+{
+ struct acpi_iort_node *iommu;
+
+ iommu = iort_get_iort_node(iommu_fwnode);
+ if (!iommu)
+ return;
+
+ iort_find_rmrs(iommu, dev, head);
+}
+
static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev)
{
struct acpi_iort_node *iommu;
@@ -806,27 +1094,22 @@ static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev)
return NULL;
}
-/**
- * iort_iommu_msi_get_resv_regions - Reserved region driver helper
- * @dev: Device from iommu_get_resv_regions()
- * @head: Reserved region list from iommu_get_resv_regions()
- *
- * Returns: Number of msi reserved regions on success (0 if platform
- * doesn't require the reservation or no associated msi regions),
- * appropriate error value otherwise. The ITS interrupt translation
- * spaces (ITS_base + SZ_64K, SZ_64K) associated with the device
- * are the msi reserved regions.
+/*
+ * Retrieve platform specific HW MSI reserve regions.
+ * The ITS interrupt translation spaces (ITS_base + SZ_64K, SZ_64K)
+ * associated with the device are the HW MSI reserved regions.
*/
-int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
+static void iort_iommu_msi_get_resv_regions(struct device *dev,
+ struct list_head *head)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct acpi_iort_its_group *its;
struct acpi_iort_node *iommu_node, *its_node = NULL;
- int i, resv = 0;
+ int i;
iommu_node = iort_get_msi_resv_iommu(dev);
if (!iommu_node)
- return 0;
+ return;
/*
* Current logic to reserve ITS regions relies on HW topologies
@@ -846,7 +1129,7 @@ int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
}
if (!its_node)
- return 0;
+ return;
/* Move to ITS specific data */
its = (struct acpi_iort_its_group *)its_node->node_data;
@@ -860,15 +1143,52 @@ int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
region = iommu_alloc_resv_region(base + SZ_64K, SZ_64K,
prot, IOMMU_RESV_MSI);
- if (region) {
+ if (region)
list_add_tail(&region->list, head);
- resv++;
- }
}
}
+}
+
+/**
+ * iort_iommu_get_resv_regions - Generic helper to retrieve reserved regions.
+ * @dev: Device from iommu_get_resv_regions()
+ * @head: Reserved region list from iommu_get_resv_regions()
+ */
+void iort_iommu_get_resv_regions(struct device *dev, struct list_head *head)
+{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
+ iort_iommu_msi_get_resv_regions(dev, head);
+ iort_iommu_rmr_get_resv_regions(fwspec->iommu_fwnode, dev, head);
+}
+
+/**
+ * iort_get_rmr_sids - Retrieve IORT RMR node reserved regions with
+ * associated StreamIDs information.
+ * @iommu_fwnode: fwnode associated with IOMMU
+ * @head: Resereved region list
+ */
+void iort_get_rmr_sids(struct fwnode_handle *iommu_fwnode,
+ struct list_head *head)
+{
+ iort_iommu_rmr_get_resv_regions(iommu_fwnode, NULL, head);
+}
+EXPORT_SYMBOL_GPL(iort_get_rmr_sids);
+
+/**
+ * iort_put_rmr_sids - Free memory allocated for RMR reserved regions.
+ * @iommu_fwnode: fwnode associated with IOMMU
+ * @head: Resereved region list
+ */
+void iort_put_rmr_sids(struct fwnode_handle *iommu_fwnode,
+ struct list_head *head)
+{
+ struct iommu_resv_region *entry, *next;
- return (resv == its->its_count) ? resv : -ENODEV;
+ list_for_each_entry_safe(entry, next, head, list)
+ entry->free(NULL, entry);
}
+EXPORT_SYMBOL_GPL(iort_put_rmr_sids);
static inline bool iort_iommu_driver_enabled(u8 type)
{
@@ -1034,8 +1354,8 @@ int iort_iommu_configure_id(struct device *dev, const u32 *id_in)
}
#else
-int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
-{ return 0; }
+void iort_iommu_get_resv_regions(struct device *dev, struct list_head *head)
+{ }
int iort_iommu_configure_id(struct device *dev, const u32 *input_id)
{ return -ENODEV; }
#endif
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index fe7e2105e766..bf6716ff863b 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -20,7 +20,7 @@
#include <linux/kernel.h>
#include <linux/pagemap.h>
#include <linux/agp_backend.h>
-#include <linux/intel-iommu.h>
+#include <linux/iommu.h>
#include <linux/delay.h>
#include <asm/smp.h>
#include "agp.h"
@@ -573,18 +573,15 @@ static void intel_gtt_cleanup(void)
*/
static inline int needs_ilk_vtd_wa(void)
{
-#ifdef CONFIG_INTEL_IOMMU
const unsigned short gpu_devid = intel_private.pcidev->device;
- /* Query intel_iommu to see if we need the workaround. Presumably that
- * was loaded first.
+ /*
+ * Query iommu subsystem to see if we need the workaround. Presumably
+ * that was loaded first.
*/
- if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG ||
- gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG) &&
- intel_iommu_gfx_mapped)
- return 1;
-#endif
- return 0;
+ return ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG ||
+ gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG) &&
+ device_iommu_mapped(&intel_private.pcidev->dev));
}
static bool intel_gtt_can_wc(void)
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index a0f84cbe974f..fc5d94862ef3 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -27,7 +27,6 @@
#include <acpi/video.h>
#include <linux/i2c.h>
#include <linux/input.h>
-#include <linux/intel-iommu.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/dma-resv.h>
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index b7b2c14fd9e1..cd75b0ca2555 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -6,7 +6,6 @@
#include <linux/dma-resv.h>
#include <linux/highmem.h>
-#include <linux/intel-iommu.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index c79a0df090c0..50bc6962c084 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -144,6 +144,32 @@ config IOMMU_DMA
select IRQ_MSI_IOMMU
select NEED_SG_DMA_LENGTH
+config IOMMU_DMA_PCI_SAC
+ bool "Enable 64-bit legacy PCI optimisation by default"
+ depends on IOMMU_DMA
+ help
+ Enable by default an IOMMU optimisation for 64-bit legacy PCI devices,
+ wherein the DMA API layer will always first try to allocate a 32-bit
+ DMA address suitable for a single address cycle, before falling back
+ to allocating from the device's full usable address range. If your
+ system has 64-bit legacy PCI devices in 32-bit slots where using dual
+ address cycles reduces DMA throughput significantly, this may be
+ beneficial to overall performance.
+
+ If you have a modern PCI Express based system, this feature mostly just
+ represents extra overhead in the allocation path for no practical
+ benefit, and it should usually be preferable to say "n" here.
+
+ However, beware that this feature has also historically papered over
+ bugs where the IOMMU address width and/or device DMA mask is not set
+ correctly. If device DMA problems and IOMMU faults start occurring
+ after disabling this option, it is almost certainly indicative of a
+ latent driver or firmware/BIOS bug, which would previously have only
+ manifested with several gigabytes worth of concurrent DMA mappings.
+
+ If this option is not set, the feature can still be re-enabled at
+ boot time with the "iommu.forcedac=0" command-line argument.
+
# Shared Virtual Addressing
config IOMMU_SVA
bool
@@ -363,6 +389,16 @@ config ARM_SMMU_QCOM
When running on a Qualcomm platform that has the custom variant
of the ARM SMMU, this needs to be built into the SMMU driver.
+config ARM_SMMU_QCOM_DEBUG
+ bool "ARM SMMU QCOM implementation defined debug support"
+ depends on ARM_SMMU_QCOM
+ help
+ Support for implementation specific debug features in ARM SMMU
+ hardware found in QTI platforms.
+
+ Say Y here to enable debug for issues such as TLB sync timeouts
+ which requires implementation defined register dumps.
+
config ARM_SMMU_V3
tristate "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
depends on ARM64
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 1ab31074f5b3..84e5bb1bf01b 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -13,12 +13,13 @@
extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
-extern void amd_iommu_apply_erratum_63(u16 devid);
+extern void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid);
extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
extern int amd_iommu_init_devices(void);
extern void amd_iommu_uninit_devices(void);
extern void amd_iommu_init_notifier(void);
extern int amd_iommu_init_api(void);
+extern void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid);
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
void amd_iommu_debugfs_setup(struct amd_iommu *iommu);
@@ -114,10 +115,17 @@ void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
amd_iommu_domain_set_pt_root(domain, 0);
}
+static inline int get_pci_sbdf_id(struct pci_dev *pdev)
+{
+ int seg = pci_domain_nr(pdev->bus);
+ u16 devid = pci_dev_id(pdev);
+
+ return PCI_SEG_DEVID_TO_SBDF(seg, devid);
+}
extern bool translation_pre_enabled(struct amd_iommu *iommu);
extern bool amd_iommu_is_attach_deferred(struct device *dev);
-extern int __init add_special_device(u8 type, u8 id, u16 *devid,
+extern int __init add_special_device(u8 type, u8 id, u32 *devid,
bool cmd_line);
#ifdef CONFIG_DMI
@@ -128,4 +136,10 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { }
extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
u64 *root, int mode);
+extern struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
+
+extern u64 amd_iommu_efr;
+extern u64 amd_iommu_efr2;
+
+extern bool amd_iommu_snp_en;
#endif
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 72d0f5e2f651..5b1019dab328 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -67,6 +67,7 @@
#define MMIO_INTCAPXT_EVT_OFFSET 0x0170
#define MMIO_INTCAPXT_PPR_OFFSET 0x0178
#define MMIO_INTCAPXT_GALOG_OFFSET 0x0180
+#define MMIO_EXT_FEATURES2 0x01A0
#define MMIO_CMD_HEAD_OFFSET 0x2000
#define MMIO_CMD_TAIL_OFFSET 0x2008
#define MMIO_EVT_HEAD_OFFSET 0x2010
@@ -102,6 +103,12 @@
#define FEATURE_GLXVAL_SHIFT 14
#define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT)
+/* Extended Feature 2 Bits */
+#define FEATURE_SNPAVICSUP_SHIFT 5
+#define FEATURE_SNPAVICSUP_MASK (0x07ULL << FEATURE_SNPAVICSUP_SHIFT)
+#define FEATURE_SNPAVICSUP_GAM(x) \
+ ((x & FEATURE_SNPAVICSUP_MASK) >> FEATURE_SNPAVICSUP_SHIFT == 0x1)
+
/* Note:
* The current driver only support 16-bit PASID.
* Currently, hardware only implement upto 16-bit PASID
@@ -143,27 +150,28 @@
#define EVENT_FLAG_I 0x008
/* feature control bits */
-#define CONTROL_IOMMU_EN 0x00ULL
-#define CONTROL_HT_TUN_EN 0x01ULL
-#define CONTROL_EVT_LOG_EN 0x02ULL
-#define CONTROL_EVT_INT_EN 0x03ULL
-#define CONTROL_COMWAIT_EN 0x04ULL
-#define CONTROL_INV_TIMEOUT 0x05ULL
-#define CONTROL_PASSPW_EN 0x08ULL
-#define CONTROL_RESPASSPW_EN 0x09ULL
-#define CONTROL_COHERENT_EN 0x0aULL
-#define CONTROL_ISOC_EN 0x0bULL
-#define CONTROL_CMDBUF_EN 0x0cULL
-#define CONTROL_PPRLOG_EN 0x0dULL
-#define CONTROL_PPRINT_EN 0x0eULL
-#define CONTROL_PPR_EN 0x0fULL
-#define CONTROL_GT_EN 0x10ULL
-#define CONTROL_GA_EN 0x11ULL
-#define CONTROL_GAM_EN 0x19ULL
-#define CONTROL_GALOG_EN 0x1CULL
-#define CONTROL_GAINT_EN 0x1DULL
-#define CONTROL_XT_EN 0x32ULL
-#define CONTROL_INTCAPXT_EN 0x33ULL
+#define CONTROL_IOMMU_EN 0
+#define CONTROL_HT_TUN_EN 1
+#define CONTROL_EVT_LOG_EN 2
+#define CONTROL_EVT_INT_EN 3
+#define CONTROL_COMWAIT_EN 4
+#define CONTROL_INV_TIMEOUT 5
+#define CONTROL_PASSPW_EN 8
+#define CONTROL_RESPASSPW_EN 9
+#define CONTROL_COHERENT_EN 10
+#define CONTROL_ISOC_EN 11
+#define CONTROL_CMDBUF_EN 12
+#define CONTROL_PPRLOG_EN 13
+#define CONTROL_PPRINT_EN 14
+#define CONTROL_PPR_EN 15
+#define CONTROL_GT_EN 16
+#define CONTROL_GA_EN 17
+#define CONTROL_GAM_EN 25
+#define CONTROL_GALOG_EN 28
+#define CONTROL_GAINT_EN 29
+#define CONTROL_XT_EN 50
+#define CONTROL_INTCAPXT_EN 51
+#define CONTROL_SNPAVIC_EN 61
#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT)
#define CTRL_INV_TO_NONE 0
@@ -445,8 +453,6 @@ struct irq_remap_table {
u32 *table;
};
-extern struct irq_remap_table **irq_lookup_table;
-
/* Interrupt remapping feature used? */
extern bool amd_iommu_irq_remap;
@@ -456,6 +462,16 @@ extern bool amdr_ivrs_remap_support;
/* kmem_cache to get tables with 128 byte alignement */
extern struct kmem_cache *amd_iommu_irq_cache;
+#define PCI_SBDF_TO_SEGID(sbdf) (((sbdf) >> 16) & 0xffff)
+#define PCI_SBDF_TO_DEVID(sbdf) ((sbdf) & 0xffff)
+#define PCI_SEG_DEVID_TO_SBDF(seg, devid) ((((u32)(seg) & 0xffff) << 16) | \
+ ((devid) & 0xffff))
+
+/* Make iterating over all pci segment easier */
+#define for_each_pci_segment(pci_seg) \
+ list_for_each_entry((pci_seg), &amd_iommu_pci_seg_list, list)
+#define for_each_pci_segment_safe(pci_seg, next) \
+ list_for_each_entry_safe((pci_seg), (next), &amd_iommu_pci_seg_list, list)
/*
* Make iterating over all IOMMUs easier
*/
@@ -478,13 +494,14 @@ extern struct kmem_cache *amd_iommu_irq_cache;
struct amd_iommu_fault {
u64 address; /* IO virtual address of the fault*/
u32 pasid; /* Address space identifier */
- u16 device_id; /* Originating PCI device id */
+ u32 sbdf; /* Originating PCI device id */
u16 tag; /* PPR tag */
u16 flags; /* Fault flags */
};
+struct amd_iommu;
struct iommu_domain;
struct irq_domain;
struct amd_irte_ops;
@@ -531,6 +548,75 @@ struct protection_domain {
};
/*
+ * This structure contains information about one PCI segment in the system.
+ */
+struct amd_iommu_pci_seg {
+ /* List with all PCI segments in the system */
+ struct list_head list;
+
+ /* List of all available dev_data structures */
+ struct llist_head dev_data_list;
+
+ /* PCI segment number */
+ u16 id;
+
+ /* Largest PCI device id we expect translation requests for */
+ u16 last_bdf;
+
+ /* Size of the device table */
+ u32 dev_table_size;
+
+ /* Size of the alias table */
+ u32 alias_table_size;
+
+ /* Size of the rlookup table */
+ u32 rlookup_table_size;
+
+ /*
+ * device table virtual address
+ *
+ * Pointer to the per PCI segment device table.
+ * It is indexed by the PCI device id or the HT unit id and contains
+ * information about the domain the device belongs to as well as the
+ * page table root pointer.
+ */
+ struct dev_table_entry *dev_table;
+
+ /*
+ * The rlookup iommu table is used to find the IOMMU which is
+ * responsible for a specific device. It is indexed by the PCI
+ * device id.
+ */
+ struct amd_iommu **rlookup_table;
+
+ /*
+ * This table is used to find the irq remapping table for a given
+ * device id quickly.
+ */
+ struct irq_remap_table **irq_lookup_table;
+
+ /*
+ * Pointer to a device table which the content of old device table
+ * will be copied to. It's only be used in kdump kernel.
+ */
+ struct dev_table_entry *old_dev_tbl_cpy;
+
+ /*
+ * The alias table is a driver specific data structure which contains the
+ * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
+ * More than one device can share the same requestor id.
+ */
+ u16 *alias_table;
+
+ /*
+ * A list of required unity mappings we find in ACPI. It is not locked
+ * because as runtime it is only read. It is created at ACPI table
+ * parsing time.
+ */
+ struct list_head unity_map;
+};
+
+/*
* Structure where we save information about one hardware AMD IOMMU in the
* system.
*/
@@ -567,6 +653,9 @@ struct amd_iommu {
/* Extended features */
u64 features;
+ /* Extended features 2 */
+ u64 features2;
+
/* IOMMUv2 */
bool is_iommu_v2;
@@ -581,7 +670,7 @@ struct amd_iommu {
u16 cap_ptr;
/* pci domain of this IOMMU */
- u16 pci_seg;
+ struct amd_iommu_pci_seg *pci_seg;
/* start of exclusion range of that IOMMU */
u64 exclusion_start;
@@ -666,8 +755,8 @@ struct acpihid_map_entry {
struct list_head list;
u8 uid[ACPIHID_UID_LEN];
u8 hid[ACPIHID_HID_LEN];
- u16 devid;
- u16 root_devid;
+ u32 devid;
+ u32 root_devid;
bool cmd_line;
struct iommu_group *group;
};
@@ -675,7 +764,7 @@ struct acpihid_map_entry {
struct devid_map {
struct list_head list;
u8 id;
- u16 devid;
+ u32 devid;
bool cmd_line;
};
@@ -689,7 +778,7 @@ struct iommu_dev_data {
struct list_head list; /* For domain->dev_list */
struct llist_node dev_data_list; /* For global dev_data_list */
struct protection_domain *domain; /* Domain the device is bound to */
- struct pci_dev *pdev;
+ struct device *dev;
u16 devid; /* PCI Device ID */
bool iommu_v2; /* Device can make use of IOMMUv2 */
struct {
@@ -710,6 +799,12 @@ extern struct list_head hpet_map;
extern struct list_head acpihid_map;
/*
+ * List with all PCI segments in the system. This list is not locked because
+ * it is only written at driver initialization time
+ */
+extern struct list_head amd_iommu_pci_seg_list;
+
+/*
* List with all IOMMUs in the system. This list is not locked because it is
* only written and read at driver initialization or suspend time
*/
@@ -749,38 +844,12 @@ struct unity_map_entry {
};
/*
- * List of all unity mappings. It is not locked because as runtime it is only
- * read. It is created at ACPI table parsing time.
- */
-extern struct list_head amd_iommu_unity_map;
-
-/*
* Data structures for device handling
*/
-/*
- * Device table used by hardware. Read and write accesses by software are
- * locked with the amd_iommu_pd_table lock.
- */
-extern struct dev_table_entry *amd_iommu_dev_table;
-
-/*
- * Alias table to find requestor ids to device ids. Not locked because only
- * read on runtime.
- */
-extern u16 *amd_iommu_alias_table;
-
-/*
- * Reverse lookup table to find the IOMMU which translates a specific device.
- */
-extern struct amd_iommu **amd_iommu_rlookup_table;
-
/* size of the dma_ops aperture as power of 2 */
extern unsigned amd_iommu_aperture_order;
-/* largest PCI device id we expect translation requests for */
-extern u16 amd_iommu_last_bdf;
-
/* allocation bitmap for domain ids */
extern unsigned long *amd_iommu_pd_alloc_bitmap;
@@ -913,6 +982,7 @@ struct irq_2_irte {
struct amd_ir_data {
u32 cached_ga_tag;
+ struct amd_iommu *iommu;
struct irq_2_irte irq_2_irte;
struct msi_msg msi_entry;
void *entry; /* Pointer to union irte or struct irte_ga */
@@ -930,9 +1000,9 @@ struct amd_ir_data {
struct amd_irte_ops {
void (*prepare)(void *, u32, bool, u8, u32, int);
- void (*activate)(void *, u16, u16);
- void (*deactivate)(void *, u16, u16);
- void (*set_affinity)(void *, u16, u16, u8, u32);
+ void (*activate)(struct amd_iommu *iommu, void *, u16, u16);
+ void (*deactivate)(struct amd_iommu *iommu, void *, u16, u16);
+ void (*set_affinity)(struct amd_iommu *iommu, void *, u16, u16, u8, u32);
void *(*get)(struct irq_remap_table *, int);
void (*set_allocated)(struct irq_remap_table *, int);
bool (*is_allocated)(struct irq_remap_table *, int);
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 1d08f87e734b..fdc642362c14 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -84,6 +84,10 @@
#define ACPI_DEVFLAG_ATSDIS 0x10000000
#define LOOP_TIMEOUT 2000000
+
+#define IVRS_GET_SBDF_ID(seg, bus, dev, fd) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
+ | ((dev & 0x1f) << 3) | (fn & 0x7))
+
/*
* ACPI table definitions
*
@@ -110,7 +114,7 @@ struct ivhd_header {
/* Following only valid on IVHD type 11h and 40h */
u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
- u64 res;
+ u64 efr_reg2;
} __attribute__((packed));
/*
@@ -141,7 +145,8 @@ struct ivmd_header {
u16 length;
u16 devid;
u16 aux;
- u64 resv;
+ u16 pci_seg;
+ u8 resv[6];
u64 range_start;
u64 range_length;
} __attribute__((packed));
@@ -159,11 +164,15 @@ static bool amd_iommu_disabled __initdata;
static bool amd_iommu_force_enable __initdata;
static int amd_iommu_target_ivhd_type;
-u16 amd_iommu_last_bdf; /* largest PCI device id we have
- to handle */
-LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
- we find in ACPI */
+/* Global EFR and EFR2 registers */
+u64 amd_iommu_efr;
+u64 amd_iommu_efr2;
+/* SNP is enabled on the system? */
+bool amd_iommu_snp_en;
+EXPORT_SYMBOL(amd_iommu_snp_en);
+
+LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
system */
@@ -186,47 +195,11 @@ bool amdr_ivrs_remap_support __read_mostly;
bool amd_iommu_force_isolation __read_mostly;
/*
- * Pointer to the device table which is shared by all AMD IOMMUs
- * it is indexed by the PCI device id or the HT unit id and contains
- * information about the domain the device belongs to as well as the
- * page table root pointer.
- */
-struct dev_table_entry *amd_iommu_dev_table;
-/*
- * Pointer to a device table which the content of old device table
- * will be copied to. It's only be used in kdump kernel.
- */
-static struct dev_table_entry *old_dev_tbl_cpy;
-
-/*
- * The alias table is a driver specific data structure which contains the
- * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
- * More than one device can share the same requestor id.
- */
-u16 *amd_iommu_alias_table;
-
-/*
- * The rlookup table is used to find the IOMMU which is responsible
- * for a specific device. It is also indexed by the PCI device id.
- */
-struct amd_iommu **amd_iommu_rlookup_table;
-
-/*
- * This table is used to find the irq remapping table for a given device id
- * quickly.
- */
-struct irq_remap_table **irq_lookup_table;
-
-/*
* AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
* to know which ones are already in use.
*/
unsigned long *amd_iommu_pd_alloc_bitmap;
-static u32 dev_table_size; /* size of the device table */
-static u32 alias_table_size; /* size of the alias table */
-static u32 rlookup_table_size; /* size if the rlookup table */
-
enum iommu_init_state {
IOMMU_START_STATE,
IOMMU_IVRS_DETECTED,
@@ -256,7 +229,7 @@ static enum iommu_init_state init_state = IOMMU_START_STATE;
static int amd_iommu_enable_interrupts(void);
static int __init iommu_go_to_state(enum iommu_init_state state);
-static void init_device_table_dma(void);
+static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);
static bool amd_iommu_pre_enabled = true;
@@ -281,16 +254,10 @@ static void init_translation_status(struct amd_iommu *iommu)
iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
}
-static inline void update_last_devid(u16 devid)
-{
- if (devid > amd_iommu_last_bdf)
- amd_iommu_last_bdf = devid;
-}
-
-static inline unsigned long tbl_size(int entry_size)
+static inline unsigned long tbl_size(int entry_size, int last_bdf)
{
unsigned shift = PAGE_SHIFT +
- get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
+ get_order((last_bdf + 1) * entry_size);
return 1UL << shift;
}
@@ -300,21 +267,46 @@ int amd_iommu_get_num_iommus(void)
return amd_iommus_present;
}
-#ifdef CONFIG_IRQ_REMAP
-static bool check_feature_on_all_iommus(u64 mask)
+/*
+ * Iterate through all the IOMMUs to get common EFR
+ * masks among all IOMMUs and warn if found inconsistency.
+ */
+static void get_global_efr(void)
{
- bool ret = false;
struct amd_iommu *iommu;
for_each_iommu(iommu) {
- ret = iommu_feature(iommu, mask);
- if (!ret)
- return false;
+ u64 tmp = iommu->features;
+ u64 tmp2 = iommu->features2;
+
+ if (list_is_first(&iommu->list, &amd_iommu_list)) {
+ amd_iommu_efr = tmp;
+ amd_iommu_efr2 = tmp2;
+ continue;
+ }
+
+ if (amd_iommu_efr == tmp &&
+ amd_iommu_efr2 == tmp2)
+ continue;
+
+ pr_err(FW_BUG
+ "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n",
+ tmp, tmp2, amd_iommu_efr, amd_iommu_efr2,
+ iommu->index, iommu->pci_seg->id,
+ PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid),
+ PCI_FUNC(iommu->devid));
+
+ amd_iommu_efr &= tmp;
+ amd_iommu_efr2 &= tmp2;
}
- return true;
+ pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
+}
+
+static bool check_feature_on_all_iommus(u64 mask)
+{
+ return !!(amd_iommu_efr & mask);
}
-#endif
/*
* For IVHD type 0x11/0x40, EFR is also available via IVHD.
@@ -324,8 +316,10 @@ static bool check_feature_on_all_iommus(u64 mask)
static void __init early_iommu_features_init(struct amd_iommu *iommu,
struct ivhd_header *h)
{
- if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP)
+ if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
iommu->features = h->efr_reg;
+ iommu->features2 = h->efr_reg2;
+ }
if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
amdr_ivrs_remap_support = true;
}
@@ -399,7 +393,7 @@ static void iommu_set_cwwb_range(struct amd_iommu *iommu)
u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
u64 entry = start & PM_ADDR_MASK;
- if (!iommu_feature(iommu, FEATURE_SNP))
+ if (!check_feature_on_all_iommus(FEATURE_SNP))
return;
/* Note:
@@ -421,10 +415,12 @@ static void iommu_set_cwwb_range(struct amd_iommu *iommu)
static void iommu_set_device_table(struct amd_iommu *iommu)
{
u64 entry;
+ u32 dev_table_size = iommu->pci_seg->dev_table_size;
+ void *dev_table = (void *)get_dev_table(iommu);
BUG_ON(iommu->mmio_base == NULL);
- entry = iommu_virt_to_phys(amd_iommu_dev_table);
+ entry = iommu_virt_to_phys(dev_table);
entry |= (dev_table_size >> 12) - 1;
memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
&entry, sizeof(entry));
@@ -557,6 +553,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
{
u8 *p = (void *)h, *end = (void *)h;
struct ivhd_entry *dev;
+ int last_devid = -EINVAL;
u32 ivhd_size = get_ivhd_header_size(h);
@@ -573,14 +570,14 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
switch (dev->type) {
case IVHD_DEV_ALL:
/* Use maximum BDF value for DEV_ALL */
- update_last_devid(0xffff);
- break;
+ return 0xffff;
case IVHD_DEV_SELECT:
case IVHD_DEV_RANGE_END:
case IVHD_DEV_ALIAS:
case IVHD_DEV_EXT_SELECT:
/* all the above subfield types refer to device ids */
- update_last_devid(dev->devid);
+ if (dev->devid > last_devid)
+ last_devid = dev->devid;
break;
default:
break;
@@ -590,7 +587,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
WARN_ON(p != end);
- return 0;
+ return last_devid;
}
static int __init check_ivrs_checksum(struct acpi_table_header *table)
@@ -614,38 +611,125 @@ static int __init check_ivrs_checksum(struct acpi_table_header *table)
* id which we need to handle. This is the first of three functions which parse
* the ACPI table. So we check the checksum here.
*/
-static int __init find_last_devid_acpi(struct acpi_table_header *table)
+static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg)
{
u8 *p = (u8 *)table, *end = (u8 *)table;
struct ivhd_header *h;
+ int last_devid, last_bdf = 0;
p += IVRS_HEADER_LENGTH;
end += table->length;
while (p < end) {
h = (struct ivhd_header *)p;
- if (h->type == amd_iommu_target_ivhd_type) {
- int ret = find_last_devid_from_ivhd(h);
-
- if (ret)
- return ret;
+ if (h->pci_seg == pci_seg &&
+ h->type == amd_iommu_target_ivhd_type) {
+ last_devid = find_last_devid_from_ivhd(h);
+
+ if (last_devid < 0)
+ return -EINVAL;
+ if (last_devid > last_bdf)
+ last_bdf = last_devid;
}
p += h->length;
}
WARN_ON(p != end);
- return 0;
+ return last_bdf;
}
/****************************************************************************
*
* The following functions belong to the code path which parses the ACPI table
* the second time. In this ACPI parsing iteration we allocate IOMMU specific
- * data structures, initialize the device/alias/rlookup table and also
- * basically initialize the hardware.
+ * data structures, initialize the per PCI segment device/alias/rlookup table
+ * and also basically initialize the hardware.
*
****************************************************************************/
+/* Allocate per PCI segment device table */
+static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
+{
+ pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32,
+ get_order(pci_seg->dev_table_size));
+ if (!pci_seg->dev_table)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
+{
+ free_pages((unsigned long)pci_seg->dev_table,
+ get_order(pci_seg->dev_table_size));
+ pci_seg->dev_table = NULL;
+}
+
+/* Allocate per PCI segment IOMMU rlookup table. */
+static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
+{
+ pci_seg->rlookup_table = (void *)__get_free_pages(
+ GFP_KERNEL | __GFP_ZERO,
+ get_order(pci_seg->rlookup_table_size));
+ if (pci_seg->rlookup_table == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
+{
+ free_pages((unsigned long)pci_seg->rlookup_table,
+ get_order(pci_seg->rlookup_table_size));
+ pci_seg->rlookup_table = NULL;
+}
+
+static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
+{
+ pci_seg->irq_lookup_table = (void *)__get_free_pages(
+ GFP_KERNEL | __GFP_ZERO,
+ get_order(pci_seg->rlookup_table_size));
+ kmemleak_alloc(pci_seg->irq_lookup_table,
+ pci_seg->rlookup_table_size, 1, GFP_KERNEL);
+ if (pci_seg->irq_lookup_table == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
+{
+ kmemleak_free(pci_seg->irq_lookup_table);
+ free_pages((unsigned long)pci_seg->irq_lookup_table,
+ get_order(pci_seg->rlookup_table_size));
+ pci_seg->irq_lookup_table = NULL;
+}
+
+static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
+{
+ int i;
+
+ pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL,
+ get_order(pci_seg->alias_table_size));
+ if (!pci_seg->alias_table)
+ return -ENOMEM;
+
+ /*
+ * let all alias entries point to itself
+ */
+ for (i = 0; i <= pci_seg->last_bdf; ++i)
+ pci_seg->alias_table[i] = i;
+
+ return 0;
+}
+
+static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
+{
+ free_pages((unsigned long)pci_seg->alias_table,
+ get_order(pci_seg->alias_table_size));
+ pci_seg->alias_table = NULL;
+}
+
/*
* Allocates the command buffer. This buffer is per AMD IOMMU. We can
* write commands to that buffer later and the IOMMU will execute them
@@ -724,7 +808,7 @@ static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
void *buf = (void *)__get_free_pages(gfp, order);
if (buf &&
- iommu_feature(iommu, FEATURE_SNP) &&
+ check_feature_on_all_iommus(FEATURE_SNP) &&
set_memory_4k((unsigned long)buf, (1 << order))) {
free_pages((unsigned long)buf, order);
buf = NULL;
@@ -815,20 +899,15 @@ static void free_ga_log(struct amd_iommu *iommu)
#endif
}
+#ifdef CONFIG_IRQ_REMAP
static int iommu_ga_log_enable(struct amd_iommu *iommu)
{
-#ifdef CONFIG_IRQ_REMAP
u32 status, i;
u64 entry;
if (!iommu->ga_log)
return -EINVAL;
- /* Check if already running */
- status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
- if (WARN_ON(status & (MMIO_STATUS_GALOG_RUN_MASK)))
- return 0;
-
entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
&entry, sizeof(entry));
@@ -852,13 +931,12 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu)
if (WARN_ON(i >= LOOP_TIMEOUT))
return -EINVAL;
-#endif /* CONFIG_IRQ_REMAP */
+
return 0;
}
static int iommu_init_ga_log(struct amd_iommu *iommu)
{
-#ifdef CONFIG_IRQ_REMAP
if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
return 0;
@@ -876,10 +954,8 @@ static int iommu_init_ga_log(struct amd_iommu *iommu)
err_out:
free_ga_log(iommu);
return -EINVAL;
-#else
- return 0;
-#endif /* CONFIG_IRQ_REMAP */
}
+#endif /* CONFIG_IRQ_REMAP */
static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
{
@@ -916,56 +992,59 @@ static void iommu_enable_gt(struct amd_iommu *iommu)
}
/* sets a specific bit in the device table entry. */
-static void set_dev_entry_bit(u16 devid, u8 bit)
+static void __set_dev_entry_bit(struct dev_table_entry *dev_table,
+ u16 devid, u8 bit)
{
int i = (bit >> 6) & 0x03;
int _bit = bit & 0x3f;
- amd_iommu_dev_table[devid].data[i] |= (1UL << _bit);
+ dev_table[devid].data[i] |= (1UL << _bit);
+}
+
+static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
+{
+ struct dev_table_entry *dev_table = get_dev_table(iommu);
+
+ return __set_dev_entry_bit(dev_table, devid, bit);
}
-static int get_dev_entry_bit(u16 devid, u8 bit)
+static int __get_dev_entry_bit(struct dev_table_entry *dev_table,
+ u16 devid, u8 bit)
{
int i = (bit >> 6) & 0x03;
int _bit = bit & 0x3f;
- return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
+ return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
}
+static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
+{
+ struct dev_table_entry *dev_table = get_dev_table(iommu);
-static bool copy_device_table(void)
+ return __get_dev_entry_bit(dev_table, devid, bit);
+}
+
+static bool __copy_device_table(struct amd_iommu *iommu)
{
- u64 int_ctl, int_tab_len, entry = 0, last_entry = 0;
+ u64 int_ctl, int_tab_len, entry = 0;
+ struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
struct dev_table_entry *old_devtb = NULL;
u32 lo, hi, devid, old_devtb_size;
phys_addr_t old_devtb_phys;
- struct amd_iommu *iommu;
u16 dom_id, dte_v, irq_v;
gfp_t gfp_flag;
u64 tmp;
- if (!amd_iommu_pre_enabled)
- return false;
-
- pr_warn("Translation is already enabled - trying to copy translation structures\n");
- for_each_iommu(iommu) {
- /* All IOMMUs should use the same device table with the same size */
- lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
- hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
- entry = (((u64) hi) << 32) + lo;
- if (last_entry && last_entry != entry) {
- pr_err("IOMMU:%d should use the same dev table as others!\n",
- iommu->index);
- return false;
- }
- last_entry = entry;
+ /* Each IOMMU use separate device table with the same size */
+ lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
+ hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
+ entry = (((u64) hi) << 32) + lo;
- old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
- if (old_devtb_size != dev_table_size) {
- pr_err("The device table size of IOMMU:%d is not expected!\n",
- iommu->index);
- return false;
- }
+ old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
+ if (old_devtb_size != pci_seg->dev_table_size) {
+ pr_err("The device table size of IOMMU:%d is not expected!\n",
+ iommu->index);
+ return false;
}
/*
@@ -981,38 +1060,38 @@ static bool copy_device_table(void)
}
old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel())
? (__force void *)ioremap_encrypted(old_devtb_phys,
- dev_table_size)
- : memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
+ pci_seg->dev_table_size)
+ : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB);
if (!old_devtb)
return false;
gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
- old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
- get_order(dev_table_size));
- if (old_dev_tbl_cpy == NULL) {
+ pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
+ get_order(pci_seg->dev_table_size));
+ if (pci_seg->old_dev_tbl_cpy == NULL) {
pr_err("Failed to allocate memory for copying old device table!\n");
memunmap(old_devtb);
return false;
}
- for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
- old_dev_tbl_cpy[devid] = old_devtb[devid];
+ for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
+ pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid];
dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
if (dte_v && dom_id) {
- old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
- old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
+ pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
+ pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
/* If gcr3 table existed, mask it out */
if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
- old_dev_tbl_cpy[devid].data[1] &= ~tmp;
+ pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
tmp |= DTE_FLAG_GV;
- old_dev_tbl_cpy[devid].data[0] &= ~tmp;
+ pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
}
}
@@ -1027,7 +1106,7 @@ static bool copy_device_table(void)
return false;
}
- old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
+ pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
}
}
memunmap(old_devtb);
@@ -1035,21 +1114,42 @@ static bool copy_device_table(void)
return true;
}
-void amd_iommu_apply_erratum_63(u16 devid)
+static bool copy_device_table(void)
{
- int sysmgt;
+ struct amd_iommu *iommu;
+ struct amd_iommu_pci_seg *pci_seg;
- sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) |
- (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1);
+ if (!amd_iommu_pre_enabled)
+ return false;
- if (sysmgt == 0x01)
- set_dev_entry_bit(devid, DEV_ENTRY_IW);
+ pr_warn("Translation is already enabled - trying to copy translation structures\n");
+
+ /*
+ * All IOMMUs within PCI segment shares common device table.
+ * Hence copy device table only once per PCI segment.
+ */
+ for_each_pci_segment(pci_seg) {
+ for_each_iommu(iommu) {
+ if (pci_seg->id != iommu->pci_seg->id)
+ continue;
+ if (!__copy_device_table(iommu))
+ return false;
+ break;
+ }
+ }
+
+ return true;
}
-/* Writes the specific IOMMU for a device into the rlookup table */
-static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
+void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid)
{
- amd_iommu_rlookup_table[devid] = iommu;
+ int sysmgt;
+
+ sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) |
+ (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1);
+
+ if (sysmgt == 0x01)
+ set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW);
}
/*
@@ -1060,26 +1160,26 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
u16 devid, u32 flags, u32 ext_flags)
{
if (flags & ACPI_DEVFLAG_INITPASS)
- set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
+ set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
if (flags & ACPI_DEVFLAG_EXTINT)
- set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
+ set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
if (flags & ACPI_DEVFLAG_NMI)
- set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
+ set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
if (flags & ACPI_DEVFLAG_SYSMGT1)
- set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
+ set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
if (flags & ACPI_DEVFLAG_SYSMGT2)
- set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
+ set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
if (flags & ACPI_DEVFLAG_LINT0)
- set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
+ set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
if (flags & ACPI_DEVFLAG_LINT1)
- set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
+ set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
- amd_iommu_apply_erratum_63(devid);
+ amd_iommu_apply_erratum_63(iommu, devid);
- set_iommu_for_device(iommu, devid);
+ amd_iommu_set_rlookup_table(iommu, devid);
}
-int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
+int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
{
struct devid_map *entry;
struct list_head *list;
@@ -1116,7 +1216,7 @@ int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
return 0;
}
-static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid,
+static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
bool cmd_line)
{
struct acpihid_map_entry *entry;
@@ -1195,10 +1295,11 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
{
u8 *p = (u8 *)h;
u8 *end = p, flags = 0;
- u16 devid = 0, devid_start = 0, devid_to = 0;
+ u16 devid = 0, devid_start = 0, devid_to = 0, seg_id;
u32 dev_i, ext_flags = 0;
bool alias = false;
struct ivhd_entry *e;
+ struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
u32 ivhd_size;
int ret;
@@ -1230,19 +1331,21 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
while (p < end) {
e = (struct ivhd_entry *)p;
+ seg_id = pci_seg->id;
+
switch (e->type) {
case IVHD_DEV_ALL:
DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags);
- for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i)
+ for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i)
set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
break;
case IVHD_DEV_SELECT:
- DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x "
+ DUMP_printk(" DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x "
"flags: %02x\n",
- PCI_BUS_NUM(e->devid),
+ seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
e->flags);
@@ -1253,8 +1356,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
case IVHD_DEV_SELECT_RANGE_START:
DUMP_printk(" DEV_SELECT_RANGE_START\t "
- "devid: %02x:%02x.%x flags: %02x\n",
- PCI_BUS_NUM(e->devid),
+ "devid: %04x:%02x:%02x.%x flags: %02x\n",
+ seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
e->flags);
@@ -1266,9 +1369,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_ALIAS:
- DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
+ DUMP_printk(" DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x "
"flags: %02x devid_to: %02x:%02x.%x\n",
- PCI_BUS_NUM(e->devid),
+ seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
e->flags,
@@ -1280,18 +1383,18 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
devid_to = e->ext >> 8;
set_dev_entry_from_acpi(iommu, devid , e->flags, 0);
set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
- amd_iommu_alias_table[devid] = devid_to;
+ pci_seg->alias_table[devid] = devid_to;
break;
case IVHD_DEV_ALIAS_RANGE:
DUMP_printk(" DEV_ALIAS_RANGE\t\t "
- "devid: %02x:%02x.%x flags: %02x "
- "devid_to: %02x:%02x.%x\n",
- PCI_BUS_NUM(e->devid),
+ "devid: %04x:%02x:%02x.%x flags: %02x "
+ "devid_to: %04x:%02x:%02x.%x\n",
+ seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
e->flags,
- PCI_BUS_NUM(e->ext >> 8),
+ seg_id, PCI_BUS_NUM(e->ext >> 8),
PCI_SLOT(e->ext >> 8),
PCI_FUNC(e->ext >> 8));
@@ -1303,9 +1406,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_EXT_SELECT:
- DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
+ DUMP_printk(" DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x "
"flags: %02x ext: %08x\n",
- PCI_BUS_NUM(e->devid),
+ seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
e->flags, e->ext);
@@ -1317,8 +1420,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
case IVHD_DEV_EXT_SELECT_RANGE:
DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: "
- "%02x:%02x.%x flags: %02x ext: %08x\n",
- PCI_BUS_NUM(e->devid),
+ "%04x:%02x:%02x.%x flags: %02x ext: %08x\n",
+ seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
e->flags, e->ext);
@@ -1330,15 +1433,15 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_RANGE_END:
- DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
- PCI_BUS_NUM(e->devid),
+ DUMP_printk(" DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n",
+ seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid));
devid = e->devid;
for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
if (alias) {
- amd_iommu_alias_table[dev_i] = devid_to;
+ pci_seg->alias_table[dev_i] = devid_to;
set_dev_entry_from_acpi(iommu,
devid_to, flags, ext_flags);
}
@@ -1349,11 +1452,11 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
case IVHD_DEV_SPECIAL: {
u8 handle, type;
const char *var;
- u16 devid;
+ u32 devid;
int ret;
handle = e->ext & 0xff;
- devid = (e->ext >> 8) & 0xffff;
+ devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8));
type = (e->ext >> 24) & 0xff;
if (type == IVHD_SPECIAL_IOAPIC)
@@ -1363,9 +1466,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
else
var = "UNKNOWN";
- DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n",
+ DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n",
var, (int)handle,
- PCI_BUS_NUM(devid),
+ seg_id, PCI_BUS_NUM(devid),
PCI_SLOT(devid),
PCI_FUNC(devid));
@@ -1383,7 +1486,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
}
case IVHD_DEV_ACPI_HID: {
- u16 devid;
+ u32 devid;
u8 hid[ACPIHID_HID_LEN];
u8 uid[ACPIHID_UID_LEN];
int ret;
@@ -1426,9 +1529,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
}
- devid = e->devid;
- DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n",
- hid, uid,
+ devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
+ DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n",
+ hid, uid, seg_id,
PCI_BUS_NUM(devid),
PCI_SLOT(devid),
PCI_FUNC(devid));
@@ -1458,6 +1561,74 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
return 0;
}
+/* Allocate PCI segment data structure */
+static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
+ struct acpi_table_header *ivrs_base)
+{
+ struct amd_iommu_pci_seg *pci_seg;
+ int last_bdf;
+
+ /*
+ * First parse ACPI tables to find the largest Bus/Dev/Func we need to
+ * handle in this PCI segment. Upon this information the shared data
+ * structures for the PCI segments in the system will be allocated.
+ */
+ last_bdf = find_last_devid_acpi(ivrs_base, id);
+ if (last_bdf < 0)
+ return NULL;
+
+ pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL);
+ if (pci_seg == NULL)
+ return NULL;
+
+ pci_seg->last_bdf = last_bdf;
+ DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf);
+ pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf);
+ pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf);
+ pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf);
+
+ pci_seg->id = id;
+ init_llist_head(&pci_seg->dev_data_list);
+ INIT_LIST_HEAD(&pci_seg->unity_map);
+ list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list);
+
+ if (alloc_dev_table(pci_seg))
+ return NULL;
+ if (alloc_alias_table(pci_seg))
+ return NULL;
+ if (alloc_rlookup_table(pci_seg))
+ return NULL;
+
+ return pci_seg;
+}
+
+static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id,
+ struct acpi_table_header *ivrs_base)
+{
+ struct amd_iommu_pci_seg *pci_seg;
+
+ for_each_pci_segment(pci_seg) {
+ if (pci_seg->id == id)
+ return pci_seg;
+ }
+
+ return alloc_pci_segment(id, ivrs_base);
+}
+
+static void __init free_pci_segments(void)
+{
+ struct amd_iommu_pci_seg *pci_seg, *next;
+
+ for_each_pci_segment_safe(pci_seg, next) {
+ list_del(&pci_seg->list);
+ free_irq_lookup_table(pci_seg);
+ free_rlookup_table(pci_seg);
+ free_alias_table(pci_seg);
+ free_dev_table(pci_seg);
+ kfree(pci_seg);
+ }
+}
+
static void __init free_iommu_one(struct amd_iommu *iommu)
{
free_cwwb_sem(iommu);
@@ -1542,9 +1713,15 @@ static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
* together and also allocates the command buffer and programs the
* hardware. It does NOT enable the IOMMU. This is done afterwards.
*/
-static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
+static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
+ struct acpi_table_header *ivrs_base)
{
- int ret;
+ struct amd_iommu_pci_seg *pci_seg;
+
+ pci_seg = get_pci_segment(h->pci_seg, ivrs_base);
+ if (pci_seg == NULL)
+ return -ENOMEM;
+ iommu->pci_seg = pci_seg;
raw_spin_lock_init(&iommu->lock);
iommu->cmd_sem_val = 0;
@@ -1566,7 +1743,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
*/
iommu->devid = h->devid;
iommu->cap_ptr = h->cap_ptr;
- iommu->pci_seg = h->pci_seg;
iommu->mmio_phys = h->mmio_phys;
switch (h->type) {
@@ -1621,6 +1797,13 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
if (!iommu->mmio_base)
return -ENOMEM;
+ return init_iommu_from_acpi(iommu, h);
+}
+
+static int __init init_iommu_one_late(struct amd_iommu *iommu)
+{
+ int ret;
+
if (alloc_cwwb_sem(iommu))
return -ENOMEM;
@@ -1642,10 +1825,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
if (amd_iommu_pre_enabled)
amd_iommu_pre_enabled = translation_pre_enabled(iommu);
- ret = init_iommu_from_acpi(iommu, h);
- if (ret)
- return ret;
-
if (amd_iommu_irq_remap) {
ret = amd_iommu_create_irq_domain(iommu);
if (ret)
@@ -1656,7 +1835,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
* Make sure IOMMU is not considered to translate itself. The IVRS
* table tells us so, but this is a lie!
*/
- amd_iommu_rlookup_table[iommu->devid] = NULL;
+ iommu->pci_seg->rlookup_table[iommu->devid] = NULL;
return 0;
}
@@ -1701,15 +1880,16 @@ static int __init init_iommu_all(struct acpi_table_header *table)
end += table->length;
p += IVRS_HEADER_LENGTH;
+ /* Phase 1: Process all IVHD blocks */
while (p < end) {
h = (struct ivhd_header *)p;
if (*p == amd_iommu_target_ivhd_type) {
- DUMP_printk("device: %02x:%02x.%01x cap: %04x "
- "seg: %d flags: %01x info %04x\n",
- PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid),
- PCI_FUNC(h->devid), h->cap_ptr,
- h->pci_seg, h->flags, h->info);
+ DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x "
+ "flags: %01x info %04x\n",
+ h->pci_seg, PCI_BUS_NUM(h->devid),
+ PCI_SLOT(h->devid), PCI_FUNC(h->devid),
+ h->cap_ptr, h->flags, h->info);
DUMP_printk(" mmio-addr: %016llx\n",
h->mmio_phys);
@@ -1717,7 +1897,7 @@ static int __init init_iommu_all(struct acpi_table_header *table)
if (iommu == NULL)
return -ENOMEM;
- ret = init_iommu_one(iommu, h);
+ ret = init_iommu_one(iommu, h, table);
if (ret)
return ret;
}
@@ -1726,6 +1906,16 @@ static int __init init_iommu_all(struct acpi_table_header *table)
}
WARN_ON(p != end);
+ /* Phase 2 : Early feature support check */
+ get_global_efr();
+
+ /* Phase 3 : Enabling IOMMU features */
+ for_each_iommu(iommu) {
+ ret = init_iommu_one_late(iommu);
+ if (ret)
+ return ret;
+ }
+
return 0;
}
@@ -1762,7 +1952,7 @@ static ssize_t amd_iommu_show_features(struct device *dev,
char *buf)
{
struct amd_iommu *iommu = dev_to_amd_iommu(dev);
- return sprintf(buf, "%llx\n", iommu->features);
+ return sprintf(buf, "%llx:%llx\n", iommu->features2, iommu->features);
}
static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
@@ -1789,16 +1979,18 @@ static const struct attribute_group *amd_iommu_groups[] = {
*/
static void __init late_iommu_features_init(struct amd_iommu *iommu)
{
- u64 features;
+ u64 features, features2;
if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
return;
/* read extended feature bits */
features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
+ features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
if (!iommu->features) {
iommu->features = features;
+ iommu->features2 = features2;
return;
}
@@ -1806,9 +1998,13 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu)
* Sanity check and warn if EFR values from
* IVHD and MMIO conflict.
*/
- if (features != iommu->features)
- pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx).\n",
- features, iommu->features);
+ if (features != iommu->features ||
+ features2 != iommu->features2) {
+ pr_warn(FW_WARN
+ "EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
+ features, iommu->features,
+ features2, iommu->features2);
+ }
}
static int __init iommu_init_pci(struct amd_iommu *iommu)
@@ -1816,7 +2012,8 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
int cap_ptr = iommu->cap_ptr;
int ret;
- iommu->dev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(iommu->devid),
+ iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id,
+ PCI_BUS_NUM(iommu->devid),
iommu->devid & 0xff);
if (!iommu->dev)
return -ENODEV;
@@ -1863,10 +2060,6 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
return -ENOMEM;
- ret = iommu_init_ga_log(iommu);
- if (ret)
- return ret;
-
if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
pr_info("Using strict mode due to virtualization\n");
iommu_set_dma_strict();
@@ -1879,7 +2072,8 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
int i, j;
iommu->root_pdev =
- pci_get_domain_bus_and_slot(0, iommu->dev->bus->number,
+ pci_get_domain_bus_and_slot(iommu->pci_seg->id,
+ iommu->dev->bus->number,
PCI_DEVFN(0, 0));
/*
@@ -1906,8 +2100,11 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
amd_iommu_erratum_746_workaround(iommu);
amd_iommu_ats_write_check_workaround(iommu);
- iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
+ ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
amd_iommu_groups, "ivhd%d", iommu->index);
+ if (ret)
+ return ret;
+
iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
return pci_enable_device(iommu->dev);
@@ -1928,7 +2125,7 @@ static void print_iommu_info(void)
pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
- pr_info("Extended features (%#llx):", iommu->features);
+ pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2);
for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
if (iommu_feature(iommu, (1ULL << i)))
@@ -1938,13 +2135,14 @@ static void print_iommu_info(void)
if (iommu->features & FEATURE_GAM_VAPIC)
pr_cont(" GA_vAPIC");
+ if (iommu->features & FEATURE_SNP)
+ pr_cont(" SNP");
+
pr_cont("\n");
}
}
if (irq_remapping_enabled) {
pr_info("Interrupt remapping enabled\n");
- if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
- pr_info("Virtual APIC enabled\n");
if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
pr_info("X2APIC enabled\n");
}
@@ -1953,6 +2151,7 @@ static void print_iommu_info(void)
static int __init amd_iommu_init_pci(void)
{
struct amd_iommu *iommu;
+ struct amd_iommu_pci_seg *pci_seg;
int ret;
for_each_iommu(iommu) {
@@ -1983,7 +2182,8 @@ static int __init amd_iommu_init_pci(void)
goto out;
}
- init_device_table_dma();
+ for_each_pci_segment(pci_seg)
+ init_device_table_dma(pci_seg);
for_each_iommu(iommu)
iommu_flush_all_caches(iommu);
@@ -2232,9 +2432,6 @@ enable_faults:
if (iommu->ppr_log != NULL)
iommu_feature_enable(iommu, CONTROL_PPRINT_EN);
-
- iommu_ga_log_enable(iommu);
-
return 0;
}
@@ -2249,19 +2446,28 @@ enable_faults:
static void __init free_unity_maps(void)
{
struct unity_map_entry *entry, *next;
+ struct amd_iommu_pci_seg *p, *pci_seg;
- list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
- list_del(&entry->list);
- kfree(entry);
+ for_each_pci_segment_safe(pci_seg, p) {
+ list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
}
}
/* called for unity map ACPI definition */
-static int __init init_unity_map_range(struct ivmd_header *m)
+static int __init init_unity_map_range(struct ivmd_header *m,
+ struct acpi_table_header *ivrs_base)
{
struct unity_map_entry *e = NULL;
+ struct amd_iommu_pci_seg *pci_seg;
char *s;
+ pci_seg = get_pci_segment(m->pci_seg, ivrs_base);
+ if (pci_seg == NULL)
+ return -ENOMEM;
+
e = kzalloc(sizeof(*e), GFP_KERNEL);
if (e == NULL)
return -ENOMEM;
@@ -2277,7 +2483,7 @@ static int __init init_unity_map_range(struct ivmd_header *m)
case ACPI_IVMD_TYPE_ALL:
s = "IVMD_TYPE_ALL\t\t";
e->devid_start = 0;
- e->devid_end = amd_iommu_last_bdf;
+ e->devid_end = pci_seg->last_bdf;
break;
case ACPI_IVMD_TYPE_RANGE:
s = "IVMD_TYPE_RANGE\t\t";
@@ -2299,14 +2505,16 @@ static int __init init_unity_map_range(struct ivmd_header *m)
if (m->flags & IVMD_FLAG_EXCL_RANGE)
e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
- DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
- " range_start: %016llx range_end: %016llx flags: %x\n", s,
+ DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: "
+ "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx"
+ " flags: %x\n", s, m->pci_seg,
PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
- PCI_FUNC(e->devid_start), PCI_BUS_NUM(e->devid_end),
+ PCI_FUNC(e->devid_start), m->pci_seg,
+ PCI_BUS_NUM(e->devid_end),
PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
e->address_start, e->address_end, m->flags);
- list_add_tail(&e->list, &amd_iommu_unity_map);
+ list_add_tail(&e->list, &pci_seg->unity_map);
return 0;
}
@@ -2323,7 +2531,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
while (p < end) {
m = (struct ivmd_header *)p;
if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
- init_unity_map_range(m);
+ init_unity_map_range(m, table);
p += m->length;
}
@@ -2334,35 +2542,48 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
/*
* Init the device table to not allow DMA access for devices
*/
-static void init_device_table_dma(void)
+static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
{
u32 devid;
+ struct dev_table_entry *dev_table = pci_seg->dev_table;
- for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
- set_dev_entry_bit(devid, DEV_ENTRY_VALID);
- set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
+ if (dev_table == NULL)
+ return;
+
+ for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
+ __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID);
+ if (!amd_iommu_snp_en)
+ __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION);
}
}
-static void __init uninit_device_table_dma(void)
+static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
{
u32 devid;
+ struct dev_table_entry *dev_table = pci_seg->dev_table;
+
+ if (dev_table == NULL)
+ return;
- for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
- amd_iommu_dev_table[devid].data[0] = 0ULL;
- amd_iommu_dev_table[devid].data[1] = 0ULL;
+ for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
+ dev_table[devid].data[0] = 0ULL;
+ dev_table[devid].data[1] = 0ULL;
}
}
static void init_device_table(void)
{
+ struct amd_iommu_pci_seg *pci_seg;
u32 devid;
if (!amd_iommu_irq_remap)
return;
- for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
- set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN);
+ for_each_pci_segment(pci_seg) {
+ for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
+ __set_dev_entry_bit(pci_seg->dev_table,
+ devid, DEV_ENTRY_IRQ_TBL_EN);
+ }
}
static void iommu_init_flags(struct amd_iommu *iommu)
@@ -2440,8 +2661,6 @@ static void iommu_enable_ga(struct amd_iommu *iommu)
#ifdef CONFIG_IRQ_REMAP
switch (amd_iommu_guest_ir) {
case AMD_IOMMU_GUEST_IR_VAPIC:
- iommu_feature_enable(iommu, CONTROL_GAM_EN);
- fallthrough;
case AMD_IOMMU_GUEST_IR_LEGACY_GA:
iommu_feature_enable(iommu, CONTROL_GA_EN);
iommu->irte_ops = &irte_128_ops;
@@ -2478,7 +2697,7 @@ static void early_enable_iommu(struct amd_iommu *iommu)
static void early_enable_iommus(void)
{
struct amd_iommu *iommu;
-
+ struct amd_iommu_pci_seg *pci_seg;
if (!copy_device_table()) {
/*
@@ -2488,9 +2707,14 @@ static void early_enable_iommus(void)
*/
if (amd_iommu_pre_enabled)
pr_err("Failed to copy DEV table from previous kernel.\n");
- if (old_dev_tbl_cpy != NULL)
- free_pages((unsigned long)old_dev_tbl_cpy,
- get_order(dev_table_size));
+
+ for_each_pci_segment(pci_seg) {
+ if (pci_seg->old_dev_tbl_cpy != NULL) {
+ free_pages((unsigned long)pci_seg->old_dev_tbl_cpy,
+ get_order(pci_seg->dev_table_size));
+ pci_seg->old_dev_tbl_cpy = NULL;
+ }
+ }
for_each_iommu(iommu) {
clear_translation_pre_enabled(iommu);
@@ -2498,9 +2722,13 @@ static void early_enable_iommus(void)
}
} else {
pr_info("Copied DEV table from previous kernel.\n");
- free_pages((unsigned long)amd_iommu_dev_table,
- get_order(dev_table_size));
- amd_iommu_dev_table = old_dev_tbl_cpy;
+
+ for_each_pci_segment(pci_seg) {
+ free_pages((unsigned long)pci_seg->dev_table,
+ get_order(pci_seg->dev_table_size));
+ pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
+ }
+
for_each_iommu(iommu) {
iommu_disable_command_buffer(iommu);
iommu_disable_event_buffer(iommu);
@@ -2512,19 +2740,6 @@ static void early_enable_iommus(void)
iommu_flush_all_caches(iommu);
}
}
-
-#ifdef CONFIG_IRQ_REMAP
- /*
- * Note: We have already checked GASup from IVRS table.
- * Now, we need to make sure that GAMSup is set.
- */
- if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
- !check_feature_on_all_iommus(FEATURE_GAM_VAPIC))
- amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
-
- if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
- amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
-#endif
}
static void enable_iommus_v2(void)
@@ -2537,10 +2752,72 @@ static void enable_iommus_v2(void)
}
}
+static void enable_iommus_vapic(void)
+{
+#ifdef CONFIG_IRQ_REMAP
+ u32 status, i;
+ struct amd_iommu *iommu;
+
+ for_each_iommu(iommu) {
+ /*
+ * Disable GALog if already running. It could have been enabled
+ * in the previous boot before kdump.
+ */
+ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+ if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
+ continue;
+
+ iommu_feature_disable(iommu, CONTROL_GALOG_EN);
+ iommu_feature_disable(iommu, CONTROL_GAINT_EN);
+
+ /*
+ * Need to set and poll check the GALOGRun bit to zero before
+ * we can set/ modify GA Log registers safely.
+ */
+ for (i = 0; i < LOOP_TIMEOUT; ++i) {
+ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+ if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
+ break;
+ udelay(10);
+ }
+
+ if (WARN_ON(i >= LOOP_TIMEOUT))
+ return;
+ }
+
+ if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
+ !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) {
+ amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
+ return;
+ }
+
+ if (amd_iommu_snp_en &&
+ !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) {
+ pr_warn("Force to disable Virtual APIC due to SNP\n");
+ amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
+ return;
+ }
+
+ /* Enabling GAM and SNPAVIC support */
+ for_each_iommu(iommu) {
+ if (iommu_init_ga_log(iommu) ||
+ iommu_ga_log_enable(iommu))
+ return;
+
+ iommu_feature_enable(iommu, CONTROL_GAM_EN);
+ if (amd_iommu_snp_en)
+ iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN);
+ }
+
+ amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
+ pr_info("Virtual APIC enabled\n");
+#endif
+}
+
static void enable_iommus(void)
{
early_enable_iommus();
-
+ enable_iommus_vapic();
enable_iommus_v2();
}
@@ -2590,27 +2867,11 @@ static struct syscore_ops amd_iommu_syscore_ops = {
static void __init free_iommu_resources(void)
{
- kmemleak_free(irq_lookup_table);
- free_pages((unsigned long)irq_lookup_table,
- get_order(rlookup_table_size));
- irq_lookup_table = NULL;
-
kmem_cache_destroy(amd_iommu_irq_cache);
amd_iommu_irq_cache = NULL;
- free_pages((unsigned long)amd_iommu_rlookup_table,
- get_order(rlookup_table_size));
- amd_iommu_rlookup_table = NULL;
-
- free_pages((unsigned long)amd_iommu_alias_table,
- get_order(alias_table_size));
- amd_iommu_alias_table = NULL;
-
- free_pages((unsigned long)amd_iommu_dev_table,
- get_order(dev_table_size));
- amd_iommu_dev_table = NULL;
-
free_iommu_all();
+ free_pci_segments();
}
/* SB IOAPIC is always on this device in AMD systems */
@@ -2709,7 +2970,7 @@ static void __init ivinfo_init(void *ivrs)
static int __init early_amd_iommu_init(void)
{
struct acpi_table_header *ivrs_base;
- int i, remap_cache_sz, ret;
+ int remap_cache_sz, ret;
acpi_status status;
if (!amd_iommu_detected)
@@ -2737,42 +2998,8 @@ static int __init early_amd_iommu_init(void)
amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
- /*
- * First parse ACPI tables to find the largest Bus/Dev/Func
- * we need to handle. Upon this information the shared data
- * structures for the IOMMUs in the system will be allocated
- */
- ret = find_last_devid_acpi(ivrs_base);
- if (ret)
- goto out;
-
- dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE);
- alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
- rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
-
/* Device table - directly used by all IOMMUs */
ret = -ENOMEM;
- amd_iommu_dev_table = (void *)__get_free_pages(
- GFP_KERNEL | __GFP_ZERO | GFP_DMA32,
- get_order(dev_table_size));
- if (amd_iommu_dev_table == NULL)
- goto out;
-
- /*
- * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
- * IOMMU see for that device
- */
- amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
- get_order(alias_table_size));
- if (amd_iommu_alias_table == NULL)
- goto out;
-
- /* IOMMU rlookup table - find the IOMMU for a specific device */
- amd_iommu_rlookup_table = (void *)__get_free_pages(
- GFP_KERNEL | __GFP_ZERO,
- get_order(rlookup_table_size));
- if (amd_iommu_rlookup_table == NULL)
- goto out;
amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
GFP_KERNEL | __GFP_ZERO,
@@ -2781,12 +3008,6 @@ static int __init early_amd_iommu_init(void)
goto out;
/*
- * let all alias entries point to itself
- */
- for (i = 0; i <= amd_iommu_last_bdf; ++i)
- amd_iommu_alias_table[i] = i;
-
- /*
* never allocate domain 0 because its used as the non-allocated and
* error value placeholder
*/
@@ -2808,6 +3029,7 @@ static int __init early_amd_iommu_init(void)
amd_iommu_irq_remap = check_ioapic_information();
if (amd_iommu_irq_remap) {
+ struct amd_iommu_pci_seg *pci_seg;
/*
* Interrupt remapping enabled, create kmem_cache for the
* remapping tables.
@@ -2824,13 +3046,10 @@ static int __init early_amd_iommu_init(void)
if (!amd_iommu_irq_cache)
goto out;
- irq_lookup_table = (void *)__get_free_pages(
- GFP_KERNEL | __GFP_ZERO,
- get_order(rlookup_table_size));
- kmemleak_alloc(irq_lookup_table, rlookup_table_size,
- 1, GFP_KERNEL);
- if (!irq_lookup_table)
- goto out;
+ for_each_pci_segment(pci_seg) {
+ if (alloc_irq_lookup_table(pci_seg))
+ goto out;
+ }
}
ret = init_memory_definitions(ivrs_base);
@@ -2937,6 +3156,7 @@ static int __init state_next(void)
register_syscore_ops(&amd_iommu_syscore_ops);
ret = amd_iommu_init_pci();
init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
+ enable_iommus_vapic();
enable_iommus_v2();
break;
case IOMMU_PCI_INIT:
@@ -2967,8 +3187,11 @@ static int __init state_next(void)
free_iommu_resources();
} else {
struct amd_iommu *iommu;
+ struct amd_iommu_pci_seg *pci_seg;
+
+ for_each_pci_segment(pci_seg)
+ uninit_device_table_dma(pci_seg);
- uninit_device_table_dma();
for_each_iommu(iommu)
iommu_flush_all_caches(iommu);
}
@@ -3161,15 +3384,17 @@ static int __init parse_amd_iommu_options(char *str)
static int __init parse_ivrs_ioapic(char *str)
{
- unsigned int bus, dev, fn;
+ u32 seg = 0, bus, dev, fn;
int ret, id, i;
- u16 devid;
+ u32 devid;
ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
-
if (ret != 4) {
- pr_err("Invalid command line: ivrs_ioapic%s\n", str);
- return 1;
+ ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn);
+ if (ret != 5) {
+ pr_err("Invalid command line: ivrs_ioapic%s\n", str);
+ return 1;
+ }
}
if (early_ioapic_map_size == EARLY_MAP_SIZE) {
@@ -3178,7 +3403,7 @@ static int __init parse_ivrs_ioapic(char *str)
return 1;
}
- devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
+ devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
cmdline_maps = true;
i = early_ioapic_map_size++;
@@ -3191,15 +3416,17 @@ static int __init parse_ivrs_ioapic(char *str)
static int __init parse_ivrs_hpet(char *str)
{
- unsigned int bus, dev, fn;
+ u32 seg = 0, bus, dev, fn;
int ret, id, i;
- u16 devid;
+ u32 devid;
ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
-
if (ret != 4) {
- pr_err("Invalid command line: ivrs_hpet%s\n", str);
- return 1;
+ ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn);
+ if (ret != 5) {
+ pr_err("Invalid command line: ivrs_hpet%s\n", str);
+ return 1;
+ }
}
if (early_hpet_map_size == EARLY_MAP_SIZE) {
@@ -3208,7 +3435,7 @@ static int __init parse_ivrs_hpet(char *str)
return 1;
}
- devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
+ devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
cmdline_maps = true;
i = early_hpet_map_size++;
@@ -3221,15 +3448,18 @@ static int __init parse_ivrs_hpet(char *str)
static int __init parse_ivrs_acpihid(char *str)
{
- u32 bus, dev, fn;
+ u32 seg = 0, bus, dev, fn;
char *hid, *uid, *p;
char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
int ret, i;
ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid);
if (ret != 4) {
- pr_err("Invalid command line: ivrs_acpihid(%s)\n", str);
- return 1;
+ ret = sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid);
+ if (ret != 5) {
+ pr_err("Invalid command line: ivrs_acpihid(%s)\n", str);
+ return 1;
+ }
}
p = acpiid;
@@ -3244,8 +3474,7 @@ static int __init parse_ivrs_acpihid(char *str)
i = early_acpihid_map_size++;
memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
- early_acpihid_map[i].devid =
- ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
+ early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
early_acpihid_map[i].cmd_line = true;
return 1;
@@ -3260,7 +3489,12 @@ __setup("ivrs_acpihid", parse_ivrs_acpihid);
bool amd_iommu_v2_supported(void)
{
- return amd_iommu_v2_present;
+ /*
+ * Since DTE[Mode]=0 is prohibited on SNP-enabled system
+ * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without
+ * setting up IOMMUv1 page table.
+ */
+ return amd_iommu_v2_present && !amd_iommu_snp_en;
}
EXPORT_SYMBOL(amd_iommu_v2_supported);
@@ -3363,3 +3597,41 @@ int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64
return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
}
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+int amd_iommu_snp_enable(void)
+{
+ /*
+ * The SNP support requires that IOMMU must be enabled, and is
+ * not configured in the passthrough mode.
+ */
+ if (no_iommu || iommu_default_passthrough()) {
+ pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported");
+ return -EINVAL;
+ }
+
+ /*
+ * Prevent enabling SNP after IOMMU_ENABLED state because this process
+ * affect how IOMMU driver sets up data structures and configures
+ * IOMMU hardware.
+ */
+ if (init_state > IOMMU_ENABLED) {
+ pr_err("SNP: Too late to enable SNP for IOMMU.\n");
+ return -EINVAL;
+ }
+
+ amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP);
+ if (!amd_iommu_snp_en)
+ return -EINVAL;
+
+ pr_info("SNP enabled\n");
+
+ /* Enforce IOMMU v1 pagetable when SNP is enabled. */
+ if (amd_iommu_pgtable != AMD_IOMMU_V1) {
+ pr_warn("Force to using AMD IOMMU v1 page table due to SNP\n");
+ amd_iommu_pgtable = AMD_IOMMU_V1;
+ }
+
+ return 0;
+}
+#endif
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 6608d1717574..7d4b61e5db47 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -258,7 +258,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
__npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));
/* pte could have been changed somewhere. */
- if (cmpxchg64(pte, __pte, __npte) != __pte)
+ if (!try_cmpxchg64(pte, &__pte, __npte))
free_page((unsigned long)page);
else if (IOMMU_PTE_PRESENT(__pte))
*updated = true;
@@ -341,10 +341,8 @@ static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
u64 *pt;
int mode;
- while (cmpxchg64(pte, pteval, 0) != pteval) {
+ while (!try_cmpxchg64(pte, &pteval, 0))
pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");
- pteval = *pte;
- }
if (!IOMMU_PTE_PRESENT(pteval))
return;
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 840831d5d2ad..65b8e4fd8217 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -62,9 +62,6 @@
static DEFINE_SPINLOCK(pd_bitmap_lock);
-/* List of all available dev_data structures */
-static LLIST_HEAD(dev_data_list);
-
LIST_HEAD(ioapic_map);
LIST_HEAD(hpet_map);
LIST_HEAD(acpihid_map);
@@ -95,13 +92,6 @@ static void detach_device(struct device *dev);
*
****************************************************************************/
-static inline u16 get_pci_device_id(struct device *dev)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
-
- return pci_dev_id(pdev);
-}
-
static inline int get_acpihid_device_id(struct device *dev,
struct acpihid_map_entry **entry)
{
@@ -122,16 +112,74 @@ static inline int get_acpihid_device_id(struct device *dev,
return -EINVAL;
}
-static inline int get_device_id(struct device *dev)
+static inline int get_device_sbdf_id(struct device *dev)
{
- int devid;
+ int sbdf;
if (dev_is_pci(dev))
- devid = get_pci_device_id(dev);
+ sbdf = get_pci_sbdf_id(to_pci_dev(dev));
else
- devid = get_acpihid_device_id(dev, NULL);
+ sbdf = get_acpihid_device_id(dev, NULL);
+
+ return sbdf;
+}
+
+struct dev_table_entry *get_dev_table(struct amd_iommu *iommu)
+{
+ struct dev_table_entry *dev_table;
+ struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
+
+ BUG_ON(pci_seg == NULL);
+ dev_table = pci_seg->dev_table;
+ BUG_ON(dev_table == NULL);
+
+ return dev_table;
+}
+
+static inline u16 get_device_segment(struct device *dev)
+{
+ u16 seg;
+
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ seg = pci_domain_nr(pdev->bus);
+ } else {
+ u32 devid = get_acpihid_device_id(dev, NULL);
+
+ seg = PCI_SBDF_TO_SEGID(devid);
+ }
+
+ return seg;
+}
+
+/* Writes the specific IOMMU for a device into the PCI segment rlookup table */
+void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid)
+{
+ struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
+
+ pci_seg->rlookup_table[devid] = iommu;
+}
+
+static struct amd_iommu *__rlookup_amd_iommu(u16 seg, u16 devid)
+{
+ struct amd_iommu_pci_seg *pci_seg;
+
+ for_each_pci_segment(pci_seg) {
+ if (pci_seg->id == seg)
+ return pci_seg->rlookup_table[devid];
+ }
+ return NULL;
+}
- return devid;
+static struct amd_iommu *rlookup_amd_iommu(struct device *dev)
+{
+ u16 seg = get_device_segment(dev);
+ int devid = get_device_sbdf_id(dev);
+
+ if (devid < 0)
+ return NULL;
+ return __rlookup_amd_iommu(seg, PCI_SBDF_TO_DEVID(devid));
}
static struct protection_domain *to_pdomain(struct iommu_domain *dom)
@@ -139,9 +187,10 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
return container_of(dom, struct protection_domain, domain);
}
-static struct iommu_dev_data *alloc_dev_data(u16 devid)
+static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid)
{
struct iommu_dev_data *dev_data;
+ struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
if (!dev_data)
@@ -151,19 +200,20 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
dev_data->devid = devid;
ratelimit_default_init(&dev_data->rs);
- llist_add(&dev_data->dev_data_list, &dev_data_list);
+ llist_add(&dev_data->dev_data_list, &pci_seg->dev_data_list);
return dev_data;
}
-static struct iommu_dev_data *search_dev_data(u16 devid)
+static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid)
{
struct iommu_dev_data *dev_data;
struct llist_node *node;
+ struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
- if (llist_empty(&dev_data_list))
+ if (llist_empty(&pci_seg->dev_data_list))
return NULL;
- node = dev_data_list.first;
+ node = pci_seg->dev_data_list.first;
llist_for_each_entry(dev_data, node, dev_data_list) {
if (dev_data->devid == devid)
return dev_data;
@@ -174,67 +224,74 @@ static struct iommu_dev_data *search_dev_data(u16 devid)
static int clone_alias(struct pci_dev *pdev, u16 alias, void *data)
{
+ struct amd_iommu *iommu;
+ struct dev_table_entry *dev_table;
u16 devid = pci_dev_id(pdev);
if (devid == alias)
return 0;
- amd_iommu_rlookup_table[alias] =
- amd_iommu_rlookup_table[devid];
- memcpy(amd_iommu_dev_table[alias].data,
- amd_iommu_dev_table[devid].data,
- sizeof(amd_iommu_dev_table[alias].data));
+ iommu = rlookup_amd_iommu(&pdev->dev);
+ if (!iommu)
+ return 0;
+
+ amd_iommu_set_rlookup_table(iommu, alias);
+ dev_table = get_dev_table(iommu);
+ memcpy(dev_table[alias].data,
+ dev_table[devid].data,
+ sizeof(dev_table[alias].data));
return 0;
}
-static void clone_aliases(struct pci_dev *pdev)
+static void clone_aliases(struct amd_iommu *iommu, struct device *dev)
{
- if (!pdev)
+ struct pci_dev *pdev;
+
+ if (!dev_is_pci(dev))
return;
+ pdev = to_pci_dev(dev);
/*
* The IVRS alias stored in the alias table may not be
* part of the PCI DMA aliases if it's bus differs
* from the original device.
*/
- clone_alias(pdev, amd_iommu_alias_table[pci_dev_id(pdev)], NULL);
+ clone_alias(pdev, iommu->pci_seg->alias_table[pci_dev_id(pdev)], NULL);
pci_for_each_dma_alias(pdev, clone_alias, NULL);
}
-static struct pci_dev *setup_aliases(struct device *dev)
+static void setup_aliases(struct amd_iommu *iommu, struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
+ struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
u16 ivrs_alias;
/* For ACPI HID devices, there are no aliases */
if (!dev_is_pci(dev))
- return NULL;
+ return;
/*
* Add the IVRS alias to the pci aliases if it is on the same
* bus. The IVRS table may know about a quirk that we don't.
*/
- ivrs_alias = amd_iommu_alias_table[pci_dev_id(pdev)];
+ ivrs_alias = pci_seg->alias_table[pci_dev_id(pdev)];
if (ivrs_alias != pci_dev_id(pdev) &&
PCI_BUS_NUM(ivrs_alias) == pdev->bus->number)
pci_add_dma_alias(pdev, ivrs_alias & 0xff, 1);
- clone_aliases(pdev);
-
- return pdev;
+ clone_aliases(iommu, dev);
}
-static struct iommu_dev_data *find_dev_data(u16 devid)
+static struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid)
{
struct iommu_dev_data *dev_data;
- struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
- dev_data = search_dev_data(devid);
+ dev_data = search_dev_data(iommu, devid);
if (dev_data == NULL) {
- dev_data = alloc_dev_data(devid);
+ dev_data = alloc_dev_data(iommu, devid);
if (!dev_data)
return NULL;
@@ -296,42 +353,49 @@ static bool pci_iommuv2_capable(struct pci_dev *pdev)
*/
static bool check_device(struct device *dev)
{
- int devid;
+ struct amd_iommu_pci_seg *pci_seg;
+ struct amd_iommu *iommu;
+ int devid, sbdf;
if (!dev)
return false;
- devid = get_device_id(dev);
- if (devid < 0)
+ sbdf = get_device_sbdf_id(dev);
+ if (sbdf < 0)
return false;
+ devid = PCI_SBDF_TO_DEVID(sbdf);
- /* Out of our scope? */
- if (devid > amd_iommu_last_bdf)
+ iommu = rlookup_amd_iommu(dev);
+ if (!iommu)
return false;
- if (amd_iommu_rlookup_table[devid] == NULL)
+ /* Out of our scope? */
+ pci_seg = iommu->pci_seg;
+ if (devid > pci_seg->last_bdf)
return false;
return true;
}
-static int iommu_init_device(struct device *dev)
+static int iommu_init_device(struct amd_iommu *iommu, struct device *dev)
{
struct iommu_dev_data *dev_data;
- int devid;
+ int devid, sbdf;
if (dev_iommu_priv_get(dev))
return 0;
- devid = get_device_id(dev);
- if (devid < 0)
- return devid;
+ sbdf = get_device_sbdf_id(dev);
+ if (sbdf < 0)
+ return sbdf;
- dev_data = find_dev_data(devid);
+ devid = PCI_SBDF_TO_DEVID(sbdf);
+ dev_data = find_dev_data(iommu, devid);
if (!dev_data)
return -ENOMEM;
- dev_data->pdev = setup_aliases(dev);
+ dev_data->dev = dev;
+ setup_aliases(iommu, dev);
/*
* By default we use passthrough mode for IOMMUv2 capable device.
@@ -341,9 +405,6 @@ static int iommu_init_device(struct device *dev)
*/
if ((iommu_default_passthrough() || !amd_iommu_force_isolation) &&
dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) {
- struct amd_iommu *iommu;
-
- iommu = amd_iommu_rlookup_table[dev_data->devid];
dev_data->iommu_v2 = iommu->is_iommu_v2;
}
@@ -352,18 +413,21 @@ static int iommu_init_device(struct device *dev)
return 0;
}
-static void iommu_ignore_device(struct device *dev)
+static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev)
{
- int devid;
+ struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
+ struct dev_table_entry *dev_table = get_dev_table(iommu);
+ int devid, sbdf;
- devid = get_device_id(dev);
- if (devid < 0)
+ sbdf = get_device_sbdf_id(dev);
+ if (sbdf < 0)
return;
- amd_iommu_rlookup_table[devid] = NULL;
- memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
+ devid = PCI_SBDF_TO_DEVID(sbdf);
+ pci_seg->rlookup_table[devid] = NULL;
+ memset(&dev_table[devid], 0, sizeof(struct dev_table_entry));
- setup_aliases(dev);
+ setup_aliases(iommu, dev);
}
static void amd_iommu_uninit_device(struct device *dev)
@@ -391,13 +455,13 @@ static void amd_iommu_uninit_device(struct device *dev)
*
****************************************************************************/
-static void dump_dte_entry(u16 devid)
+static void dump_dte_entry(struct amd_iommu *iommu, u16 devid)
{
int i;
+ struct dev_table_entry *dev_table = get_dev_table(iommu);
for (i = 0; i < 4; ++i)
- pr_err("DTE[%d]: %016llx\n", i,
- amd_iommu_dev_table[devid].data[i]);
+ pr_err("DTE[%d]: %016llx\n", i, dev_table[devid].data[i]);
}
static void dump_command(unsigned long phys_addr)
@@ -409,7 +473,7 @@ static void dump_command(unsigned long phys_addr)
pr_err("CMD[%d]: %08x\n", i, cmd->data[i]);
}
-static void amd_iommu_report_rmp_hw_error(volatile u32 *event)
+static void amd_iommu_report_rmp_hw_error(struct amd_iommu *iommu, volatile u32 *event)
{
struct iommu_dev_data *dev_data = NULL;
int devid, vmg_tag, flags;
@@ -421,7 +485,7 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event)
flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
spa = ((u64)event[3] << 32) | (event[2] & 0xFFFFFFF8);
- pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
+ pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid),
devid & 0xff);
if (pdev)
dev_data = dev_iommu_priv_get(&pdev->dev);
@@ -432,8 +496,8 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event)
vmg_tag, spa, flags);
}
} else {
- pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
- PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%04x:%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
+ iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
vmg_tag, spa, flags);
}
@@ -441,7 +505,7 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event)
pci_dev_put(pdev);
}
-static void amd_iommu_report_rmp_fault(volatile u32 *event)
+static void amd_iommu_report_rmp_fault(struct amd_iommu *iommu, volatile u32 *event)
{
struct iommu_dev_data *dev_data = NULL;
int devid, flags_rmp, vmg_tag, flags;
@@ -454,7 +518,7 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event)
flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
gpa = ((u64)event[3] << 32) | event[2];
- pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
+ pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid),
devid & 0xff);
if (pdev)
dev_data = dev_iommu_priv_get(&pdev->dev);
@@ -465,8 +529,8 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event)
vmg_tag, gpa, flags_rmp, flags);
}
} else {
- pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
- PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%04x:%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
+ iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
vmg_tag, gpa, flags_rmp, flags);
}
@@ -480,13 +544,14 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event)
#define IS_WRITE_REQUEST(flags) \
((flags) & EVENT_FLAG_RW)
-static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
+static void amd_iommu_report_page_fault(struct amd_iommu *iommu,
+ u16 devid, u16 domain_id,
u64 address, int flags)
{
struct iommu_dev_data *dev_data = NULL;
struct pci_dev *pdev;
- pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
+ pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid),
devid & 0xff);
if (pdev)
dev_data = dev_iommu_priv_get(&pdev->dev);
@@ -511,8 +576,8 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
domain_id, address, flags);
}
} else {
- pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
- PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%04x:%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
+ iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
domain_id, address, flags);
}
@@ -549,26 +614,26 @@ retry:
}
if (type == EVENT_TYPE_IO_FAULT) {
- amd_iommu_report_page_fault(devid, pasid, address, flags);
+ amd_iommu_report_page_fault(iommu, devid, pasid, address, flags);
return;
}
switch (type) {
case EVENT_TYPE_ILL_DEV:
- dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
- PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
+ iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
- dump_dte_entry(devid);
+ dump_dte_entry(iommu, devid);
break;
case EVENT_TYPE_DEV_TAB_ERR:
- dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+ dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%04x:%02x:%02x.%x "
"address=0x%llx flags=0x%04x]\n",
- PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address, flags);
break;
case EVENT_TYPE_PAGE_TAB_ERR:
- dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n",
- PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%04x:%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n",
+ iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
break;
case EVENT_TYPE_ILL_CMD:
@@ -580,26 +645,26 @@ retry:
address, flags);
break;
case EVENT_TYPE_IOTLB_INV_TO:
- dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%llx]\n",
- PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%04x:%02x:%02x.%x address=0x%llx]\n",
+ iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address);
break;
case EVENT_TYPE_INV_DEV_REQ:
- dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
- PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
+ iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
break;
case EVENT_TYPE_RMP_FAULT:
- amd_iommu_report_rmp_fault(event);
+ amd_iommu_report_rmp_fault(iommu, event);
break;
case EVENT_TYPE_RMP_HW_ERR:
- amd_iommu_report_rmp_hw_error(event);
+ amd_iommu_report_rmp_hw_error(iommu, event);
break;
case EVENT_TYPE_INV_PPR_REQ:
pasid = PPR_PASID(*((u64 *)__evt));
tag = event[1] & 0x03FF;
- dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n",
- PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n",
+ iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags, tag);
break;
default:
@@ -636,7 +701,7 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
fault.address = raw[1];
fault.pasid = PPR_PASID(raw[0]);
- fault.device_id = PPR_DEVID(raw[0]);
+ fault.sbdf = PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, PPR_DEVID(raw[0]));
fault.tag = PPR_TAG(raw[0]);
fault.flags = PPR_FLAGS(raw[0]);
@@ -1125,8 +1190,9 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
static void amd_iommu_flush_dte_all(struct amd_iommu *iommu)
{
u32 devid;
+ u16 last_bdf = iommu->pci_seg->last_bdf;
- for (devid = 0; devid <= 0xffff; ++devid)
+ for (devid = 0; devid <= last_bdf; ++devid)
iommu_flush_dte(iommu, devid);
iommu_completion_wait(iommu);
@@ -1139,8 +1205,9 @@ static void amd_iommu_flush_dte_all(struct amd_iommu *iommu)
static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu)
{
u32 dom_id;
+ u16 last_bdf = iommu->pci_seg->last_bdf;
- for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
+ for (dom_id = 0; dom_id <= last_bdf; ++dom_id) {
struct iommu_cmd cmd;
build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
dom_id, 1);
@@ -1183,8 +1250,9 @@ static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid)
static void amd_iommu_flush_irt_all(struct amd_iommu *iommu)
{
u32 devid;
+ u16 last_bdf = iommu->pci_seg->last_bdf;
- for (devid = 0; devid <= MAX_DEV_TABLE_ENTRIES; devid++)
+ for (devid = 0; devid <= last_bdf; devid++)
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
@@ -1212,7 +1280,9 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data,
int qdep;
qdep = dev_data->ats.qdep;
- iommu = amd_iommu_rlookup_table[dev_data->devid];
+ iommu = rlookup_amd_iommu(dev_data->dev);
+ if (!iommu)
+ return -EINVAL;
build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size);
@@ -1232,20 +1302,28 @@ static int device_flush_dte_alias(struct pci_dev *pdev, u16 alias, void *data)
static int device_flush_dte(struct iommu_dev_data *dev_data)
{
struct amd_iommu *iommu;
+ struct pci_dev *pdev = NULL;
+ struct amd_iommu_pci_seg *pci_seg;
u16 alias;
int ret;
- iommu = amd_iommu_rlookup_table[dev_data->devid];
+ iommu = rlookup_amd_iommu(dev_data->dev);
+ if (!iommu)
+ return -EINVAL;
- if (dev_data->pdev)
- ret = pci_for_each_dma_alias(dev_data->pdev,
+ if (dev_is_pci(dev_data->dev))
+ pdev = to_pci_dev(dev_data->dev);
+
+ if (pdev)
+ ret = pci_for_each_dma_alias(pdev,
device_flush_dte_alias, iommu);
else
ret = iommu_flush_dte(iommu, dev_data->devid);
if (ret)
return ret;
- alias = amd_iommu_alias_table[dev_data->devid];
+ pci_seg = iommu->pci_seg;
+ alias = pci_seg->alias_table[dev_data->devid];
if (alias != dev_data->devid) {
ret = iommu_flush_dte(iommu, alias);
if (ret)
@@ -1461,28 +1539,35 @@ static void free_gcr3_table(struct protection_domain *domain)
free_page((unsigned long)domain->gcr3_tbl);
}
-static void set_dte_entry(u16 devid, struct protection_domain *domain,
- bool ats, bool ppr)
+static void set_dte_entry(struct amd_iommu *iommu, u16 devid,
+ struct protection_domain *domain, bool ats, bool ppr)
{
u64 pte_root = 0;
u64 flags = 0;
u32 old_domid;
+ struct dev_table_entry *dev_table = get_dev_table(iommu);
if (domain->iop.mode != PAGE_MODE_NONE)
pte_root = iommu_virt_to_phys(domain->iop.root);
pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
- pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
- flags = amd_iommu_dev_table[devid].data[1];
+ pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V;
+
+ /*
+ * When SNP is enabled, Only set TV bit when IOMMU
+ * page translation is in use.
+ */
+ if (!amd_iommu_snp_en || (domain->id != 0))
+ pte_root |= DTE_FLAG_TV;
+
+ flags = dev_table[devid].data[1];
if (ats)
flags |= DTE_FLAG_IOTLB;
if (ppr) {
- struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
-
if (iommu_feature(iommu, FEATURE_EPHSUP))
pte_root |= 1ULL << DEV_ENTRY_PPR;
}
@@ -1516,9 +1601,9 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain,
flags &= ~DEV_DOMID_MASK;
flags |= domain->id;
- old_domid = amd_iommu_dev_table[devid].data[1] & DEV_DOMID_MASK;
- amd_iommu_dev_table[devid].data[1] = flags;
- amd_iommu_dev_table[devid].data[0] = pte_root;
+ old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK;
+ dev_table[devid].data[1] = flags;
+ dev_table[devid].data[0] = pte_root;
/*
* A kdump kernel might be replacing a domain ID that was copied from
@@ -1526,19 +1611,23 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain,
* entries for the old domain ID that is being overwritten
*/
if (old_domid) {
- struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
-
amd_iommu_flush_tlb_domid(iommu, old_domid);
}
}
-static void clear_dte_entry(u16 devid)
+static void clear_dte_entry(struct amd_iommu *iommu, u16 devid)
{
+ struct dev_table_entry *dev_table = get_dev_table(iommu);
+
/* remove entry from the device table seen by the hardware */
- amd_iommu_dev_table[devid].data[0] = DTE_FLAG_V | DTE_FLAG_TV;
- amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK;
+ dev_table[devid].data[0] = DTE_FLAG_V;
+
+ if (!amd_iommu_snp_en)
+ dev_table[devid].data[0] |= DTE_FLAG_TV;
+
+ dev_table[devid].data[1] &= DTE_FLAG_MASK;
- amd_iommu_apply_erratum_63(devid);
+ amd_iommu_apply_erratum_63(iommu, devid);
}
static void do_attach(struct iommu_dev_data *dev_data,
@@ -1547,7 +1636,9 @@ static void do_attach(struct iommu_dev_data *dev_data,
struct amd_iommu *iommu;
bool ats;
- iommu = amd_iommu_rlookup_table[dev_data->devid];
+ iommu = rlookup_amd_iommu(dev_data->dev);
+ if (!iommu)
+ return;
ats = dev_data->ats.enabled;
/* Update data structures */
@@ -1559,9 +1650,9 @@ static void do_attach(struct iommu_dev_data *dev_data,
domain->dev_cnt += 1;
/* Update device table */
- set_dte_entry(dev_data->devid, domain,
+ set_dte_entry(iommu, dev_data->devid, domain,
ats, dev_data->iommu_v2);
- clone_aliases(dev_data->pdev);
+ clone_aliases(iommu, dev_data->dev);
device_flush_dte(dev_data);
}
@@ -1571,13 +1662,15 @@ static void do_detach(struct iommu_dev_data *dev_data)
struct protection_domain *domain = dev_data->domain;
struct amd_iommu *iommu;
- iommu = amd_iommu_rlookup_table[dev_data->devid];
+ iommu = rlookup_amd_iommu(dev_data->dev);
+ if (!iommu)
+ return;
/* Update data structures */
dev_data->domain = NULL;
list_del(&dev_data->list);
- clear_dte_entry(dev_data->devid);
- clone_aliases(dev_data->pdev);
+ clear_dte_entry(iommu, dev_data->devid);
+ clone_aliases(iommu, dev_data->dev);
/* Flush the DTE entry */
device_flush_dte(dev_data);
@@ -1749,23 +1842,24 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
{
struct iommu_device *iommu_dev;
struct amd_iommu *iommu;
- int ret, devid;
+ int ret;
if (!check_device(dev))
return ERR_PTR(-ENODEV);
- devid = get_device_id(dev);
- iommu = amd_iommu_rlookup_table[devid];
+ iommu = rlookup_amd_iommu(dev);
+ if (!iommu)
+ return ERR_PTR(-ENODEV);
if (dev_iommu_priv_get(dev))
return &iommu->iommu;
- ret = iommu_init_device(dev);
+ ret = iommu_init_device(iommu, dev);
if (ret) {
if (ret != -ENOTSUPP)
dev_err(dev, "Failed to initialize - trying to proceed anyway\n");
iommu_dev = ERR_PTR(ret);
- iommu_ignore_device(dev);
+ iommu_ignore_device(iommu, dev);
} else {
amd_iommu_set_pci_msi_domain(dev, iommu);
iommu_dev = &iommu->iommu;
@@ -1785,13 +1879,14 @@ static void amd_iommu_probe_finalize(struct device *dev)
static void amd_iommu_release_device(struct device *dev)
{
- int devid = get_device_id(dev);
struct amd_iommu *iommu;
if (!check_device(dev))
return;
- iommu = amd_iommu_rlookup_table[devid];
+ iommu = rlookup_amd_iommu(dev);
+ if (!iommu)
+ return;
amd_iommu_uninit_device(dev);
iommu_completion_wait(iommu);
@@ -1816,9 +1911,13 @@ static void update_device_table(struct protection_domain *domain)
struct iommu_dev_data *dev_data;
list_for_each_entry(dev_data, &domain->dev_list, list) {
- set_dte_entry(dev_data->devid, domain,
+ struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev);
+
+ if (!iommu)
+ continue;
+ set_dte_entry(iommu, dev_data->devid, domain,
dev_data->ats.enabled, dev_data->iommu_v2);
- clone_aliases(dev_data->pdev);
+ clone_aliases(iommu, dev_data->dev);
}
}
@@ -1969,6 +2068,13 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
{
struct protection_domain *domain;
+ /*
+ * Since DTE[Mode]=0 is prohibited on SNP-enabled system,
+ * default to use IOMMU_DOMAIN_DMA[_FQ].
+ */
+ if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY))
+ return NULL;
+
domain = protection_domain_alloc(type);
if (!domain)
return NULL;
@@ -2004,7 +2110,6 @@ static void amd_iommu_detach_device(struct iommu_domain *dom,
struct device *dev)
{
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
- int devid = get_device_id(dev);
struct amd_iommu *iommu;
if (!check_device(dev))
@@ -2013,7 +2118,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom,
if (dev_data->domain != NULL)
detach_device(dev);
- iommu = amd_iommu_rlookup_table[devid];
+ iommu = rlookup_amd_iommu(dev);
if (!iommu)
return;
@@ -2040,7 +2145,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
dev_data = dev_iommu_priv_get(dev);
dev_data->defer_attach = false;
- iommu = amd_iommu_rlookup_table[dev_data->devid];
+ iommu = rlookup_amd_iommu(dev);
if (!iommu)
return -EINVAL;
@@ -2169,13 +2274,21 @@ static void amd_iommu_get_resv_regions(struct device *dev,
{
struct iommu_resv_region *region;
struct unity_map_entry *entry;
- int devid;
+ struct amd_iommu *iommu;
+ struct amd_iommu_pci_seg *pci_seg;
+ int devid, sbdf;
- devid = get_device_id(dev);
- if (devid < 0)
+ sbdf = get_device_sbdf_id(dev);
+ if (sbdf < 0)
+ return;
+
+ devid = PCI_SBDF_TO_DEVID(sbdf);
+ iommu = rlookup_amd_iommu(dev);
+ if (!iommu)
return;
+ pci_seg = iommu->pci_seg;
- list_for_each_entry(entry, &amd_iommu_unity_map, list) {
+ list_for_each_entry(entry, &pci_seg->unity_map, list) {
int type, prot = 0;
size_t length;
@@ -2280,7 +2393,6 @@ const struct iommu_ops amd_iommu_ops = {
.probe_finalize = amd_iommu_probe_finalize,
.device_group = amd_iommu_device_group,
.get_resv_regions = amd_iommu_get_resv_regions,
- .put_resv_regions = generic_iommu_put_resv_regions,
.is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
.def_domain_type = amd_iommu_def_domain_type,
@@ -2419,8 +2531,9 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid,
continue;
qdep = dev_data->ats.qdep;
- iommu = amd_iommu_rlookup_table[dev_data->devid];
-
+ iommu = rlookup_amd_iommu(dev_data->dev);
+ if (!iommu)
+ continue;
build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid,
qdep, address, size);
@@ -2582,7 +2695,9 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
struct iommu_cmd cmd;
dev_data = dev_iommu_priv_get(&pdev->dev);
- iommu = amd_iommu_rlookup_table[dev_data->devid];
+ iommu = rlookup_amd_iommu(&pdev->dev);
+ if (!iommu)
+ return -ENODEV;
build_complete_ppr(&cmd, dev_data->devid, pasid, status,
tag, dev_data->pri_tlp);
@@ -2644,30 +2759,35 @@ EXPORT_SYMBOL(amd_iommu_device_info);
static struct irq_chip amd_ir_chip;
static DEFINE_SPINLOCK(iommu_table_lock);
-static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
+static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
+ struct irq_remap_table *table)
{
u64 dte;
+ struct dev_table_entry *dev_table = get_dev_table(iommu);
- dte = amd_iommu_dev_table[devid].data[2];
+ dte = dev_table[devid].data[2];
dte &= ~DTE_IRQ_PHYS_ADDR_MASK;
dte |= iommu_virt_to_phys(table->table);
dte |= DTE_IRQ_REMAP_INTCTL;
dte |= DTE_INTTABLEN;
dte |= DTE_IRQ_REMAP_ENABLE;
- amd_iommu_dev_table[devid].data[2] = dte;
+ dev_table[devid].data[2] = dte;
}
-static struct irq_remap_table *get_irq_table(u16 devid)
+static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid)
{
struct irq_remap_table *table;
+ struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
- if (WARN_ONCE(!amd_iommu_rlookup_table[devid],
- "%s: no iommu for devid %x\n", __func__, devid))
+ if (WARN_ONCE(!pci_seg->rlookup_table[devid],
+ "%s: no iommu for devid %x:%x\n",
+ __func__, pci_seg->id, devid))
return NULL;
- table = irq_lookup_table[devid];
- if (WARN_ONCE(!table, "%s: no table for devid %x\n", __func__, devid))
+ table = pci_seg->irq_lookup_table[devid];
+ if (WARN_ONCE(!table, "%s: no table for devid %x:%x\n",
+ __func__, pci_seg->id, devid))
return NULL;
return table;
@@ -2700,8 +2820,10 @@ static struct irq_remap_table *__alloc_irq_table(void)
static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid,
struct irq_remap_table *table)
{
- irq_lookup_table[devid] = table;
- set_dte_irq_entry(devid, table);
+ struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
+
+ pci_seg->irq_lookup_table[devid] = table;
+ set_dte_irq_entry(iommu, devid, table);
iommu_flush_dte(iommu, devid);
}
@@ -2709,35 +2831,38 @@ static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias,
void *data)
{
struct irq_remap_table *table = data;
+ struct amd_iommu_pci_seg *pci_seg;
+ struct amd_iommu *iommu = rlookup_amd_iommu(&pdev->dev);
- irq_lookup_table[alias] = table;
- set_dte_irq_entry(alias, table);
+ if (!iommu)
+ return -EINVAL;
- iommu_flush_dte(amd_iommu_rlookup_table[alias], alias);
+ pci_seg = iommu->pci_seg;
+ pci_seg->irq_lookup_table[alias] = table;
+ set_dte_irq_entry(iommu, alias, table);
+ iommu_flush_dte(pci_seg->rlookup_table[alias], alias);
return 0;
}
-static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev)
+static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
+ u16 devid, struct pci_dev *pdev)
{
struct irq_remap_table *table = NULL;
struct irq_remap_table *new_table = NULL;
- struct amd_iommu *iommu;
+ struct amd_iommu_pci_seg *pci_seg;
unsigned long flags;
u16 alias;
spin_lock_irqsave(&iommu_table_lock, flags);
- iommu = amd_iommu_rlookup_table[devid];
- if (!iommu)
- goto out_unlock;
-
- table = irq_lookup_table[devid];
+ pci_seg = iommu->pci_seg;
+ table = pci_seg->irq_lookup_table[devid];
if (table)
goto out_unlock;
- alias = amd_iommu_alias_table[devid];
- table = irq_lookup_table[alias];
+ alias = pci_seg->alias_table[devid];
+ table = pci_seg->irq_lookup_table[alias];
if (table) {
set_remap_table_entry(iommu, devid, table);
goto out_wait;
@@ -2751,11 +2876,11 @@ static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev)
spin_lock_irqsave(&iommu_table_lock, flags);
- table = irq_lookup_table[devid];
+ table = pci_seg->irq_lookup_table[devid];
if (table)
goto out_unlock;
- table = irq_lookup_table[alias];
+ table = pci_seg->irq_lookup_table[alias];
if (table) {
set_remap_table_entry(iommu, devid, table);
goto out_wait;
@@ -2786,18 +2911,14 @@ out_unlock:
return table;
}
-static int alloc_irq_index(u16 devid, int count, bool align,
- struct pci_dev *pdev)
+static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count,
+ bool align, struct pci_dev *pdev)
{
struct irq_remap_table *table;
int index, c, alignment = 1;
unsigned long flags;
- struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
- if (!iommu)
- return -ENODEV;
-
- table = alloc_irq_table(devid, pdev);
+ table = alloc_irq_table(iommu, devid, pdev);
if (!table)
return -ENODEV;
@@ -2836,20 +2957,15 @@ out:
return index;
}
-static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte,
- struct amd_ir_data *data)
+static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
+ struct irte_ga *irte, struct amd_ir_data *data)
{
bool ret;
struct irq_remap_table *table;
- struct amd_iommu *iommu;
unsigned long flags;
struct irte_ga *entry;
- iommu = amd_iommu_rlookup_table[devid];
- if (iommu == NULL)
- return -EINVAL;
-
- table = get_irq_table(devid);
+ table = get_irq_table(iommu, devid);
if (!table)
return -ENOMEM;
@@ -2880,17 +2996,13 @@ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte,
return 0;
}
-static int modify_irte(u16 devid, int index, union irte *irte)
+static int modify_irte(struct amd_iommu *iommu,
+ u16 devid, int index, union irte *irte)
{
struct irq_remap_table *table;
- struct amd_iommu *iommu;
unsigned long flags;
- iommu = amd_iommu_rlookup_table[devid];
- if (iommu == NULL)
- return -EINVAL;
-
- table = get_irq_table(devid);
+ table = get_irq_table(iommu, devid);
if (!table)
return -ENOMEM;
@@ -2904,17 +3016,12 @@ static int modify_irte(u16 devid, int index, union irte *irte)
return 0;
}
-static void free_irte(u16 devid, int index)
+static void free_irte(struct amd_iommu *iommu, u16 devid, int index)
{
struct irq_remap_table *table;
- struct amd_iommu *iommu;
unsigned long flags;
- iommu = amd_iommu_rlookup_table[devid];
- if (iommu == NULL)
- return;
-
- table = get_irq_table(devid);
+ table = get_irq_table(iommu, devid);
if (!table)
return;
@@ -2956,49 +3063,49 @@ static void irte_ga_prepare(void *entry,
irte->lo.fields_remap.valid = 1;
}
-static void irte_activate(void *entry, u16 devid, u16 index)
+static void irte_activate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index)
{
union irte *irte = (union irte *) entry;
irte->fields.valid = 1;
- modify_irte(devid, index, irte);
+ modify_irte(iommu, devid, index, irte);
}
-static void irte_ga_activate(void *entry, u16 devid, u16 index)
+static void irte_ga_activate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index)
{
struct irte_ga *irte = (struct irte_ga *) entry;
irte->lo.fields_remap.valid = 1;
- modify_irte_ga(devid, index, irte, NULL);
+ modify_irte_ga(iommu, devid, index, irte, NULL);
}
-static void irte_deactivate(void *entry, u16 devid, u16 index)
+static void irte_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index)
{
union irte *irte = (union irte *) entry;
irte->fields.valid = 0;
- modify_irte(devid, index, irte);
+ modify_irte(iommu, devid, index, irte);
}
-static void irte_ga_deactivate(void *entry, u16 devid, u16 index)
+static void irte_ga_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index)
{
struct irte_ga *irte = (struct irte_ga *) entry;
irte->lo.fields_remap.valid = 0;
- modify_irte_ga(devid, index, irte, NULL);
+ modify_irte_ga(iommu, devid, index, irte, NULL);
}
-static void irte_set_affinity(void *entry, u16 devid, u16 index,
+static void irte_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index,
u8 vector, u32 dest_apicid)
{
union irte *irte = (union irte *) entry;
irte->fields.vector = vector;
irte->fields.destination = dest_apicid;
- modify_irte(devid, index, irte);
+ modify_irte(iommu, devid, index, irte);
}
-static void irte_ga_set_affinity(void *entry, u16 devid, u16 index,
+static void irte_ga_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index,
u8 vector, u32 dest_apicid)
{
struct irte_ga *irte = (struct irte_ga *) entry;
@@ -3009,7 +3116,7 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index,
APICID_TO_IRTE_DEST_LO(dest_apicid);
irte->hi.fields.destination =
APICID_TO_IRTE_DEST_HI(dest_apicid);
- modify_irte_ga(devid, index, irte, NULL);
+ modify_irte_ga(iommu, devid, index, irte, NULL);
}
}
@@ -3068,7 +3175,7 @@ static int get_devid(struct irq_alloc_info *info)
return get_hpet_devid(info->devid);
case X86_IRQ_ALLOC_TYPE_PCI_MSI:
case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
- return get_device_id(msi_desc_to_dev(info->desc));
+ return get_device_sbdf_id(msi_desc_to_dev(info->desc));
default:
WARN_ON_ONCE(1);
return -1;
@@ -3097,7 +3204,7 @@ static void irq_remapping_prepare_irte(struct amd_ir_data *data,
int devid, int index, int sub_handle)
{
struct irq_2_irte *irte_info = &data->irq_2_irte;
- struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+ struct amd_iommu *iommu = data->iommu;
if (!iommu)
return;
@@ -3148,8 +3255,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
struct irq_alloc_info *info = arg;
struct irq_data *irq_data;
struct amd_ir_data *data = NULL;
+ struct amd_iommu *iommu;
struct irq_cfg *cfg;
- int i, ret, devid;
+ int i, ret, devid, seg, sbdf;
int index;
if (!info)
@@ -3165,8 +3273,14 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI)
info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
- devid = get_devid(info);
- if (devid < 0)
+ sbdf = get_devid(info);
+ if (sbdf < 0)
+ return -EINVAL;
+
+ seg = PCI_SBDF_TO_SEGID(sbdf);
+ devid = PCI_SBDF_TO_DEVID(sbdf);
+ iommu = __rlookup_amd_iommu(seg, devid);
+ if (!iommu)
return -EINVAL;
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
@@ -3175,9 +3289,8 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) {
struct irq_remap_table *table;
- struct amd_iommu *iommu;
- table = alloc_irq_table(devid, NULL);
+ table = alloc_irq_table(iommu, devid, NULL);
if (table) {
if (!table->min_index) {
/*
@@ -3185,7 +3298,6 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
* interrupts.
*/
table->min_index = 32;
- iommu = amd_iommu_rlookup_table[devid];
for (i = 0; i < 32; ++i)
iommu->irte_ops->set_allocated(table, i);
}
@@ -3198,10 +3310,10 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
info->type == X86_IRQ_ALLOC_TYPE_PCI_MSIX) {
bool align = (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI);
- index = alloc_irq_index(devid, nr_irqs, align,
+ index = alloc_irq_index(iommu, devid, nr_irqs, align,
msi_desc_to_pci_dev(info->desc));
} else {
- index = alloc_irq_index(devid, nr_irqs, false, NULL);
+ index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL);
}
if (index < 0) {
@@ -3233,6 +3345,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
goto out_free_data;
}
+ data->iommu = iommu;
irq_data->hwirq = (devid << 16) + i;
irq_data->chip_data = data;
irq_data->chip = &amd_ir_chip;
@@ -3249,7 +3362,7 @@ out_free_data:
kfree(irq_data->chip_data);
}
for (i = 0; i < nr_irqs; i++)
- free_irte(devid, index + i);
+ free_irte(iommu, devid, index + i);
out_free_parent:
irq_domain_free_irqs_common(domain, virq, nr_irqs);
return ret;
@@ -3268,7 +3381,7 @@ static void irq_remapping_free(struct irq_domain *domain, unsigned int virq,
if (irq_data && irq_data->chip_data) {
data = irq_data->chip_data;
irte_info = &data->irq_2_irte;
- free_irte(irte_info->devid, irte_info->index);
+ free_irte(data->iommu, irte_info->devid, irte_info->index);
kfree(data->entry);
kfree(data);
}
@@ -3286,13 +3399,13 @@ static int irq_remapping_activate(struct irq_domain *domain,
{
struct amd_ir_data *data = irq_data->chip_data;
struct irq_2_irte *irte_info = &data->irq_2_irte;
- struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
+ struct amd_iommu *iommu = data->iommu;
struct irq_cfg *cfg = irqd_cfg(irq_data);
if (!iommu)
return 0;
- iommu->irte_ops->activate(data->entry, irte_info->devid,
+ iommu->irte_ops->activate(iommu, data->entry, irte_info->devid,
irte_info->index);
amd_ir_update_irte(irq_data, iommu, data, irte_info, cfg);
return 0;
@@ -3303,10 +3416,10 @@ static void irq_remapping_deactivate(struct irq_domain *domain,
{
struct amd_ir_data *data = irq_data->chip_data;
struct irq_2_irte *irte_info = &data->irq_2_irte;
- struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
+ struct amd_iommu *iommu = data->iommu;
if (iommu)
- iommu->irte_ops->deactivate(data->entry, irte_info->devid,
+ iommu->irte_ops->deactivate(iommu, data->entry, irte_info->devid,
irte_info->index);
}
@@ -3326,8 +3439,8 @@ static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec,
if (devid < 0)
return 0;
+ iommu = __rlookup_amd_iommu((devid >> 16), (devid & 0xffff));
- iommu = amd_iommu_rlookup_table[devid];
return iommu && iommu->ir_domain == d;
}
@@ -3361,7 +3474,7 @@ int amd_iommu_activate_guest_mode(void *data)
entry->hi.fields.vector = ir_data->ga_vector;
entry->lo.fields_vapic.ga_tag = ir_data->ga_tag;
- return modify_irte_ga(ir_data->irq_2_irte.devid,
+ return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
ir_data->irq_2_irte.index, entry, ir_data);
}
EXPORT_SYMBOL(amd_iommu_activate_guest_mode);
@@ -3391,7 +3504,7 @@ int amd_iommu_deactivate_guest_mode(void *data)
entry->hi.fields.destination =
APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
- return modify_irte_ga(ir_data->irq_2_irte.devid,
+ return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
ir_data->irq_2_irte.index, entry, ir_data);
}
EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
@@ -3399,12 +3512,16 @@ EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
{
int ret;
- struct amd_iommu *iommu;
struct amd_iommu_pi_data *pi_data = vcpu_info;
struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data;
struct amd_ir_data *ir_data = data->chip_data;
struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
- struct iommu_dev_data *dev_data = search_dev_data(irte_info->devid);
+ struct iommu_dev_data *dev_data;
+
+ if (ir_data->iommu == NULL)
+ return -EINVAL;
+
+ dev_data = search_dev_data(ir_data->iommu, irte_info->devid);
/* Note:
* This device has never been set up for guest mode.
@@ -3426,10 +3543,6 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
pi_data->is_guest_mode = false;
}
- iommu = amd_iommu_rlookup_table[irte_info->devid];
- if (iommu == NULL)
- return -EINVAL;
-
pi_data->prev_ga_tag = ir_data->cached_ga_tag;
if (pi_data->is_guest_mode) {
ir_data->ga_root_ptr = (pi_data->base >> 12);
@@ -3463,7 +3576,7 @@ static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
* Atomically updates the IRTE with the new destination, vector
* and flushes the interrupt entry cache.
*/
- iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid,
+ iommu->irte_ops->set_affinity(iommu, ir_data->entry, irte_info->devid,
irte_info->index, cfg->vector,
cfg->dest_apicid);
}
@@ -3475,7 +3588,7 @@ static int amd_ir_set_affinity(struct irq_data *data,
struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
struct irq_cfg *cfg = irqd_cfg(data);
struct irq_data *parent = data->parent_data;
- struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
+ struct amd_iommu *iommu = ir_data->iommu;
int ret;
if (!iommu)
@@ -3545,11 +3658,11 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
!ref || !entry || !entry->lo.fields_vapic.guest_mode)
return 0;
- iommu = amd_iommu_rlookup_table[devid];
+ iommu = ir_data->iommu;
if (!iommu)
return -ENODEV;
- table = get_irq_table(devid);
+ table = get_irq_table(iommu, devid);
if (!table)
return -ENODEV;
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index afb3efd565b7..696d5555be57 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -51,7 +51,7 @@ struct pasid_state {
struct device_state {
struct list_head list;
- u16 devid;
+ u32 sbdf;
atomic_t count;
struct pci_dev *pdev;
struct pasid_state **states;
@@ -83,35 +83,25 @@ static struct workqueue_struct *iommu_wq;
static void free_pasid_states(struct device_state *dev_state);
-static u16 device_id(struct pci_dev *pdev)
-{
- u16 devid;
-
- devid = pdev->bus->number;
- devid = (devid << 8) | pdev->devfn;
-
- return devid;
-}
-
-static struct device_state *__get_device_state(u16 devid)
+static struct device_state *__get_device_state(u32 sbdf)
{
struct device_state *dev_state;
list_for_each_entry(dev_state, &state_list, list) {
- if (dev_state->devid == devid)
+ if (dev_state->sbdf == sbdf)
return dev_state;
}
return NULL;
}
-static struct device_state *get_device_state(u16 devid)
+static struct device_state *get_device_state(u32 sbdf)
{
struct device_state *dev_state;
unsigned long flags;
spin_lock_irqsave(&state_lock, flags);
- dev_state = __get_device_state(devid);
+ dev_state = __get_device_state(sbdf);
if (dev_state != NULL)
atomic_inc(&dev_state->count);
spin_unlock_irqrestore(&state_lock, flags);
@@ -528,15 +518,16 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
unsigned long flags;
struct fault *fault;
bool finish;
- u16 tag, devid;
+ u16 tag, devid, seg_id;
int ret;
iommu_fault = data;
tag = iommu_fault->tag & 0x1ff;
finish = (iommu_fault->tag >> 9) & 1;
- devid = iommu_fault->device_id;
- pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
+ seg_id = PCI_SBDF_TO_SEGID(iommu_fault->sbdf);
+ devid = PCI_SBDF_TO_DEVID(iommu_fault->sbdf);
+ pdev = pci_get_domain_bus_and_slot(seg_id, PCI_BUS_NUM(devid),
devid & 0xff);
if (!pdev)
return -ENODEV;
@@ -550,7 +541,7 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
goto out;
}
- dev_state = get_device_state(iommu_fault->device_id);
+ dev_state = get_device_state(iommu_fault->sbdf);
if (dev_state == NULL)
goto out;
@@ -609,7 +600,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
struct pasid_state *pasid_state;
struct device_state *dev_state;
struct mm_struct *mm;
- u16 devid;
+ u32 sbdf;
int ret;
might_sleep();
@@ -617,8 +608,8 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
if (!amd_iommu_v2_supported())
return -ENODEV;
- devid = device_id(pdev);
- dev_state = get_device_state(devid);
+ sbdf = get_pci_sbdf_id(pdev);
+ dev_state = get_device_state(sbdf);
if (dev_state == NULL)
return -EINVAL;
@@ -692,15 +683,15 @@ void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid)
{
struct pasid_state *pasid_state;
struct device_state *dev_state;
- u16 devid;
+ u32 sbdf;
might_sleep();
if (!amd_iommu_v2_supported())
return;
- devid = device_id(pdev);
- dev_state = get_device_state(devid);
+ sbdf = get_pci_sbdf_id(pdev);
+ dev_state = get_device_state(sbdf);
if (dev_state == NULL)
return;
@@ -742,7 +733,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
struct iommu_group *group;
unsigned long flags;
int ret, tmp;
- u16 devid;
+ u32 sbdf;
might_sleep();
@@ -759,7 +750,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
if (pasids <= 0 || pasids > (PASID_MASK + 1))
return -EINVAL;
- devid = device_id(pdev);
+ sbdf = get_pci_sbdf_id(pdev);
dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
if (dev_state == NULL)
@@ -768,7 +759,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
spin_lock_init(&dev_state->lock);
init_waitqueue_head(&dev_state->wq);
dev_state->pdev = pdev;
- dev_state->devid = devid;
+ dev_state->sbdf = sbdf;
tmp = pasids;
for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
@@ -806,7 +797,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
spin_lock_irqsave(&state_lock, flags);
- if (__get_device_state(devid) != NULL) {
+ if (__get_device_state(sbdf) != NULL) {
spin_unlock_irqrestore(&state_lock, flags);
ret = -EBUSY;
goto out_free_domain;
@@ -838,16 +829,16 @@ void amd_iommu_free_device(struct pci_dev *pdev)
{
struct device_state *dev_state;
unsigned long flags;
- u16 devid;
+ u32 sbdf;
if (!amd_iommu_v2_supported())
return;
- devid = device_id(pdev);
+ sbdf = get_pci_sbdf_id(pdev);
spin_lock_irqsave(&state_lock, flags);
- dev_state = __get_device_state(devid);
+ dev_state = __get_device_state(sbdf);
if (dev_state == NULL) {
spin_unlock_irqrestore(&state_lock, flags);
return;
@@ -867,18 +858,18 @@ int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
{
struct device_state *dev_state;
unsigned long flags;
- u16 devid;
+ u32 sbdf;
int ret;
if (!amd_iommu_v2_supported())
return -ENODEV;
- devid = device_id(pdev);
+ sbdf = get_pci_sbdf_id(pdev);
spin_lock_irqsave(&state_lock, flags);
ret = -EINVAL;
- dev_state = __get_device_state(devid);
+ dev_state = __get_device_state(sbdf);
if (dev_state == NULL)
goto out_unlock;
@@ -898,18 +889,18 @@ int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
{
struct device_state *dev_state;
unsigned long flags;
- u16 devid;
+ u32 sbdf;
int ret;
if (!amd_iommu_v2_supported())
return -ENODEV;
- devid = device_id(pdev);
+ sbdf = get_pci_sbdf_id(pdev);
spin_lock_irqsave(&state_lock, flags);
ret = -EINVAL;
- dev_state = __get_device_state(devid);
+ dev_state = __get_device_state(sbdf);
if (dev_state == NULL)
goto out_unlock;
diff --git a/drivers/iommu/amd/quirks.c b/drivers/iommu/amd/quirks.c
index 5120ce4fdce3..79dbb8f33b47 100644
--- a/drivers/iommu/amd/quirks.c
+++ b/drivers/iommu/amd/quirks.c
@@ -15,7 +15,7 @@
struct ivrs_quirk_entry {
u8 id;
- u16 devid;
+ u32 devid;
};
enum {
@@ -49,7 +49,7 @@ static int __init ivrs_ioapic_quirk_cb(const struct dmi_system_id *d)
const struct ivrs_quirk_entry *i;
for (i = d->driver_data; i->id != 0 && i->devid != 0; i++)
- add_special_device(IVHD_SPECIAL_IOAPIC, i->id, (u16 *)&i->devid, 0);
+ add_special_device(IVHD_SPECIAL_IOAPIC, i->id, (u32 *)&i->devid, 0);
return 0;
}
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 8af0242a90d9..1b1725759262 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -564,9 +564,6 @@ static void apple_dart_release_device(struct device *dev)
{
struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
- if (!cfg)
- return;
-
dev_iommu_priv_set(dev, NULL);
kfree(cfg);
}
@@ -771,7 +768,6 @@ static const struct iommu_ops apple_dart_iommu_ops = {
.of_xlate = apple_dart_of_xlate,
.def_domain_type = apple_dart_def_domain_type,
.get_resv_regions = apple_dart_get_resv_regions,
- .put_resv_regions = generic_iommu_put_resv_regions,
.pgsize_bitmap = -1UL, /* Restricted during dart probe */
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 88817a3376ef..d32b02336411 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1380,12 +1380,21 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
}
-static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
+static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force)
{
unsigned int i;
+ u64 val = STRTAB_STE_0_V;
+
+ if (disable_bypass && !force)
+ val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
+ else
+ val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
for (i = 0; i < nent; ++i) {
- arm_smmu_write_strtab_ent(NULL, -1, strtab);
+ strtab[0] = cpu_to_le64(val);
+ strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
+ STRTAB_STE_1_SHCFG_INCOMING));
+ strtab[2] = 0;
strtab += STRTAB_STE_DWORDS;
}
}
@@ -1413,7 +1422,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
return -ENOMEM;
}
- arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
+ arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
arm_smmu_write_strtab_l1_desc(strtab, desc);
return 0;
}
@@ -2537,6 +2546,19 @@ static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
return sid < limit;
}
+static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
+{
+ /* Check the SIDs are in range of the SMMU and our stream table */
+ if (!arm_smmu_sid_in_range(smmu, sid))
+ return -ERANGE;
+
+ /* Ensure l2 strtab is initialised */
+ if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
+ return arm_smmu_init_l2_strtab(smmu, sid);
+
+ return 0;
+}
+
static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
struct arm_smmu_master *master)
{
@@ -2560,20 +2582,9 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
new_stream->id = sid;
new_stream->master = master;
- /*
- * Check the SIDs are in range of the SMMU and our stream table
- */
- if (!arm_smmu_sid_in_range(smmu, sid)) {
- ret = -ERANGE;
+ ret = arm_smmu_init_sid_strtab(smmu, sid);
+ if (ret)
break;
- }
-
- /* Ensure l2 strtab is initialised */
- if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
- ret = arm_smmu_init_l2_strtab(smmu, sid);
- if (ret)
- break;
- }
/* Insert into SID tree */
new_node = &(smmu->streams.rb_node);
@@ -2691,20 +2702,14 @@ err_free_master:
static void arm_smmu_release_device(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct arm_smmu_master *master;
-
- if (!fwspec || fwspec->ops != &arm_smmu_ops)
- return;
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- master = dev_iommu_priv_get(dev);
if (WARN_ON(arm_smmu_master_sva_enabled(master)))
iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
arm_smmu_detach_dev(master);
arm_smmu_disable_pasid(master);
arm_smmu_remove_master(master);
kfree(master);
- iommu_fwspec_free(dev);
}
static struct iommu_group *arm_smmu_device_group(struct device *dev)
@@ -2760,58 +2765,27 @@ static void arm_smmu_get_resv_regions(struct device *dev,
iommu_dma_get_resv_regions(dev, head);
}
-static bool arm_smmu_dev_has_feature(struct device *dev,
- enum iommu_dev_features feat)
-{
- struct arm_smmu_master *master = dev_iommu_priv_get(dev);
-
- if (!master)
- return false;
-
- switch (feat) {
- case IOMMU_DEV_FEAT_IOPF:
- return arm_smmu_master_iopf_supported(master);
- case IOMMU_DEV_FEAT_SVA:
- return arm_smmu_master_sva_supported(master);
- default:
- return false;
- }
-}
-
-static bool arm_smmu_dev_feature_enabled(struct device *dev,
- enum iommu_dev_features feat)
-{
- struct arm_smmu_master *master = dev_iommu_priv_get(dev);
-
- if (!master)
- return false;
-
- switch (feat) {
- case IOMMU_DEV_FEAT_IOPF:
- return master->iopf_enabled;
- case IOMMU_DEV_FEAT_SVA:
- return arm_smmu_master_sva_enabled(master);
- default:
- return false;
- }
-}
-
static int arm_smmu_dev_enable_feature(struct device *dev,
enum iommu_dev_features feat)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- if (!arm_smmu_dev_has_feature(dev, feat))
+ if (!master)
return -ENODEV;
- if (arm_smmu_dev_feature_enabled(dev, feat))
- return -EBUSY;
-
switch (feat) {
case IOMMU_DEV_FEAT_IOPF:
+ if (!arm_smmu_master_iopf_supported(master))
+ return -EINVAL;
+ if (master->iopf_enabled)
+ return -EBUSY;
master->iopf_enabled = true;
return 0;
case IOMMU_DEV_FEAT_SVA:
+ if (!arm_smmu_master_sva_supported(master))
+ return -EINVAL;
+ if (arm_smmu_master_sva_enabled(master))
+ return -EBUSY;
return arm_smmu_master_enable_sva(master);
default:
return -EINVAL;
@@ -2823,16 +2797,20 @@ static int arm_smmu_dev_disable_feature(struct device *dev,
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- if (!arm_smmu_dev_feature_enabled(dev, feat))
+ if (!master)
return -EINVAL;
switch (feat) {
case IOMMU_DEV_FEAT_IOPF:
+ if (!master->iopf_enabled)
+ return -EINVAL;
if (master->sva_enabled)
return -EBUSY;
master->iopf_enabled = false;
return 0;
case IOMMU_DEV_FEAT_SVA:
+ if (!arm_smmu_master_sva_enabled(master))
+ return -EINVAL;
return arm_smmu_master_disable_sva(master);
default:
return -EINVAL;
@@ -2847,9 +2825,6 @@ static struct iommu_ops arm_smmu_ops = {
.device_group = arm_smmu_device_group,
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
- .put_resv_regions = generic_iommu_put_resv_regions,
- .dev_has_feat = arm_smmu_dev_has_feature,
- .dev_feat_enabled = arm_smmu_dev_feature_enabled,
.dev_enable_feat = arm_smmu_dev_enable_feature,
.dev_disable_feat = arm_smmu_dev_disable_feature,
.sva_bind = arm_smmu_sva_bind,
@@ -3049,7 +3024,7 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
cfg->strtab_base_cfg = reg;
- arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
+ arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
return 0;
}
@@ -3743,6 +3718,36 @@ static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
return devm_ioremap_resource(dev, &res);
}
+static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
+{
+ struct list_head rmr_list;
+ struct iommu_resv_region *e;
+
+ INIT_LIST_HEAD(&rmr_list);
+ iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
+
+ list_for_each_entry(e, &rmr_list, list) {
+ __le64 *step;
+ struct iommu_iort_rmr_data *rmr;
+ int ret, i;
+
+ rmr = container_of(e, struct iommu_iort_rmr_data, rr);
+ for (i = 0; i < rmr->num_sids; i++) {
+ ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
+ if (ret) {
+ dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
+ rmr->sids[i]);
+ continue;
+ }
+
+ step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
+ arm_smmu_init_bypass_stes(step, 1, true);
+ }
+ }
+
+ iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
+}
+
static int arm_smmu_device_probe(struct platform_device *pdev)
{
int irq, ret;
@@ -3826,6 +3831,9 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
/* Record our private device structure */
platform_set_drvdata(pdev, smmu);
+ /* Check for RMRs and install bypass STEs if any */
+ arm_smmu_rmr_install_bypass_ste(smmu);
+
/* Reset the device */
ret = arm_smmu_device_reset(smmu, bypass);
if (ret)
diff --git a/drivers/iommu/arm/arm-smmu/Makefile b/drivers/iommu/arm/arm-smmu/Makefile
index b0cc01aa20c9..2a5a95e8e3f9 100644
--- a/drivers/iommu/arm/arm-smmu/Makefile
+++ b/drivers/iommu/arm/arm-smmu/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
obj-$(CONFIG_ARM_SMMU) += arm_smmu.o
arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o
arm_smmu-$(CONFIG_ARM_SMMU_QCOM) += arm-smmu-qcom.o
+arm_smmu-$(CONFIG_ARM_SMMU_QCOM_DEBUG) += arm-smmu-qcom-debug.o
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
new file mode 100644
index 000000000000..6eed8e67a0ca
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/of_device.h>
+#include <linux/qcom_scm.h>
+#include <linux/ratelimit.h>
+
+#include "arm-smmu.h"
+#include "arm-smmu-qcom.h"
+
+enum qcom_smmu_impl_reg_offset {
+ QCOM_SMMU_TBU_PWR_STATUS,
+ QCOM_SMMU_STATS_SYNC_INV_TBU_ACK,
+ QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR,
+};
+
+struct qcom_smmu_config {
+ const u32 *reg_offset;
+};
+
+void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu)
+{
+ int ret;
+ u32 tbu_pwr_status, sync_inv_ack, sync_inv_progress;
+ struct qcom_smmu *qsmmu = container_of(smmu, struct qcom_smmu, smmu);
+ const struct qcom_smmu_config *cfg;
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+ if (__ratelimit(&rs)) {
+ dev_err(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n");
+
+ cfg = qsmmu->cfg;
+ if (!cfg)
+ return;
+
+ ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
+ &tbu_pwr_status);
+ if (ret)
+ dev_err(smmu->dev,
+ "Failed to read TBU power status: %d\n", ret);
+
+ ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
+ &sync_inv_ack);
+ if (ret)
+ dev_err(smmu->dev,
+ "Failed to read TBU sync/inv ack status: %d\n", ret);
+
+ ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
+ &sync_inv_progress);
+ if (ret)
+ dev_err(smmu->dev,
+ "Failed to read TCU syn/inv progress: %d\n", ret);
+
+ dev_err(smmu->dev,
+ "TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
+ tbu_pwr_status, sync_inv_ack, sync_inv_progress);
+ }
+}
+
+/* Implementation Defined Register Space 0 register offsets */
+static const u32 qcom_smmu_impl0_reg_offset[] = {
+ [QCOM_SMMU_TBU_PWR_STATUS] = 0x2204,
+ [QCOM_SMMU_STATS_SYNC_INV_TBU_ACK] = 0x25dc,
+ [QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR] = 0x2670,
+};
+
+static const struct qcom_smmu_config qcm2290_smmu_cfg = {
+ .reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc7180_smmu_cfg = {
+ .reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc7280_smmu_cfg = {
+ .reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc8180x_smmu_cfg = {
+ .reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc8280xp_smmu_cfg = {
+ .reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm6125_smmu_cfg = {
+ .reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm6350_smmu_cfg = {
+ .reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8150_smmu_cfg = {
+ .reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8250_smmu_cfg = {
+ .reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8350_smmu_cfg = {
+ .reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8450_smmu_cfg = {
+ .reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct of_device_id __maybe_unused qcom_smmu_impl_debug_match[] = {
+ { .compatible = "qcom,msm8998-smmu-v2" },
+ { .compatible = "qcom,qcm2290-smmu-500", .data = &qcm2290_smmu_cfg },
+ { .compatible = "qcom,sc7180-smmu-500", .data = &sc7180_smmu_cfg },
+ { .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_cfg},
+ { .compatible = "qcom,sc8180x-smmu-500", .data = &sc8180x_smmu_cfg },
+ { .compatible = "qcom,sc8280xp-smmu-500", .data = &sc8280xp_smmu_cfg },
+ { .compatible = "qcom,sdm630-smmu-v2" },
+ { .compatible = "qcom,sdm845-smmu-500" },
+ { .compatible = "qcom,sm6125-smmu-500", .data = &sm6125_smmu_cfg},
+ { .compatible = "qcom,sm6350-smmu-500", .data = &sm6350_smmu_cfg},
+ { .compatible = "qcom,sm8150-smmu-500", .data = &sm8150_smmu_cfg },
+ { .compatible = "qcom,sm8250-smmu-500", .data = &sm8250_smmu_cfg },
+ { .compatible = "qcom,sm8350-smmu-500", .data = &sm8350_smmu_cfg },
+ { .compatible = "qcom,sm8450-smmu-500", .data = &sm8450_smmu_cfg },
+ { }
+};
+
+const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu)
+{
+ const struct of_device_id *match;
+ const struct device_node *np = smmu->dev->of_node;
+
+ match = of_match_node(qcom_smmu_impl_debug_match, np);
+ if (!match)
+ return NULL;
+
+ return match->data;
+}
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 7820711c4560..b2708de25ea3 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -5,23 +5,40 @@
#include <linux/acpi.h>
#include <linux/adreno-smmu-priv.h>
+#include <linux/delay.h>
#include <linux/of_device.h>
#include <linux/qcom_scm.h>
#include "arm-smmu.h"
+#include "arm-smmu-qcom.h"
-struct qcom_smmu {
- struct arm_smmu_device smmu;
- bool bypass_quirk;
- u8 bypass_cbndx;
- u32 stall_enabled;
-};
+#define QCOM_DUMMY_VAL -1
static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
{
return container_of(smmu, struct qcom_smmu, smmu);
}
+static void qcom_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
+ int sync, int status)
+{
+ unsigned int spin_cnt, delay;
+ u32 reg;
+
+ arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
+ for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
+ for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
+ reg = arm_smmu_readl(smmu, page, status);
+ if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
+ return;
+ cpu_relax();
+ }
+ udelay(delay);
+ }
+
+ qcom_smmu_tlb_sync_debug(smmu);
+}
+
static void qcom_adreno_smmu_write_sctlr(struct arm_smmu_device *smmu, int idx,
u32 reg)
{
@@ -233,6 +250,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,sc7280-mdss" },
{ .compatible = "qcom,sc7280-mss-pil" },
{ .compatible = "qcom,sc8180x-mdss" },
+ { .compatible = "qcom,sm8250-mdss" },
{ .compatible = "qcom,sdm845-mdss" },
{ .compatible = "qcom,sdm845-mss-pil" },
{ }
@@ -374,6 +392,7 @@ static const struct arm_smmu_impl qcom_smmu_impl = {
.def_domain_type = qcom_smmu_def_domain_type,
.reset = qcom_smmu500_reset,
.write_s2cr = qcom_smmu_write_s2cr,
+ .tlb_sync = qcom_smmu_tlb_sync,
};
static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
@@ -382,6 +401,7 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
.reset = qcom_smmu500_reset,
.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
.write_sctlr = qcom_adreno_smmu_write_sctlr,
+ .tlb_sync = qcom_smmu_tlb_sync,
};
static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
@@ -398,6 +418,7 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
return ERR_PTR(-ENOMEM);
qsmmu->smmu.impl = impl;
+ qsmmu->cfg = qcom_smmu_impl_data(smmu);
return &qsmmu->smmu;
}
@@ -413,6 +434,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
{ .compatible = "qcom,sdm845-smmu-500" },
{ .compatible = "qcom,sm6125-smmu-500" },
{ .compatible = "qcom,sm6350-smmu-500" },
+ { .compatible = "qcom,sm6375-smmu-500" },
{ .compatible = "qcom,sm8150-smmu-500" },
{ .compatible = "qcom,sm8250-smmu-500" },
{ .compatible = "qcom,sm8350-smmu-500" },
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
new file mode 100644
index 000000000000..99ec8f8629a0
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _ARM_SMMU_QCOM_H
+#define _ARM_SMMU_QCOM_H
+
+struct qcom_smmu {
+ struct arm_smmu_device smmu;
+ const struct qcom_smmu_config *cfg;
+ bool bypass_quirk;
+ u8 bypass_cbndx;
+ u32 stall_enabled;
+};
+
+#ifdef CONFIG_ARM_SMMU_QCOM_DEBUG
+void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu);
+const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu);
+#else
+static inline void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) { }
+static inline const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu)
+{
+ return NULL;
+}
+#endif
+
+#endif /* _ARM_SMMU_QCOM_H */
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 2ed3594f384e..dfa82df00342 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1432,27 +1432,19 @@ out_free:
static void arm_smmu_release_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct arm_smmu_master_cfg *cfg;
- struct arm_smmu_device *smmu;
+ struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
int ret;
- if (!fwspec || fwspec->ops != &arm_smmu_ops)
- return;
-
- cfg = dev_iommu_priv_get(dev);
- smmu = cfg->smmu;
-
- ret = arm_smmu_rpm_get(smmu);
+ ret = arm_smmu_rpm_get(cfg->smmu);
if (ret < 0)
return;
arm_smmu_master_free_smes(cfg, fwspec);
- arm_smmu_rpm_put(smmu);
+ arm_smmu_rpm_put(cfg->smmu);
dev_iommu_priv_set(dev, NULL);
kfree(cfg);
- iommu_fwspec_free(dev);
}
static void arm_smmu_probe_finalize(struct device *dev)
@@ -1592,7 +1584,6 @@ static struct iommu_ops arm_smmu_ops = {
.device_group = arm_smmu_device_group,
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
- .put_resv_regions = generic_iommu_put_resv_regions,
.def_domain_type = arm_smmu_def_domain_type,
.pgsize_bitmap = -1UL, /* Restricted during device attach */
.owner = THIS_MODULE,
@@ -2071,10 +2062,57 @@ err_reset_platform_ops: __maybe_unused;
return err;
}
+static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu)
+{
+ struct list_head rmr_list;
+ struct iommu_resv_region *e;
+ int idx, cnt = 0;
+ u32 reg;
+
+ INIT_LIST_HEAD(&rmr_list);
+ iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
+
+ /*
+ * Rather than trying to look at existing mappings that
+ * are setup by the firmware and then invalidate the ones
+ * that do no have matching RMR entries, just disable the
+ * SMMU until it gets enabled again in the reset routine.
+ */
+ reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
+ reg |= ARM_SMMU_sCR0_CLIENTPD;
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
+
+ list_for_each_entry(e, &rmr_list, list) {
+ struct iommu_iort_rmr_data *rmr;
+ int i;
+
+ rmr = container_of(e, struct iommu_iort_rmr_data, rr);
+ for (i = 0; i < rmr->num_sids; i++) {
+ idx = arm_smmu_find_sme(smmu, rmr->sids[i], ~0);
+ if (idx < 0)
+ continue;
+
+ if (smmu->s2crs[idx].count == 0) {
+ smmu->smrs[idx].id = rmr->sids[i];
+ smmu->smrs[idx].mask = 0;
+ smmu->smrs[idx].valid = true;
+ }
+ smmu->s2crs[idx].count++;
+ smmu->s2crs[idx].type = S2CR_TYPE_BYPASS;
+ smmu->s2crs[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
+
+ cnt++;
+ }
+ }
+
+ dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt,
+ cnt == 1 ? "" : "s");
+ iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
+}
+
static int arm_smmu_device_probe(struct platform_device *pdev)
{
struct resource *res;
- resource_size_t ioaddr;
struct arm_smmu_device *smmu;
struct device *dev = &pdev->dev;
int num_irqs, i, err;
@@ -2098,7 +2136,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(smmu->base))
return PTR_ERR(smmu->base);
- ioaddr = res->start;
+ smmu->ioaddr = res->start;
+
/*
* The resource size should effectively match the value of SMMU_TOP;
* stash that temporarily until we know PAGESIZE to validate it with.
@@ -2178,7 +2217,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
}
err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
- "smmu.%pa", &ioaddr);
+ "smmu.%pa", &smmu->ioaddr);
if (err) {
dev_err(dev, "Failed to register iommu in sysfs\n");
return err;
@@ -2191,6 +2230,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
}
platform_set_drvdata(pdev, smmu);
+
+ /* Check for RMRs and install bypass SMRs if any */
+ arm_smmu_rmr_install_bypass_smr(smmu);
+
arm_smmu_device_reset(smmu);
arm_smmu_test_smr_masks(smmu);
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 2b9b42fb6f30..703fd5817ec1 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -278,6 +278,7 @@ struct arm_smmu_device {
struct device *dev;
void __iomem *base;
+ phys_addr_t ioaddr;
unsigned int numpage;
unsigned int pgshift;
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index 4c077c38fbd6..17235116d3bb 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -532,16 +532,6 @@ static struct iommu_device *qcom_iommu_probe_device(struct device *dev)
return &qcom_iommu->iommu;
}
-static void qcom_iommu_release_device(struct device *dev)
-{
- struct qcom_iommu_dev *qcom_iommu = to_iommu(dev);
-
- if (!qcom_iommu)
- return;
-
- iommu_fwspec_free(dev);
-}
-
static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
struct qcom_iommu_dev *qcom_iommu;
@@ -591,7 +581,6 @@ static const struct iommu_ops qcom_iommu_ops = {
.capable = qcom_iommu_capable,
.domain_alloc = qcom_iommu_domain_alloc,
.probe_device = qcom_iommu_probe_device,
- .release_device = qcom_iommu_release_device,
.device_group = generic_device_group,
.of_xlate = qcom_iommu_of_xlate,
.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
@@ -750,9 +739,12 @@ static bool qcom_iommu_has_secure_context(struct qcom_iommu_dev *qcom_iommu)
{
struct device_node *child;
- for_each_child_of_node(qcom_iommu->dev->of_node, child)
- if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec"))
+ for_each_child_of_node(qcom_iommu->dev->of_node, child) {
+ if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec")) {
+ of_node_put(child);
return true;
+ }
+ }
return false;
}
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f90251572a5d..458fb6738223 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -64,10 +64,11 @@ struct iommu_dma_cookie {
/* Domain for flush queue callback; NULL if flush queue not in use */
struct iommu_domain *fq_domain;
+ struct mutex mutex;
};
static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
-bool iommu_dma_forcedac __read_mostly;
+bool iommu_dma_forcedac __read_mostly = !IS_ENABLED(CONFIG_IOMMU_DMA_PCI_SAC);
static int __init iommu_dma_forcedac_setup(char *str)
{
@@ -310,6 +311,7 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
if (!domain->iova_cookie)
return -ENOMEM;
+ mutex_init(&domain->iova_cookie->mutex);
return 0;
}
@@ -385,7 +387,7 @@ void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
{
if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
- iort_iommu_msi_get_resv_regions(dev, list);
+ iort_iommu_get_resv_regions(dev, list);
}
EXPORT_SYMBOL(iommu_dma_get_resv_regions);
@@ -560,26 +562,33 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
}
/* start_pfn is always nonzero for an already-initialised domain */
+ mutex_lock(&cookie->mutex);
if (iovad->start_pfn) {
if (1UL << order != iovad->granule ||
base_pfn != iovad->start_pfn) {
pr_warn("Incompatible range for DMA domain\n");
- return -EFAULT;
+ ret = -EFAULT;
+ goto done_unlock;
}
- return 0;
+ ret = 0;
+ goto done_unlock;
}
init_iova_domain(iovad, 1UL << order, base_pfn);
ret = iova_domain_init_rcaches(iovad);
if (ret)
- return ret;
+ goto done_unlock;
/* If the FQ fails we can simply fall back to strict mode */
if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
domain->type = IOMMU_DOMAIN_DMA;
- return iova_reserve_iommu_regions(dev, domain);
+ ret = iova_reserve_iommu_regions(dev, domain);
+
+done_unlock:
+ mutex_unlock(&cookie->mutex);
+ return ret;
}
/**
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 71f2018e23fe..8e18984a0c4f 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -135,6 +135,11 @@ static u32 lv2ent_offset(sysmmu_iova_t iova)
#define CFG_SYSSEL (1 << 22) /* System MMU 3.2 only */
#define CFG_FLPDCACHE (1 << 20) /* System MMU 3.2+ only */
+#define CTRL_VM_ENABLE BIT(0)
+#define CTRL_VM_FAULT_MODE_STALL BIT(3)
+#define CAPA0_CAPA1_EXIST BIT(11)
+#define CAPA1_VCR_ENABLED BIT(14)
+
/* common registers */
#define REG_MMU_CTRL 0x000
#define REG_MMU_CFG 0x004
@@ -148,29 +153,20 @@ static u32 lv2ent_offset(sysmmu_iova_t iova)
#define MAKE_MMU_VER(maj, min) ((((maj) & 0xF) << 7) | ((min) & 0x7F))
/* v1.x - v3.x registers */
-#define REG_MMU_FLUSH 0x00C
-#define REG_MMU_FLUSH_ENTRY 0x010
-#define REG_PT_BASE_ADDR 0x014
-#define REG_INT_STATUS 0x018
-#define REG_INT_CLEAR 0x01C
-
#define REG_PAGE_FAULT_ADDR 0x024
#define REG_AW_FAULT_ADDR 0x028
#define REG_AR_FAULT_ADDR 0x02C
#define REG_DEFAULT_SLAVE_ADDR 0x030
/* v5.x registers */
-#define REG_V5_PT_BASE_PFN 0x00C
-#define REG_V5_MMU_FLUSH_ALL 0x010
-#define REG_V5_MMU_FLUSH_ENTRY 0x014
-#define REG_V5_MMU_FLUSH_RANGE 0x018
-#define REG_V5_MMU_FLUSH_START 0x020
-#define REG_V5_MMU_FLUSH_END 0x024
-#define REG_V5_INT_STATUS 0x060
-#define REG_V5_INT_CLEAR 0x064
#define REG_V5_FAULT_AR_VA 0x070
#define REG_V5_FAULT_AW_VA 0x080
+/* v7.x registers */
+#define REG_V7_CAPA0 0x870
+#define REG_V7_CAPA1 0x874
+#define REG_V7_CTRL_VM 0x8000
+
#define has_sysmmu(dev) (dev_iommu_priv_get(dev) != NULL)
static struct device *dma_dev;
@@ -251,6 +247,21 @@ struct exynos_iommu_domain {
};
/*
+ * SysMMU version specific data. Contains offsets for the registers which can
+ * be found in different SysMMU variants, but have different offset values.
+ */
+struct sysmmu_variant {
+ u32 pt_base; /* page table base address (physical) */
+ u32 flush_all; /* invalidate all TLB entries */
+ u32 flush_entry; /* invalidate specific TLB entry */
+ u32 flush_range; /* invalidate TLB entries in specified range */
+ u32 flush_start; /* start address of range invalidation */
+ u32 flush_end; /* end address of range invalidation */
+ u32 int_status; /* interrupt status information */
+ u32 int_clear; /* clear the interrupt */
+};
+
+/*
* This structure hold all data of a single SYSMMU controller, this includes
* hw resources like registers and clocks, pointers and list nodes to connect
* it to all other structures, internal state and parameters read from device
@@ -274,6 +285,45 @@ struct sysmmu_drvdata {
unsigned int version; /* our version */
struct iommu_device iommu; /* IOMMU core handle */
+ const struct sysmmu_variant *variant; /* version specific data */
+
+ /* v7 fields */
+ bool has_vcr; /* virtual machine control register */
+};
+
+#define SYSMMU_REG(data, reg) ((data)->sfrbase + (data)->variant->reg)
+
+/* SysMMU v1..v3 */
+static const struct sysmmu_variant sysmmu_v1_variant = {
+ .flush_all = 0x0c,
+ .flush_entry = 0x10,
+ .pt_base = 0x14,
+ .int_status = 0x18,
+ .int_clear = 0x1c,
+};
+
+/* SysMMU v5 and v7 (non-VM capable) */
+static const struct sysmmu_variant sysmmu_v5_variant = {
+ .pt_base = 0x0c,
+ .flush_all = 0x10,
+ .flush_entry = 0x14,
+ .flush_range = 0x18,
+ .flush_start = 0x20,
+ .flush_end = 0x24,
+ .int_status = 0x60,
+ .int_clear = 0x64,
+};
+
+/* SysMMU v7: VM capable register set */
+static const struct sysmmu_variant sysmmu_v7_vm_variant = {
+ .pt_base = 0x800c,
+ .flush_all = 0x8010,
+ .flush_entry = 0x8014,
+ .flush_range = 0x8018,
+ .flush_start = 0x8020,
+ .flush_end = 0x8024,
+ .int_status = 0x60,
+ .int_clear = 0x64,
};
static struct exynos_iommu_domain *to_exynos_domain(struct iommu_domain *dom)
@@ -304,10 +354,7 @@ static bool sysmmu_block(struct sysmmu_drvdata *data)
static void __sysmmu_tlb_invalidate(struct sysmmu_drvdata *data)
{
- if (MMU_MAJ_VER(data->version) < 5)
- writel(0x1, data->sfrbase + REG_MMU_FLUSH);
- else
- writel(0x1, data->sfrbase + REG_V5_MMU_FLUSH_ALL);
+ writel(0x1, SYSMMU_REG(data, flush_all));
}
static void __sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data,
@@ -315,34 +362,30 @@ static void __sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data,
{
unsigned int i;
- if (MMU_MAJ_VER(data->version) < 5) {
+ if (MMU_MAJ_VER(data->version) < 5 || num_inv == 1) {
for (i = 0; i < num_inv; i++) {
writel((iova & SPAGE_MASK) | 1,
- data->sfrbase + REG_MMU_FLUSH_ENTRY);
+ SYSMMU_REG(data, flush_entry));
iova += SPAGE_SIZE;
}
} else {
- if (num_inv == 1) {
- writel((iova & SPAGE_MASK) | 1,
- data->sfrbase + REG_V5_MMU_FLUSH_ENTRY);
- } else {
- writel((iova & SPAGE_MASK),
- data->sfrbase + REG_V5_MMU_FLUSH_START);
- writel((iova & SPAGE_MASK) + (num_inv - 1) * SPAGE_SIZE,
- data->sfrbase + REG_V5_MMU_FLUSH_END);
- writel(1, data->sfrbase + REG_V5_MMU_FLUSH_RANGE);
- }
+ writel(iova & SPAGE_MASK, SYSMMU_REG(data, flush_start));
+ writel((iova & SPAGE_MASK) + (num_inv - 1) * SPAGE_SIZE,
+ SYSMMU_REG(data, flush_end));
+ writel(0x1, SYSMMU_REG(data, flush_range));
}
}
static void __sysmmu_set_ptbase(struct sysmmu_drvdata *data, phys_addr_t pgd)
{
+ u32 pt_base;
+
if (MMU_MAJ_VER(data->version) < 5)
- writel(pgd, data->sfrbase + REG_PT_BASE_ADDR);
+ pt_base = pgd;
else
- writel(pgd >> PAGE_SHIFT,
- data->sfrbase + REG_V5_PT_BASE_PFN);
+ pt_base = pgd >> SPAGE_ORDER;
+ writel(pt_base, SYSMMU_REG(data, pt_base));
__sysmmu_tlb_invalidate(data);
}
@@ -362,6 +405,20 @@ static void __sysmmu_disable_clocks(struct sysmmu_drvdata *data)
clk_disable_unprepare(data->clk_master);
}
+static bool __sysmmu_has_capa1(struct sysmmu_drvdata *data)
+{
+ u32 capa0 = readl(data->sfrbase + REG_V7_CAPA0);
+
+ return capa0 & CAPA0_CAPA1_EXIST;
+}
+
+static void __sysmmu_get_vcr(struct sysmmu_drvdata *data)
+{
+ u32 capa1 = readl(data->sfrbase + REG_V7_CAPA1);
+
+ data->has_vcr = capa1 & CAPA1_VCR_ENABLED;
+}
+
static void __sysmmu_get_version(struct sysmmu_drvdata *data)
{
u32 ver;
@@ -379,6 +436,19 @@ static void __sysmmu_get_version(struct sysmmu_drvdata *data)
dev_dbg(data->sysmmu, "hardware version: %d.%d\n",
MMU_MAJ_VER(data->version), MMU_MIN_VER(data->version));
+ if (MMU_MAJ_VER(data->version) < 5) {
+ data->variant = &sysmmu_v1_variant;
+ } else if (MMU_MAJ_VER(data->version) < 7) {
+ data->variant = &sysmmu_v5_variant;
+ } else {
+ if (__sysmmu_has_capa1(data))
+ __sysmmu_get_vcr(data);
+ if (data->has_vcr)
+ data->variant = &sysmmu_v7_vm_variant;
+ else
+ data->variant = &sysmmu_v5_variant;
+ }
+
__sysmmu_disable_clocks(data);
}
@@ -406,19 +476,14 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
const struct sysmmu_fault_info *finfo;
unsigned int i, n, itype;
sysmmu_iova_t fault_addr;
- unsigned short reg_status, reg_clear;
int ret = -ENOSYS;
WARN_ON(!data->active);
if (MMU_MAJ_VER(data->version) < 5) {
- reg_status = REG_INT_STATUS;
- reg_clear = REG_INT_CLEAR;
finfo = sysmmu_faults;
n = ARRAY_SIZE(sysmmu_faults);
} else {
- reg_status = REG_V5_INT_STATUS;
- reg_clear = REG_V5_INT_CLEAR;
finfo = sysmmu_v5_faults;
n = ARRAY_SIZE(sysmmu_v5_faults);
}
@@ -427,7 +492,7 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
clk_enable(data->clk_master);
- itype = __ffs(readl(data->sfrbase + reg_status));
+ itype = __ffs(readl(SYSMMU_REG(data, int_status)));
for (i = 0; i < n; i++, finfo++)
if (finfo->bit == itype)
break;
@@ -444,7 +509,7 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
/* fault is not recovered by fault handler */
BUG_ON(ret != 0);
- writel(1 << itype, data->sfrbase + reg_clear);
+ writel(1 << itype, SYSMMU_REG(data, int_clear));
sysmmu_unblock(data);
@@ -486,6 +551,18 @@ static void __sysmmu_init_config(struct sysmmu_drvdata *data)
writel(cfg, data->sfrbase + REG_MMU_CFG);
}
+static void __sysmmu_enable_vid(struct sysmmu_drvdata *data)
+{
+ u32 ctrl;
+
+ if (MMU_MAJ_VER(data->version) < 7 || !data->has_vcr)
+ return;
+
+ ctrl = readl(data->sfrbase + REG_V7_CTRL_VM);
+ ctrl |= CTRL_VM_ENABLE | CTRL_VM_FAULT_MODE_STALL;
+ writel(ctrl, data->sfrbase + REG_V7_CTRL_VM);
+}
+
static void __sysmmu_enable(struct sysmmu_drvdata *data)
{
unsigned long flags;
@@ -496,6 +573,7 @@ static void __sysmmu_enable(struct sysmmu_drvdata *data)
writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL);
__sysmmu_init_config(data);
__sysmmu_set_ptbase(data, data->pgtable);
+ __sysmmu_enable_vid(data);
writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL);
data->active = true;
spin_unlock_irqrestore(&data->lock, flags);
@@ -551,7 +629,7 @@ static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data,
* 64KB page can be one of 16 consecutive sets.
*/
if (MMU_MAJ_VER(data->version) == 2)
- num_inv = min_t(unsigned int, size / PAGE_SIZE, 64);
+ num_inv = min_t(unsigned int, size / SPAGE_SIZE, 64);
if (sysmmu_block(data)) {
__sysmmu_tlb_invalidate_entry(data, iova, num_inv);
@@ -623,6 +701,8 @@ static int exynos_sysmmu_probe(struct platform_device *pdev)
data->sysmmu = dev;
spin_lock_init(&data->lock);
+ __sysmmu_get_version(data);
+
ret = iommu_device_sysfs_add(&data->iommu, &pdev->dev, NULL,
dev_name(data->sysmmu));
if (ret)
@@ -630,11 +710,10 @@ static int exynos_sysmmu_probe(struct platform_device *pdev)
ret = iommu_device_register(&data->iommu, &exynos_iommu_ops, dev);
if (ret)
- return ret;
+ goto err_iommu_register;
platform_set_drvdata(pdev, data);
- __sysmmu_get_version(data);
if (PG_ENT_SHIFT < 0) {
if (MMU_MAJ_VER(data->version) < 5) {
PG_ENT_SHIFT = SYSMMU_PG_ENT_SHIFT;
@@ -647,6 +726,14 @@ static int exynos_sysmmu_probe(struct platform_device *pdev)
}
}
+ if (MMU_MAJ_VER(data->version) >= 5) {
+ ret = dma_set_mask(dev, DMA_BIT_MASK(36));
+ if (ret) {
+ dev_err(dev, "Unable to set DMA mask: %d\n", ret);
+ goto err_dma_set_mask;
+ }
+ }
+
/*
* use the first registered sysmmu device for performing
* dma mapping operations on iommu page tables (cpu cache flush)
@@ -657,6 +744,12 @@ static int exynos_sysmmu_probe(struct platform_device *pdev)
pm_runtime_enable(dev);
return 0;
+
+err_dma_set_mask:
+ iommu_device_unregister(&data->iommu);
+err_iommu_register:
+ iommu_device_sysfs_remove(&data->iommu);
+ return ret;
}
static int __maybe_unused exynos_sysmmu_suspend(struct device *dev)
@@ -1251,9 +1344,6 @@ static void exynos_iommu_release_device(struct device *dev)
struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
struct sysmmu_drvdata *data;
- if (!has_sysmmu(dev))
- return;
-
if (owner->domain) {
struct iommu_group *group = iommu_group_get(dev);
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index 94b4589dc67c..011f9ab7f743 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -447,15 +447,10 @@ static struct iommu_device *fsl_pamu_probe_device(struct device *dev)
return &pamu_iommu;
}
-static void fsl_pamu_release_device(struct device *dev)
-{
-}
-
static const struct iommu_ops fsl_pamu_ops = {
.capable = fsl_pamu_capable,
.domain_alloc = fsl_pamu_domain_alloc,
.probe_device = fsl_pamu_probe_device,
- .release_device = fsl_pamu_release_device,
.device_group = fsl_pamu_device_group,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = fsl_pamu_attach_device,
diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c
index 71596fc62822..3ee68393122f 100644
--- a/drivers/iommu/intel/cap_audit.c
+++ b/drivers/iommu/intel/cap_audit.c
@@ -10,7 +10,7 @@
#define pr_fmt(fmt) "DMAR: " fmt
-#include <linux/intel-iommu.h>
+#include "iommu.h"
#include "cap_audit.h"
static u64 intel_iommu_cap_sanity;
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index ed796eea4581..1f925285104e 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -10,11 +10,11 @@
#include <linux/debugfs.h>
#include <linux/dmar.h>
-#include <linux/intel-iommu.h>
#include <linux/pci.h>
#include <asm/irq_remapping.h>
+#include "iommu.h"
#include "pasid.h"
#include "perf.h"
@@ -263,10 +263,9 @@ static void ctx_tbl_walk(struct seq_file *m, struct intel_iommu *iommu, u16 bus)
static void root_tbl_walk(struct seq_file *m, struct intel_iommu *iommu)
{
- unsigned long flags;
u16 bus;
- spin_lock_irqsave(&iommu->lock, flags);
+ spin_lock(&iommu->lock);
seq_printf(m, "IOMMU %s: Root Table Address: 0x%llx\n", iommu->name,
(u64)virt_to_phys(iommu->root_entry));
seq_puts(m, "B.D.F\tRoot_entry\t\t\t\tContext_entry\t\t\t\tPASID\tPASID_table_entry\n");
@@ -278,8 +277,7 @@ static void root_tbl_walk(struct seq_file *m, struct intel_iommu *iommu)
*/
for (bus = 0; bus < 256; bus++)
ctx_tbl_walk(m, iommu, bus);
-
- spin_unlock_irqrestore(&iommu->lock, flags);
+ spin_unlock(&iommu->lock);
}
static int dmar_translation_struct_show(struct seq_file *m, void *unused)
@@ -342,13 +340,13 @@ static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde,
}
}
-static int show_device_domain_translation(struct device *dev, void *data)
+static int __show_device_domain_translation(struct device *dev, void *data)
{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct dmar_domain *domain = info->domain;
+ struct dmar_domain *domain;
struct seq_file *m = data;
u64 path[6] = { 0 };
+ domain = to_dmar_domain(iommu_get_domain_for_dev(dev));
if (!domain)
return 0;
@@ -359,20 +357,39 @@ static int show_device_domain_translation(struct device *dev, void *data)
pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
seq_putc(m, '\n');
- return 0;
+ /* Don't iterate */
+ return 1;
}
-static int domain_translation_struct_show(struct seq_file *m, void *unused)
+static int show_device_domain_translation(struct device *dev, void *data)
{
- unsigned long flags;
- int ret;
+ struct iommu_group *group;
- spin_lock_irqsave(&device_domain_lock, flags);
- ret = bus_for_each_dev(&pci_bus_type, NULL, m,
- show_device_domain_translation);
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ group = iommu_group_get(dev);
+ if (group) {
+ /*
+ * The group->mutex is held across the callback, which will
+ * block calls to iommu_attach/detach_group/device. Hence,
+ * the domain of the device will not change during traversal.
+ *
+ * All devices in an iommu group share a single domain, hence
+ * we only dump the domain of the first device. Even though,
+ * this code still possibly races with the iommu_unmap()
+ * interface. This could be solved by RCU-freeing the page
+ * table pages in the iommu_unmap() path.
+ */
+ iommu_group_for_each_dev(group, data,
+ __show_device_domain_translation);
+ iommu_group_put(group);
+ }
- return ret;
+ return 0;
+}
+
+static int domain_translation_struct_show(struct seq_file *m, void *unused)
+{
+ return bus_for_each_dev(&pci_bus_type, NULL, m,
+ show_device_domain_translation);
}
DEFINE_SHOW_ATTRIBUTE(domain_translation_struct);
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 9699ca101c62..6327b34f5aa7 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -19,7 +19,6 @@
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/iova.h>
-#include <linux/intel-iommu.h>
#include <linux/timer.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
@@ -30,10 +29,11 @@
#include <linux/numa.h>
#include <linux/limits.h>
#include <asm/irq_remapping.h>
-#include <trace/events/intel_iommu.h>
+#include "iommu.h"
#include "../irq_remapping.h"
#include "perf.h"
+#include "trace.h"
typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
struct dmar_res_callback {
@@ -60,7 +60,7 @@ LIST_HEAD(dmar_drhd_units);
struct acpi_table_header * __initdata dmar_tbl;
static int dmar_dev_scope_status = 1;
-static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
+static DEFINE_IDA(dmar_seq_ids);
static int alloc_iommu(struct dmar_drhd_unit *drhd);
static void free_iommu(struct intel_iommu *iommu);
@@ -1023,28 +1023,6 @@ out:
return err;
}
-static int dmar_alloc_seq_id(struct intel_iommu *iommu)
-{
- iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
- DMAR_UNITS_SUPPORTED);
- if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
- iommu->seq_id = -1;
- } else {
- set_bit(iommu->seq_id, dmar_seq_ids);
- sprintf(iommu->name, "dmar%d", iommu->seq_id);
- }
-
- return iommu->seq_id;
-}
-
-static void dmar_free_seq_id(struct intel_iommu *iommu)
-{
- if (iommu->seq_id >= 0) {
- clear_bit(iommu->seq_id, dmar_seq_ids);
- iommu->seq_id = -1;
- }
-}
-
static int alloc_iommu(struct dmar_drhd_unit *drhd)
{
struct intel_iommu *iommu;
@@ -1062,11 +1040,14 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
if (!iommu)
return -ENOMEM;
- if (dmar_alloc_seq_id(iommu) < 0) {
+ iommu->seq_id = ida_alloc_range(&dmar_seq_ids, 0,
+ DMAR_UNITS_SUPPORTED - 1, GFP_KERNEL);
+ if (iommu->seq_id < 0) {
pr_err("Failed to allocate seq_id\n");
- err = -ENOSPC;
+ err = iommu->seq_id;
goto error;
}
+ sprintf(iommu->name, "dmar%d", iommu->seq_id);
err = map_iommu(iommu, drhd->reg_base_addr);
if (err) {
@@ -1150,7 +1131,7 @@ err_sysfs:
err_unmap:
unmap_iommu(iommu);
error_free_seq_id:
- dmar_free_seq_id(iommu);
+ ida_free(&dmar_seq_ids, iommu->seq_id);
error:
kfree(iommu);
return err;
@@ -1183,7 +1164,7 @@ static void free_iommu(struct intel_iommu *iommu)
if (iommu->reg)
unmap_iommu(iommu);
- dmar_free_seq_id(iommu);
+ ida_free(&dmar_seq_ids, iommu->seq_id);
kfree(iommu);
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5c0dce78586a..7cca030a508e 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -17,7 +17,6 @@
#include <linux/dma-direct.h>
#include <linux/dma-iommu.h>
#include <linux/dmi.h>
-#include <linux/intel-iommu.h>
#include <linux/intel-svm.h>
#include <linux/memory.h>
#include <linux/pci.h>
@@ -26,6 +25,7 @@
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
+#include "iommu.h"
#include "../irq_remapping.h"
#include "../iommu-sva-lib.h"
#include "pasid.h"
@@ -126,13 +126,8 @@ static inline unsigned long virt_to_dma_pfn(void *p)
return page_to_dma_pfn(virt_to_page(p));
}
-/* global iommu list, set NULL for ignored DMAR units */
-static struct intel_iommu **g_iommus;
-
static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;
-static inline struct device_domain_info *
-dmar_search_domain_by_dev_info(int segment, int bus, int devfn);
/*
* set to 1 to panic kernel if can't successfully enable VT-d
@@ -256,10 +251,6 @@ static inline void context_clear_entry(struct context_entry *context)
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;
-#define for_each_domain_iommu(idx, domain) \
- for (idx = 0; idx < g_num_of_iommus; idx++) \
- if (domain->iommu_refcnt[idx])
-
struct dmar_rmrr_unit {
struct list_head list; /* list of rmrr units */
struct acpi_dmar_header *hdr; /* ACPI header */
@@ -293,12 +284,7 @@ static LIST_HEAD(dmar_satc_units);
#define for_each_rmrr_units(rmrr) \
list_for_each_entry(rmrr, &dmar_rmrr_units, list)
-/* bitmap for indexing intel_iommus */
-static int g_num_of_iommus;
-
-static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
-static void __dmar_remove_one_dev_info(struct device_domain_info *info);
int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
@@ -314,12 +300,6 @@ static int iommu_skip_te_disable;
#define IDENTMAP_GFX 2
#define IDENTMAP_AZALIA 4
-int intel_iommu_gfx_mapped;
-EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
-
-DEFINE_SPINLOCK(device_domain_lock);
-static LIST_HEAD(device_domain_list);
-
const struct iommu_ops intel_iommu_ops;
static bool translation_pre_enabled(struct intel_iommu *iommu)
@@ -455,24 +435,6 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}
-/* This functionin only returns single iommu in a domain */
-struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
-{
- int iommu_id;
-
- /* si_domain and vm domain should not get here. */
- if (WARN_ON(!iommu_is_dma_domain(&domain->domain)))
- return NULL;
-
- for_each_domain_iommu(iommu_id, domain)
- break;
-
- if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
- return NULL;
-
- return g_iommus[iommu_id];
-}
-
static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
{
return sm_supported(iommu) ?
@@ -481,16 +443,16 @@ static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
+ struct iommu_domain_info *info;
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
bool found = false;
- int i;
+ unsigned long i;
domain->iommu_coherency = true;
-
- for_each_domain_iommu(i, domain) {
+ xa_for_each(&domain->iommu_array, i, info) {
found = true;
- if (!iommu_paging_structure_coherency(g_iommus[i])) {
+ if (!iommu_paging_structure_coherency(info->iommu)) {
domain->iommu_coherency = false;
break;
}
@@ -544,15 +506,8 @@ static int domain_update_device_node(struct dmar_domain *domain)
struct device_domain_info *info;
int nid = NUMA_NO_NODE;
- assert_spin_locked(&device_domain_lock);
-
- if (list_empty(&domain->devices))
- return NUMA_NO_NODE;
-
+ spin_lock(&domain->lock);
list_for_each_entry(info, &domain->devices, link) {
- if (!info->dev)
- continue;
-
/*
* There could possibly be multiple device numa nodes as devices
* within the same domain may sit behind different IOMMUs. There
@@ -563,6 +518,7 @@ static int domain_update_device_node(struct dmar_domain *domain)
if (nid != NUMA_NO_NODE)
break;
}
+ spin_unlock(&domain->lock);
return nid;
}
@@ -804,26 +760,23 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
struct context_entry *context;
int ret = 0;
- unsigned long flags;
- spin_lock_irqsave(&iommu->lock, flags);
+ spin_lock(&iommu->lock);
context = iommu_context_addr(iommu, bus, devfn, 0);
if (context)
ret = context_present(context);
- spin_unlock_irqrestore(&iommu->lock, flags);
+ spin_unlock(&iommu->lock);
return ret;
}
static void free_context_table(struct intel_iommu *iommu)
{
- int i;
- unsigned long flags;
struct context_entry *context;
+ int i;
+
+ if (!iommu->root_entry)
+ return;
- spin_lock_irqsave(&iommu->lock, flags);
- if (!iommu->root_entry) {
- goto out;
- }
for (i = 0; i < ROOT_ENTRY_NR; i++) {
context = iommu_context_addr(iommu, i, 0, 0);
if (context)
@@ -835,12 +788,10 @@ static void free_context_table(struct intel_iommu *iommu)
context = iommu_context_addr(iommu, i, 0x80, 0);
if (context)
free_pgtable_page(context);
-
}
+
free_pgtable_page(iommu->root_entry);
iommu->root_entry = NULL;
-out:
- spin_unlock_irqrestore(&iommu->lock, flags);
}
#ifdef CONFIG_DMAR_DEBUG
@@ -849,9 +800,14 @@ static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, u8 bus, u
struct device_domain_info *info;
struct dma_pte *parent, *pte;
struct dmar_domain *domain;
+ struct pci_dev *pdev;
int offset, level;
- info = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
+ pdev = pci_get_domain_bus_and_slot(iommu->segment, bus, devfn);
+ if (!pdev)
+ return;
+
+ info = dev_iommu_priv_get(&pdev->dev);
if (!info || !info->domain) {
pr_info("device [%02x:%02x.%d] not probed\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1234,7 +1190,6 @@ static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
struct root_entry *root;
- unsigned long flags;
root = (struct root_entry *)alloc_pgtable_page(iommu->node);
if (!root) {
@@ -1244,10 +1199,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
}
__iommu_flush_cache(iommu, root, ROOT_SIZE);
-
- spin_lock_irqsave(&iommu->lock, flags);
iommu->root_entry = root;
- spin_unlock_irqrestore(&iommu->lock, flags);
return 0;
}
@@ -1389,23 +1341,23 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
}
static struct device_domain_info *
-iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
- u8 bus, u8 devfn)
+iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
+ u8 bus, u8 devfn)
{
struct device_domain_info *info;
- assert_spin_locked(&device_domain_lock);
-
if (!iommu->qi)
return NULL;
- list_for_each_entry(info, &domain->devices, link)
+ spin_lock(&domain->lock);
+ list_for_each_entry(info, &domain->devices, link) {
if (info->iommu == iommu && info->bus == bus &&
info->devfn == devfn) {
- if (info->ats_supported && info->dev)
- return info;
- break;
+ spin_unlock(&domain->lock);
+ return info->ats_supported ? info : NULL;
}
+ }
+ spin_unlock(&domain->lock);
return NULL;
}
@@ -1415,23 +1367,21 @@ static void domain_update_iotlb(struct dmar_domain *domain)
struct device_domain_info *info;
bool has_iotlb_device = false;
- assert_spin_locked(&device_domain_lock);
-
- list_for_each_entry(info, &domain->devices, link)
+ spin_lock(&domain->lock);
+ list_for_each_entry(info, &domain->devices, link) {
if (info->ats_enabled) {
has_iotlb_device = true;
break;
}
-
+ }
domain->has_iotlb_device = has_iotlb_device;
+ spin_unlock(&domain->lock);
}
static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
struct pci_dev *pdev;
- assert_spin_locked(&device_domain_lock);
-
if (!info || !dev_is_pci(info->dev))
return;
@@ -1477,8 +1427,6 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
struct pci_dev *pdev;
- assert_spin_locked(&device_domain_lock);
-
if (!dev_is_pci(info->dev))
return;
@@ -1518,17 +1466,15 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
u64 addr, unsigned mask)
{
- unsigned long flags;
struct device_domain_info *info;
if (!domain->has_iotlb_device)
return;
- spin_lock_irqsave(&device_domain_lock, flags);
+ spin_lock(&domain->lock);
list_for_each_entry(info, &domain->devices, link)
__iommu_flush_dev_iotlb(info, addr, mask);
-
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ spin_unlock(&domain->lock);
}
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
@@ -1539,7 +1485,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
unsigned int aligned_pages = __roundup_pow_of_two(pages);
unsigned int mask = ilog2(aligned_pages);
uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
- u16 did = domain->iommu_did[iommu->seq_id];
+ u16 did = domain_id_iommu(domain, iommu);
BUG_ON(pages == 0);
@@ -1609,11 +1555,12 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu,
static void intel_flush_iotlb_all(struct iommu_domain *domain)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- int idx;
+ struct iommu_domain_info *info;
+ unsigned long idx;
- for_each_domain_iommu(idx, dmar_domain) {
- struct intel_iommu *iommu = g_iommus[idx];
- u16 did = dmar_domain->iommu_did[iommu->seq_id];
+ xa_for_each(&dmar_domain->iommu_array, idx, info) {
+ struct intel_iommu *iommu = info->iommu;
+ u16 did = domain_id_iommu(dmar_domain, iommu);
if (domain_use_first_level(dmar_domain))
qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0);
@@ -1719,23 +1666,16 @@ static int iommu_init_domains(struct intel_iommu *iommu)
static void disable_dmar_iommu(struct intel_iommu *iommu)
{
- struct device_domain_info *info, *tmp;
- unsigned long flags;
-
if (!iommu->domain_ids)
return;
- spin_lock_irqsave(&device_domain_lock, flags);
- list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
- if (info->iommu != iommu)
- continue;
-
- if (!info->dev || !info->domain)
- continue;
-
- __dmar_remove_one_dev_info(info);
- }
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ /*
+ * All iommu domains must have been detached from the devices,
+ * hence there should be no domain IDs in use.
+ */
+ if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap))
+ > NUM_RESERVED_DID))
+ return;
if (iommu->gcmd & DMA_GCMD_TE)
iommu_disable_translation(iommu);
@@ -1748,8 +1688,6 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
iommu->domain_ids = NULL;
}
- g_iommus[iommu->seq_id] = NULL;
-
/* free context mapping */
free_context_table(iommu);
@@ -1795,55 +1733,77 @@ static struct dmar_domain *alloc_domain(unsigned int type)
domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
domain->has_iotlb_device = false;
INIT_LIST_HEAD(&domain->devices);
+ spin_lock_init(&domain->lock);
+ xa_init(&domain->iommu_array);
return domain;
}
-/* Must be called with iommu->lock */
static int domain_attach_iommu(struct dmar_domain *domain,
struct intel_iommu *iommu)
{
+ struct iommu_domain_info *info, *curr;
unsigned long ndomains;
- int num;
+ int num, ret = -ENOSPC;
- assert_spin_locked(&device_domain_lock);
- assert_spin_locked(&iommu->lock);
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
- domain->iommu_refcnt[iommu->seq_id] += 1;
- if (domain->iommu_refcnt[iommu->seq_id] == 1) {
- ndomains = cap_ndoms(iommu->cap);
- num = find_first_zero_bit(iommu->domain_ids, ndomains);
+ spin_lock(&iommu->lock);
+ curr = xa_load(&domain->iommu_array, iommu->seq_id);
+ if (curr) {
+ curr->refcnt++;
+ spin_unlock(&iommu->lock);
+ kfree(info);
+ return 0;
+ }
- if (num >= ndomains) {
- pr_err("%s: No free domain ids\n", iommu->name);
- domain->iommu_refcnt[iommu->seq_id] -= 1;
- return -ENOSPC;
- }
+ ndomains = cap_ndoms(iommu->cap);
+ num = find_first_zero_bit(iommu->domain_ids, ndomains);
+ if (num >= ndomains) {
+ pr_err("%s: No free domain ids\n", iommu->name);
+ goto err_unlock;
+ }
- set_bit(num, iommu->domain_ids);
- domain->iommu_did[iommu->seq_id] = num;
- domain->nid = iommu->node;
- domain_update_iommu_cap(domain);
+ set_bit(num, iommu->domain_ids);
+ info->refcnt = 1;
+ info->did = num;
+ info->iommu = iommu;
+ curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
+ NULL, info, GFP_ATOMIC);
+ if (curr) {
+ ret = xa_err(curr) ? : -EBUSY;
+ goto err_clear;
}
+ domain_update_iommu_cap(domain);
+ spin_unlock(&iommu->lock);
return 0;
+
+err_clear:
+ clear_bit(info->did, iommu->domain_ids);
+err_unlock:
+ spin_unlock(&iommu->lock);
+ kfree(info);
+ return ret;
}
static void domain_detach_iommu(struct dmar_domain *domain,
struct intel_iommu *iommu)
{
- int num;
+ struct iommu_domain_info *info;
- assert_spin_locked(&device_domain_lock);
- assert_spin_locked(&iommu->lock);
-
- domain->iommu_refcnt[iommu->seq_id] -= 1;
- if (domain->iommu_refcnt[iommu->seq_id] == 0) {
- num = domain->iommu_did[iommu->seq_id];
- clear_bit(num, iommu->domain_ids);
+ spin_lock(&iommu->lock);
+ info = xa_load(&domain->iommu_array, iommu->seq_id);
+ if (--info->refcnt == 0) {
+ clear_bit(info->did, iommu->domain_ids);
+ xa_erase(&domain->iommu_array, iommu->seq_id);
+ domain->nid = NUMA_NO_NODE;
domain_update_iommu_cap(domain);
- domain->iommu_did[iommu->seq_id] = 0;
+ kfree(info);
}
+ spin_unlock(&iommu->lock);
}
static inline int guestwidth_to_adjustwidth(int gaw)
@@ -1862,10 +1822,6 @@ static inline int guestwidth_to_adjustwidth(int gaw)
static void domain_exit(struct dmar_domain *domain)
{
-
- /* Remove associated devices and clear attached or cached domains */
- domain_remove_dev_info(domain);
-
if (domain->pgd) {
LIST_HEAD(freelist);
@@ -1873,6 +1829,9 @@ static void domain_exit(struct dmar_domain *domain)
put_pages_list(&freelist);
}
+ if (WARN_ON(!list_empty(&domain->devices)))
+ return;
+
kfree(domain);
}
@@ -1930,11 +1889,11 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
struct pasid_table *table,
u8 bus, u8 devfn)
{
- u16 did = domain->iommu_did[iommu->seq_id];
+ struct device_domain_info *info =
+ iommu_support_dev_iotlb(domain, iommu, bus, devfn);
+ u16 did = domain_id_iommu(domain, iommu);
int translation = CONTEXT_TT_MULTI_LEVEL;
- struct device_domain_info *info = NULL;
struct context_entry *context;
- unsigned long flags;
int ret;
WARN_ON(did == 0);
@@ -1947,9 +1906,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
BUG_ON(!domain->pgd);
- spin_lock_irqsave(&device_domain_lock, flags);
spin_lock(&iommu->lock);
-
ret = -ENOMEM;
context = iommu_context_addr(iommu, bus, devfn, 1);
if (!context)
@@ -2000,7 +1957,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
* Setup the Device-TLB enable bit and Page request
* Enable bit:
*/
- info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
if (info && info->ats_supported)
context_set_sm_dte(context);
if (info && info->pri_supported)
@@ -2023,7 +1979,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
goto out_unlock;
}
- info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
if (info && info->ats_supported)
translation = CONTEXT_TT_DEV_IOTLB;
else
@@ -2069,7 +2024,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
out_unlock:
spin_unlock(&iommu->lock);
- spin_unlock_irqrestore(&device_domain_lock, flags);
return ret;
}
@@ -2186,8 +2140,9 @@ static void switch_to_super_page(struct dmar_domain *domain,
unsigned long end_pfn, int level)
{
unsigned long lvl_pages = lvl_to_nr_pages(level);
+ struct iommu_domain_info *info;
struct dma_pte *pte = NULL;
- int i;
+ unsigned long i;
while (start_pfn <= end_pfn) {
if (!pte)
@@ -2198,8 +2153,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
start_pfn + lvl_pages - 1,
level + 1);
- for_each_domain_iommu(i, domain)
- iommu_flush_iotlb_psi(g_iommus[i], domain,
+ xa_for_each(&domain->iommu_array, i, info)
+ iommu_flush_iotlb_psi(info->iommu, domain,
start_pfn, lvl_pages,
0, 0);
}
@@ -2313,16 +2268,15 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
{
struct intel_iommu *iommu = info->iommu;
struct context_entry *context;
- unsigned long flags;
u16 did_old;
if (!iommu)
return;
- spin_lock_irqsave(&iommu->lock, flags);
+ spin_lock(&iommu->lock);
context = iommu_context_addr(iommu, bus, devfn, 0);
if (!context) {
- spin_unlock_irqrestore(&iommu->lock, flags);
+ spin_unlock(&iommu->lock);
return;
}
@@ -2330,14 +2284,14 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
if (hw_pass_through && domain_type_is_si(info->domain))
did_old = FLPT_DEFAULT_DID;
else
- did_old = info->domain->iommu_did[iommu->seq_id];
+ did_old = domain_id_iommu(info->domain, iommu);
} else {
did_old = context_domain_id(context);
}
context_clear_entry(context);
__iommu_flush_cache(iommu, context, sizeof(*context));
- spin_unlock_irqrestore(&iommu->lock, flags);
+ spin_unlock(&iommu->lock);
iommu->flush.flush_context(iommu,
did_old,
(((u16)bus) << 8) | devfn,
@@ -2356,30 +2310,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
__iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
}
-static void domain_remove_dev_info(struct dmar_domain *domain)
-{
- struct device_domain_info *info, *tmp;
- unsigned long flags;
-
- spin_lock_irqsave(&device_domain_lock, flags);
- list_for_each_entry_safe(info, tmp, &domain->devices, link)
- __dmar_remove_one_dev_info(info);
- spin_unlock_irqrestore(&device_domain_lock, flags);
-}
-
-static inline struct device_domain_info *
-dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
-{
- struct device_domain_info *info;
-
- list_for_each_entry(info, &device_domain_list, global)
- if (info->segment == segment && info->bus == bus &&
- info->devfn == devfn)
- return info;
-
- return NULL;
-}
-
static int domain_setup_first_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev,
@@ -2412,7 +2342,7 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
flags |= PASID_FLAG_PAGE_SNOOP;
return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
- domain->iommu_did[iommu->seq_id],
+ domain_id_iommu(domain, iommu),
flags);
}
@@ -2499,7 +2429,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu;
- unsigned long flags;
u8 bus, devfn;
int ret;
@@ -2507,17 +2436,13 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
if (!iommu)
return -ENODEV;
- spin_lock_irqsave(&device_domain_lock, flags);
- info->domain = domain;
- spin_lock(&iommu->lock);
ret = domain_attach_iommu(domain, iommu);
- spin_unlock(&iommu->lock);
- if (ret) {
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ if (ret)
return ret;
- }
+ info->domain = domain;
+ spin_lock(&domain->lock);
list_add(&info->link, &domain->devices);
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ spin_unlock(&domain->lock);
/* PASID table is mandatory for a PCI device in scalable mode. */
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
@@ -2529,7 +2454,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
}
/* Setup the PASID entry for requests without PASID: */
- spin_lock_irqsave(&iommu->lock, flags);
if (hw_pass_through && domain_type_is_si(domain))
ret = intel_pasid_setup_pass_through(iommu, domain,
dev, PASID_RID2PASID);
@@ -2539,7 +2463,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
else
ret = intel_pasid_setup_second_level(iommu, domain,
dev, PASID_RID2PASID);
- spin_unlock_irqrestore(&iommu->lock, flags);
if (ret) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
@@ -2807,7 +2730,6 @@ static int copy_translation_tables(struct intel_iommu *iommu)
struct root_entry *old_rt;
phys_addr_t old_rt_phys;
int ctxt_table_entries;
- unsigned long flags;
u64 rtaddr_reg;
int bus, ret;
bool new_ext, ext;
@@ -2850,7 +2772,7 @@ static int copy_translation_tables(struct intel_iommu *iommu)
}
}
- spin_lock_irqsave(&iommu->lock, flags);
+ spin_lock(&iommu->lock);
/* Context tables are copied, now write them to the root_entry table */
for (bus = 0; bus < 256; bus++) {
@@ -2869,7 +2791,7 @@ static int copy_translation_tables(struct intel_iommu *iommu)
iommu->root_entry[bus].hi = val;
}
- spin_unlock_irqrestore(&iommu->lock, flags);
+ spin_unlock(&iommu->lock);
kfree(ctxt_tbls);
@@ -2968,36 +2890,6 @@ static int __init init_dmars(void)
struct intel_iommu *iommu;
int ret;
- /*
- * for each drhd
- * allocate root
- * initialize and program root entry to not present
- * endfor
- */
- for_each_drhd_unit(drhd) {
- /*
- * lock not needed as this is only incremented in the single
- * threaded kernel __init code path all other access are read
- * only
- */
- if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
- g_num_of_iommus++;
- continue;
- }
- pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
- }
-
- /* Preallocate enough resources for IOMMU hot-addition */
- if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
- g_num_of_iommus = DMAR_UNITS_SUPPORTED;
-
- g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
- GFP_KERNEL);
- if (!g_iommus) {
- ret = -ENOMEM;
- goto error;
- }
-
ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
if (ret)
goto free_iommu;
@@ -3020,8 +2912,6 @@ static int __init init_dmars(void)
intel_pasid_max_id);
}
- g_iommus[iommu->seq_id] = iommu;
-
intel_iommu_init_qi(iommu);
ret = iommu_init_domains(iommu);
@@ -3147,9 +3037,6 @@ free_iommu:
free_dmar_iommu(iommu);
}
- kfree(g_iommus);
-
-error:
return ret;
}
@@ -3530,9 +3417,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
int sp, ret;
struct intel_iommu *iommu = dmaru->iommu;
- if (g_iommus[iommu->seq_id])
- return 0;
-
ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
if (ret)
goto out;
@@ -3556,7 +3440,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
if (iommu->gcmd & DMA_GCMD_TE)
iommu_disable_translation(iommu);
- g_iommus[iommu->seq_id] = iommu;
ret = iommu_init_domains(iommu);
if (ret == 0)
ret = iommu_alloc_root_entry(iommu);
@@ -4022,6 +3905,20 @@ static int __init probe_acpi_namespace_devices(void)
return 0;
}
+static __init int tboot_force_iommu(void)
+{
+ if (!tboot_enabled())
+ return 0;
+
+ if (no_iommu || dmar_disabled)
+ pr_warn("Forcing Intel-IOMMU to enabled\n");
+
+ dmar_disabled = 0;
+ no_iommu = 0;
+
+ return 1;
+}
+
int __init intel_iommu_init(void)
{
int ret = -ENODEV;
@@ -4093,9 +3990,6 @@ int __init intel_iommu_init(void)
if (list_empty(&dmar_satc_units))
pr_info("No SATC found\n");
- if (dmar_map_gfx)
- intel_iommu_gfx_mapped = 1;
-
init_no_remapping_devices();
ret = init_dmars();
@@ -4181,21 +4075,13 @@ static void domain_context_clear(struct device_domain_info *info)
&domain_context_clear_one_cb, info);
}
-static void __dmar_remove_one_dev_info(struct device_domain_info *info)
+static void dmar_remove_one_dev_info(struct device *dev)
{
- struct dmar_domain *domain;
- struct intel_iommu *iommu;
- unsigned long flags;
-
- assert_spin_locked(&device_domain_lock);
-
- if (WARN_ON(!info))
- return;
-
- iommu = info->iommu;
- domain = info->domain;
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct dmar_domain *domain = info->domain;
+ struct intel_iommu *iommu = info->iommu;
- if (info->dev && !dev_is_real_dma_subdevice(info->dev)) {
+ if (!dev_is_real_dma_subdevice(info->dev)) {
if (dev_is_pci(info->dev) && sm_supported(iommu))
intel_pasid_tear_down_entry(iommu, info->dev,
PASID_RID2PASID, false);
@@ -4205,23 +4091,12 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
intel_pasid_free_table(info->dev);
}
+ spin_lock(&domain->lock);
list_del(&info->link);
+ spin_unlock(&domain->lock);
- spin_lock_irqsave(&iommu->lock, flags);
domain_detach_iommu(domain, iommu);
- spin_unlock_irqrestore(&iommu->lock, flags);
-}
-
-static void dmar_remove_one_dev_info(struct device *dev)
-{
- struct device_domain_info *info;
- unsigned long flags;
-
- spin_lock_irqsave(&device_domain_lock, flags);
- info = dev_iommu_priv_get(dev);
- if (info)
- __dmar_remove_one_dev_info(info);
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ info->domain = NULL;
}
static int md_domain_init(struct dmar_domain *domain, int guest_width)
@@ -4466,15 +4341,16 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain,
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
unsigned long iova_pfn = IOVA_PFN(gather->start);
size_t size = gather->end - gather->start;
+ struct iommu_domain_info *info;
unsigned long start_pfn;
unsigned long nrpages;
- int iommu_id;
+ unsigned long i;
nrpages = aligned_nrpages(gather->start, size);
start_pfn = mm_to_dma_pfn(iova_pfn);
- for_each_domain_iommu(iommu_id, dmar_domain)
- iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
+ xa_for_each(&dmar_domain->iommu_array, i, info)
+ iommu_flush_iotlb_psi(info->iommu, dmar_domain,
start_pfn, nrpages,
list_empty(&gather->freelist), 0);
@@ -4503,7 +4379,7 @@ static bool domain_support_force_snooping(struct dmar_domain *domain)
struct device_domain_info *info;
bool support = true;
- assert_spin_locked(&device_domain_lock);
+ assert_spin_locked(&domain->lock);
list_for_each_entry(info, &domain->devices, link) {
if (!ecap_sc_support(info->iommu->ecap)) {
support = false;
@@ -4518,8 +4394,7 @@ static void domain_set_force_snooping(struct dmar_domain *domain)
{
struct device_domain_info *info;
- assert_spin_locked(&device_domain_lock);
-
+ assert_spin_locked(&domain->lock);
/*
* Second level page table supports per-PTE snoop control. The
* iommu_map() interface will handle this by setting SNP bit.
@@ -4537,20 +4412,19 @@ static void domain_set_force_snooping(struct dmar_domain *domain)
static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- unsigned long flags;
if (dmar_domain->force_snooping)
return true;
- spin_lock_irqsave(&device_domain_lock, flags);
+ spin_lock(&dmar_domain->lock);
if (!domain_support_force_snooping(dmar_domain)) {
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ spin_unlock(&dmar_domain->lock);
return false;
}
domain_set_force_snooping(dmar_domain);
dmar_domain->force_snooping = true;
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ spin_unlock(&dmar_domain->lock);
return true;
}
@@ -4572,7 +4446,6 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
struct device_domain_info *info;
struct intel_iommu *iommu;
- unsigned long flags;
u8 bus, devfn;
iommu = device_to_iommu(dev, &bus, &devfn);
@@ -4615,10 +4488,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}
}
- spin_lock_irqsave(&device_domain_lock, flags);
- list_add(&info->global, &device_domain_list);
dev_iommu_priv_set(dev, info);
- spin_unlock_irqrestore(&device_domain_lock, flags);
return &iommu->iommu;
}
@@ -4626,15 +4496,9 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
static void intel_iommu_release_device(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
- unsigned long flags;
dmar_remove_one_dev_info(dev);
-
- spin_lock_irqsave(&device_domain_lock, flags);
dev_iommu_priv_set(dev, NULL);
- list_del(&info->global);
- spin_unlock_irqrestore(&device_domain_lock, flags);
-
kfree(info);
set_dma_ops(dev, NULL);
}
@@ -4707,7 +4571,6 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct context_entry *context;
struct dmar_domain *domain;
- unsigned long flags;
u64 ctx_lo;
int ret;
@@ -4715,9 +4578,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
if (!domain)
return -EINVAL;
- spin_lock_irqsave(&device_domain_lock, flags);
spin_lock(&iommu->lock);
-
ret = -EINVAL;
if (!info->pasid_supported)
goto out;
@@ -4733,7 +4594,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
context[0].lo = ctx_lo;
wmb();
iommu->flush.flush_context(iommu,
- domain->iommu_did[iommu->seq_id],
+ domain_id_iommu(domain, iommu),
PCI_DEVID(info->bus, info->devfn),
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
@@ -4747,7 +4608,6 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
out:
spin_unlock(&iommu->lock);
- spin_unlock_irqrestore(&device_domain_lock, flags);
return ret;
}
@@ -4871,13 +4731,11 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
unsigned long pages = aligned_nrpages(iova, size);
unsigned long pfn = iova >> VTD_PAGE_SHIFT;
- struct intel_iommu *iommu;
- int iommu_id;
+ struct iommu_domain_info *info;
+ unsigned long i;
- for_each_domain_iommu(iommu_id, dmar_domain) {
- iommu = g_iommus[iommu_id];
- __mapping_notify_one(iommu, dmar_domain, pfn, pages);
- }
+ xa_for_each(&dmar_domain->iommu_array, i, info)
+ __mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
}
const struct iommu_ops intel_iommu_ops = {
@@ -4887,7 +4745,6 @@ const struct iommu_ops intel_iommu_ops = {
.probe_finalize = intel_iommu_probe_finalize,
.release_device = intel_iommu_release_device,
.get_resv_regions = intel_iommu_get_resv_regions,
- .put_resv_regions = generic_iommu_put_resv_regions,
.device_group = intel_iommu_device_group,
.dev_enable_feat = intel_iommu_dev_enable_feat,
.dev_disable_feat = intel_iommu_dev_disable_feat,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
new file mode 100644
index 000000000000..fae45bbb0c7f
--- /dev/null
+++ b/drivers/iommu/intel/iommu.h
@@ -0,0 +1,839 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright © 2006-2015, Intel Corporation.
+ *
+ * Authors: Ashok Raj <ashok.raj@intel.com>
+ * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ * David Woodhouse <David.Woodhouse@intel.com>
+ */
+
+#ifndef _INTEL_IOMMU_H_
+#define _INTEL_IOMMU_H_
+
+#include <linux/types.h>
+#include <linux/iova.h>
+#include <linux/io.h>
+#include <linux/idr.h>
+#include <linux/mmu_notifier.h>
+#include <linux/list.h>
+#include <linux/iommu.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/dmar.h>
+#include <linux/ioasid.h>
+#include <linux/bitfield.h>
+#include <linux/xarray.h>
+
+#include <asm/cacheflush.h>
+#include <asm/iommu.h>
+
+/*
+ * VT-d hardware uses 4KiB page size regardless of host page size.
+ */
+#define VTD_PAGE_SHIFT (12)
+#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT)
+#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
+#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
+
+#define VTD_STRIDE_SHIFT (9)
+#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
+
+#define DMA_PTE_READ BIT_ULL(0)
+#define DMA_PTE_WRITE BIT_ULL(1)
+#define DMA_PTE_LARGE_PAGE BIT_ULL(7)
+#define DMA_PTE_SNP BIT_ULL(11)
+
+#define DMA_FL_PTE_PRESENT BIT_ULL(0)
+#define DMA_FL_PTE_US BIT_ULL(2)
+#define DMA_FL_PTE_ACCESS BIT_ULL(5)
+#define DMA_FL_PTE_DIRTY BIT_ULL(6)
+#define DMA_FL_PTE_XD BIT_ULL(63)
+
+#define ADDR_WIDTH_5LEVEL (57)
+#define ADDR_WIDTH_4LEVEL (48)
+
+#define CONTEXT_TT_MULTI_LEVEL 0
+#define CONTEXT_TT_DEV_IOTLB 1
+#define CONTEXT_TT_PASS_THROUGH 2
+#define CONTEXT_PASIDE BIT_ULL(3)
+
+/*
+ * Intel IOMMU register specification per version 1.0 public spec.
+ */
+#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */
+#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */
+#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */
+#define DMAR_GCMD_REG 0x18 /* Global command register */
+#define DMAR_GSTS_REG 0x1c /* Global status register */
+#define DMAR_RTADDR_REG 0x20 /* Root entry table */
+#define DMAR_CCMD_REG 0x28 /* Context command reg */
+#define DMAR_FSTS_REG 0x34 /* Fault Status register */
+#define DMAR_FECTL_REG 0x38 /* Fault control register */
+#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */
+#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */
+#define DMAR_FEUADDR_REG 0x44 /* Upper address register */
+#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */
+#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */
+#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */
+#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
+#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
+#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
+#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */
+#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */
+#define DMAR_IQ_SHIFT 4 /* Invalidation queue head/tail shift */
+#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */
+#define DMAR_ICS_REG 0x9c /* Invalidation complete status register */
+#define DMAR_IQER_REG 0xb0 /* Invalidation queue error record register */
+#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */
+#define DMAR_PQH_REG 0xc0 /* Page request queue head register */
+#define DMAR_PQT_REG 0xc8 /* Page request queue tail register */
+#define DMAR_PQA_REG 0xd0 /* Page request queue address register */
+#define DMAR_PRS_REG 0xdc /* Page request status register */
+#define DMAR_PECTL_REG 0xe0 /* Page request event control register */
+#define DMAR_PEDATA_REG 0xe4 /* Page request event interrupt data register */
+#define DMAR_PEADDR_REG 0xe8 /* Page request event interrupt addr register */
+#define DMAR_PEUADDR_REG 0xec /* Page request event Upper address register */
+#define DMAR_MTRRCAP_REG 0x100 /* MTRR capability register */
+#define DMAR_MTRRDEF_REG 0x108 /* MTRR default type register */
+#define DMAR_MTRR_FIX64K_00000_REG 0x120 /* MTRR Fixed range registers */
+#define DMAR_MTRR_FIX16K_80000_REG 0x128
+#define DMAR_MTRR_FIX16K_A0000_REG 0x130
+#define DMAR_MTRR_FIX4K_C0000_REG 0x138
+#define DMAR_MTRR_FIX4K_C8000_REG 0x140
+#define DMAR_MTRR_FIX4K_D0000_REG 0x148
+#define DMAR_MTRR_FIX4K_D8000_REG 0x150
+#define DMAR_MTRR_FIX4K_E0000_REG 0x158
+#define DMAR_MTRR_FIX4K_E8000_REG 0x160
+#define DMAR_MTRR_FIX4K_F0000_REG 0x168
+#define DMAR_MTRR_FIX4K_F8000_REG 0x170
+#define DMAR_MTRR_PHYSBASE0_REG 0x180 /* MTRR Variable range registers */
+#define DMAR_MTRR_PHYSMASK0_REG 0x188
+#define DMAR_MTRR_PHYSBASE1_REG 0x190
+#define DMAR_MTRR_PHYSMASK1_REG 0x198
+#define DMAR_MTRR_PHYSBASE2_REG 0x1a0
+#define DMAR_MTRR_PHYSMASK2_REG 0x1a8
+#define DMAR_MTRR_PHYSBASE3_REG 0x1b0
+#define DMAR_MTRR_PHYSMASK3_REG 0x1b8
+#define DMAR_MTRR_PHYSBASE4_REG 0x1c0
+#define DMAR_MTRR_PHYSMASK4_REG 0x1c8
+#define DMAR_MTRR_PHYSBASE5_REG 0x1d0
+#define DMAR_MTRR_PHYSMASK5_REG 0x1d8
+#define DMAR_MTRR_PHYSBASE6_REG 0x1e0
+#define DMAR_MTRR_PHYSMASK6_REG 0x1e8
+#define DMAR_MTRR_PHYSBASE7_REG 0x1f0
+#define DMAR_MTRR_PHYSMASK7_REG 0x1f8
+#define DMAR_MTRR_PHYSBASE8_REG 0x200
+#define DMAR_MTRR_PHYSMASK8_REG 0x208
+#define DMAR_MTRR_PHYSBASE9_REG 0x210
+#define DMAR_MTRR_PHYSMASK9_REG 0x218
+#define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */
+#define DMAR_VCMD_REG 0xe00 /* Virtual command register */
+#define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */
+
+#define DMAR_IQER_REG_IQEI(reg) FIELD_GET(GENMASK_ULL(3, 0), reg)
+#define DMAR_IQER_REG_ITESID(reg) FIELD_GET(GENMASK_ULL(47, 32), reg)
+#define DMAR_IQER_REG_ICESID(reg) FIELD_GET(GENMASK_ULL(63, 48), reg)
+
+#define OFFSET_STRIDE (9)
+
+#define dmar_readq(a) readq(a)
+#define dmar_writeq(a,v) writeq(v,a)
+#define dmar_readl(a) readl(a)
+#define dmar_writel(a, v) writel(v, a)
+
+#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4)
+#define DMAR_VER_MINOR(v) ((v) & 0x0f)
+
+/*
+ * Decoding Capability Register
+ */
+#define cap_5lp_support(c) (((c) >> 60) & 1)
+#define cap_pi_support(c) (((c) >> 59) & 1)
+#define cap_fl1gp_support(c) (((c) >> 56) & 1)
+#define cap_read_drain(c) (((c) >> 55) & 1)
+#define cap_write_drain(c) (((c) >> 54) & 1)
+#define cap_max_amask_val(c) (((c) >> 48) & 0x3f)
+#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1)
+#define cap_pgsel_inv(c) (((c) >> 39) & 1)
+
+#define cap_super_page_val(c) (((c) >> 34) & 0xf)
+#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \
+ * OFFSET_STRIDE) + 21)
+
+#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16)
+#define cap_max_fault_reg_offset(c) \
+ (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16)
+
+#define cap_zlr(c) (((c) >> 22) & 1)
+#define cap_isoch(c) (((c) >> 23) & 1)
+#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1)
+#define cap_sagaw(c) (((c) >> 8) & 0x1f)
+#define cap_caching_mode(c) (((c) >> 7) & 1)
+#define cap_phmr(c) (((c) >> 6) & 1)
+#define cap_plmr(c) (((c) >> 5) & 1)
+#define cap_rwbf(c) (((c) >> 4) & 1)
+#define cap_afl(c) (((c) >> 3) & 1)
+#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7)))
+/*
+ * Extended Capability Register
+ */
+
+#define ecap_rps(e) (((e) >> 49) & 0x1)
+#define ecap_smpwc(e) (((e) >> 48) & 0x1)
+#define ecap_flts(e) (((e) >> 47) & 0x1)
+#define ecap_slts(e) (((e) >> 46) & 0x1)
+#define ecap_slads(e) (((e) >> 45) & 0x1)
+#define ecap_vcs(e) (((e) >> 44) & 0x1)
+#define ecap_smts(e) (((e) >> 43) & 0x1)
+#define ecap_dit(e) (((e) >> 41) & 0x1)
+#define ecap_pds(e) (((e) >> 42) & 0x1)
+#define ecap_pasid(e) (((e) >> 40) & 0x1)
+#define ecap_pss(e) (((e) >> 35) & 0x1f)
+#define ecap_eafs(e) (((e) >> 34) & 0x1)
+#define ecap_nwfs(e) (((e) >> 33) & 0x1)
+#define ecap_srs(e) (((e) >> 31) & 0x1)
+#define ecap_ers(e) (((e) >> 30) & 0x1)
+#define ecap_prs(e) (((e) >> 29) & 0x1)
+#define ecap_broken_pasid(e) (((e) >> 28) & 0x1)
+#define ecap_dis(e) (((e) >> 27) & 0x1)
+#define ecap_nest(e) (((e) >> 26) & 0x1)
+#define ecap_mts(e) (((e) >> 25) & 0x1)
+#define ecap_ecs(e) (((e) >> 24) & 0x1)
+#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16)
+#define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16)
+#define ecap_coherent(e) ((e) & 0x1)
+#define ecap_qis(e) ((e) & 0x2)
+#define ecap_pass_through(e) (((e) >> 6) & 0x1)
+#define ecap_eim_support(e) (((e) >> 4) & 0x1)
+#define ecap_ir_support(e) (((e) >> 3) & 0x1)
+#define ecap_dev_iotlb_support(e) (((e) >> 2) & 0x1)
+#define ecap_max_handle_mask(e) (((e) >> 20) & 0xf)
+#define ecap_sc_support(e) (((e) >> 7) & 0x1) /* Snooping Control */
+
+/* Virtual command interface capability */
+#define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */
+
+/* IOTLB_REG */
+#define DMA_TLB_FLUSH_GRANU_OFFSET 60
+#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
+#define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
+#define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
+#define DMA_TLB_IIRG(type) ((type >> 60) & 3)
+#define DMA_TLB_IAIG(val) (((val) >> 57) & 3)
+#define DMA_TLB_READ_DRAIN (((u64)1) << 49)
+#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
+#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32)
+#define DMA_TLB_IVT (((u64)1) << 63)
+#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
+#define DMA_TLB_MAX_SIZE (0x3f)
+
+/* INVALID_DESC */
+#define DMA_CCMD_INVL_GRANU_OFFSET 61
+#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 4)
+#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 4)
+#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 4)
+#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7)
+#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
+#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16)))
+#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6)
+#define DMA_ID_TLB_ADDR(addr) (addr)
+#define DMA_ID_TLB_ADDR_MASK(mask) (mask)
+
+/* PMEN_REG */
+#define DMA_PMEN_EPM (((u32)1)<<31)
+#define DMA_PMEN_PRS (((u32)1)<<0)
+
+/* GCMD_REG */
+#define DMA_GCMD_TE (((u32)1) << 31)
+#define DMA_GCMD_SRTP (((u32)1) << 30)
+#define DMA_GCMD_SFL (((u32)1) << 29)
+#define DMA_GCMD_EAFL (((u32)1) << 28)
+#define DMA_GCMD_WBF (((u32)1) << 27)
+#define DMA_GCMD_QIE (((u32)1) << 26)
+#define DMA_GCMD_SIRTP (((u32)1) << 24)
+#define DMA_GCMD_IRE (((u32) 1) << 25)
+#define DMA_GCMD_CFI (((u32) 1) << 23)
+
+/* GSTS_REG */
+#define DMA_GSTS_TES (((u32)1) << 31)
+#define DMA_GSTS_RTPS (((u32)1) << 30)
+#define DMA_GSTS_FLS (((u32)1) << 29)
+#define DMA_GSTS_AFLS (((u32)1) << 28)
+#define DMA_GSTS_WBFS (((u32)1) << 27)
+#define DMA_GSTS_QIES (((u32)1) << 26)
+#define DMA_GSTS_IRTPS (((u32)1) << 24)
+#define DMA_GSTS_IRES (((u32)1) << 25)
+#define DMA_GSTS_CFIS (((u32)1) << 23)
+
+/* DMA_RTADDR_REG */
+#define DMA_RTADDR_RTT (((u64)1) << 11)
+#define DMA_RTADDR_SMT (((u64)1) << 10)
+
+/* CCMD_REG */
+#define DMA_CCMD_ICC (((u64)1) << 63)
+#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
+#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)
+#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61)
+#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32)
+#define DMA_CCMD_MASK_NOBIT 0
+#define DMA_CCMD_MASK_1BIT 1
+#define DMA_CCMD_MASK_2BIT 2
+#define DMA_CCMD_MASK_3BIT 3
+#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
+#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))
+
+/* FECTL_REG */
+#define DMA_FECTL_IM (((u32)1) << 31)
+
+/* FSTS_REG */
+#define DMA_FSTS_PFO (1 << 0) /* Primary Fault Overflow */
+#define DMA_FSTS_PPF (1 << 1) /* Primary Pending Fault */
+#define DMA_FSTS_IQE (1 << 4) /* Invalidation Queue Error */
+#define DMA_FSTS_ICE (1 << 5) /* Invalidation Completion Error */
+#define DMA_FSTS_ITE (1 << 6) /* Invalidation Time-out Error */
+#define DMA_FSTS_PRO (1 << 7) /* Page Request Overflow */
+#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
+
+/* FRCD_REG, 32 bits access */
+#define DMA_FRCD_F (((u32)1) << 31)
+#define dma_frcd_type(d) ((d >> 30) & 1)
+#define dma_frcd_fault_reason(c) (c & 0xff)
+#define dma_frcd_source_id(c) (c & 0xffff)
+#define dma_frcd_pasid_value(c) (((c) >> 8) & 0xfffff)
+#define dma_frcd_pasid_present(c) (((c) >> 31) & 1)
+/* low 64 bit */
+#define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT))
+
+/* PRS_REG */
+#define DMA_PRS_PPR ((u32)1)
+#define DMA_PRS_PRO ((u32)2)
+
+#define DMA_VCS_PAS ((u64)1)
+
+#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
+do { \
+ cycles_t start_time = get_cycles(); \
+ while (1) { \
+ sts = op(iommu->reg + offset); \
+ if (cond) \
+ break; \
+ if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
+ panic("DMAR hardware is malfunctioning\n"); \
+ cpu_relax(); \
+ } \
+} while (0)
+
+#define QI_LENGTH 256 /* queue length */
+
+enum {
+ QI_FREE,
+ QI_IN_USE,
+ QI_DONE,
+ QI_ABORT
+};
+
+#define QI_CC_TYPE 0x1
+#define QI_IOTLB_TYPE 0x2
+#define QI_DIOTLB_TYPE 0x3
+#define QI_IEC_TYPE 0x4
+#define QI_IWD_TYPE 0x5
+#define QI_EIOTLB_TYPE 0x6
+#define QI_PC_TYPE 0x7
+#define QI_DEIOTLB_TYPE 0x8
+#define QI_PGRP_RESP_TYPE 0x9
+#define QI_PSTRM_RESP_TYPE 0xa
+
+#define QI_IEC_SELECTIVE (((u64)1) << 4)
+#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32))
+#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27))
+
+#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32)
+#define QI_IWD_STATUS_WRITE (((u64)1) << 5)
+#define QI_IWD_FENCE (((u64)1) << 6)
+#define QI_IWD_PRQ_DRAIN (((u64)1) << 7)
+
+#define QI_IOTLB_DID(did) (((u64)did) << 16)
+#define QI_IOTLB_DR(dr) (((u64)dr) << 7)
+#define QI_IOTLB_DW(dw) (((u64)dw) << 6)
+#define QI_IOTLB_GRAN(gran) (((u64)gran) >> (DMA_TLB_FLUSH_GRANU_OFFSET-4))
+#define QI_IOTLB_ADDR(addr) (((u64)addr) & VTD_PAGE_MASK)
+#define QI_IOTLB_IH(ih) (((u64)ih) << 6)
+#define QI_IOTLB_AM(am) (((u8)am) & 0x3f)
+
+#define QI_CC_FM(fm) (((u64)fm) << 48)
+#define QI_CC_SID(sid) (((u64)sid) << 32)
+#define QI_CC_DID(did) (((u64)did) << 16)
+#define QI_CC_GRAN(gran) (((u64)gran) >> (DMA_CCMD_INVL_GRANU_OFFSET-4))
+
+#define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32)
+#define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16)
+#define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK)
+#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \
+ ((u64)((pfsid >> 4) & 0xfff) << 52))
+#define QI_DEV_IOTLB_SIZE 1
+#define QI_DEV_IOTLB_MAX_INVS 32
+
+#define QI_PC_PASID(pasid) (((u64)pasid) << 32)
+#define QI_PC_DID(did) (((u64)did) << 16)
+#define QI_PC_GRAN(gran) (((u64)gran) << 4)
+
+/* PASID cache invalidation granu */
+#define QI_PC_ALL_PASIDS 0
+#define QI_PC_PASID_SEL 1
+#define QI_PC_GLOBAL 3
+
+#define QI_EIOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK)
+#define QI_EIOTLB_IH(ih) (((u64)ih) << 6)
+#define QI_EIOTLB_AM(am) (((u64)am) & 0x3f)
+#define QI_EIOTLB_PASID(pasid) (((u64)pasid) << 32)
+#define QI_EIOTLB_DID(did) (((u64)did) << 16)
+#define QI_EIOTLB_GRAN(gran) (((u64)gran) << 4)
+
+/* QI Dev-IOTLB inv granu */
+#define QI_DEV_IOTLB_GRAN_ALL 1
+#define QI_DEV_IOTLB_GRAN_PASID_SEL 0
+
+#define QI_DEV_EIOTLB_ADDR(a) ((u64)(a) & VTD_PAGE_MASK)
+#define QI_DEV_EIOTLB_SIZE (((u64)1) << 11)
+#define QI_DEV_EIOTLB_PASID(p) ((u64)((p) & 0xfffff) << 32)
+#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16)
+#define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4)
+#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \
+ ((u64)((pfsid >> 4) & 0xfff) << 52))
+#define QI_DEV_EIOTLB_MAX_INVS 32
+
+/* Page group response descriptor QW0 */
+#define QI_PGRP_PASID_P(p) (((u64)(p)) << 4)
+#define QI_PGRP_PDP(p) (((u64)(p)) << 5)
+#define QI_PGRP_RESP_CODE(res) (((u64)(res)) << 12)
+#define QI_PGRP_DID(rid) (((u64)(rid)) << 16)
+#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32)
+
+/* Page group response descriptor QW1 */
+#define QI_PGRP_LPIG(x) (((u64)(x)) << 2)
+#define QI_PGRP_IDX(idx) (((u64)(idx)) << 3)
+
+
+#define QI_RESP_SUCCESS 0x0
+#define QI_RESP_INVALID 0x1
+#define QI_RESP_FAILURE 0xf
+
+#define QI_GRAN_NONG_PASID 2
+#define QI_GRAN_PSI_PASID 3
+
+#define qi_shift(iommu) (DMAR_IQ_SHIFT + !!ecap_smts((iommu)->ecap))
+
+struct qi_desc {
+ u64 qw0;
+ u64 qw1;
+ u64 qw2;
+ u64 qw3;
+};
+
+struct q_inval {
+ raw_spinlock_t q_lock;
+ void *desc; /* invalidation queue */
+ int *desc_status; /* desc status */
+ int free_head; /* first free entry */
+ int free_tail; /* last free entry */
+ int free_cnt;
+};
+
+struct dmar_pci_notify_info;
+
+#ifdef CONFIG_IRQ_REMAP
+/* 1MB - maximum possible interrupt remapping table size */
+#define INTR_REMAP_PAGE_ORDER 8
+#define INTR_REMAP_TABLE_REG_SIZE 0xf
+#define INTR_REMAP_TABLE_REG_SIZE_MASK 0xf
+
+#define INTR_REMAP_TABLE_ENTRIES 65536
+
+struct irq_domain;
+
+struct ir_table {
+ struct irte *base;
+ unsigned long *bitmap;
+};
+
+void intel_irq_remap_add_device(struct dmar_pci_notify_info *info);
+#else
+static inline void
+intel_irq_remap_add_device(struct dmar_pci_notify_info *info) { }
+#endif
+
+struct iommu_flush {
+ void (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid,
+ u8 fm, u64 type);
+ void (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type);
+};
+
+enum {
+ SR_DMAR_FECTL_REG,
+ SR_DMAR_FEDATA_REG,
+ SR_DMAR_FEADDR_REG,
+ SR_DMAR_FEUADDR_REG,
+ MAX_SR_DMAR_REGS
+};
+
+#define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0)
+#define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1)
+#define VTD_FLAG_SVM_CAPABLE (1 << 2)
+
+extern int intel_iommu_sm;
+
+#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap))
+#define pasid_supported(iommu) (sm_supported(iommu) && \
+ ecap_pasid((iommu)->ecap))
+
+struct pasid_entry;
+struct pasid_state_entry;
+struct page_req_dsc;
+
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+ u64 lo;
+ u64 hi;
+};
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+ u64 lo;
+ u64 hi;
+};
+
+/*
+ * When VT-d works in the scalable mode, it allows DMA translation to
+ * happen through either first level or second level page table. This
+ * bit marks that the DMA translation for the domain goes through the
+ * first level page table, otherwise, it goes through the second level.
+ */
+#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(1)
+
+struct iommu_domain_info {
+ struct intel_iommu *iommu;
+ unsigned int refcnt; /* Refcount of devices per iommu */
+ u16 did; /* Domain ids per IOMMU. Use u16 since
+ * domain ids are 16 bit wide according
+ * to VT-d spec, section 9.3 */
+};
+
+struct dmar_domain {
+ int nid; /* node id */
+ struct xarray iommu_array; /* Attached IOMMU array */
+
+ u8 has_iotlb_device: 1;
+ u8 iommu_coherency: 1; /* indicate coherency of iommu access */
+ u8 force_snooping : 1; /* Create IOPTEs with snoop control */
+ u8 set_pte_snp:1;
+
+ spinlock_t lock; /* Protect device tracking lists */
+ struct list_head devices; /* all devices' list */
+
+ struct dma_pte *pgd; /* virtual address */
+ int gaw; /* max guest address width */
+
+ /* adjusted guest address width, 0 is level 2 30-bit */
+ int agaw;
+
+ int flags; /* flags to find out type of domain */
+ int iommu_superpage;/* Level of superpages supported:
+ 0 == 4KiB (no superpages), 1 == 2MiB,
+ 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
+ u64 max_addr; /* maximum mapped address */
+
+ struct iommu_domain domain; /* generic domain data structure for
+ iommu core */
+};
+
+struct intel_iommu {
+ void __iomem *reg; /* Pointer to hardware regs, virtual addr */
+ u64 reg_phys; /* physical address of hw register set */
+ u64 reg_size; /* size of hw register set */
+ u64 cap;
+ u64 ecap;
+ u64 vccap;
+ u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
+ raw_spinlock_t register_lock; /* protect register handling */
+ int seq_id; /* sequence id of the iommu */
+ int agaw; /* agaw of this iommu */
+ int msagaw; /* max sagaw of this iommu */
+ unsigned int irq, pr_irq;
+ u16 segment; /* PCI segment# */
+ unsigned char name[13]; /* Device Name */
+
+#ifdef CONFIG_INTEL_IOMMU
+ unsigned long *domain_ids; /* bitmap of domains */
+ spinlock_t lock; /* protect context, domain ids */
+ struct root_entry *root_entry; /* virtual address */
+
+ struct iommu_flush flush;
+#endif
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ struct page_req_dsc *prq;
+ unsigned char prq_name[16]; /* Name for PRQ interrupt */
+ struct completion prq_complete;
+ struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */
+#endif
+ struct iopf_queue *iopf_queue;
+ unsigned char iopfq_name[16];
+ struct q_inval *qi; /* Queued invalidation info */
+ u32 *iommu_state; /* Store iommu states between suspend and resume.*/
+
+#ifdef CONFIG_IRQ_REMAP
+ struct ir_table *ir_table; /* Interrupt remapping info */
+ struct irq_domain *ir_domain;
+ struct irq_domain *ir_msi_domain;
+#endif
+ struct iommu_device iommu; /* IOMMU core code handle */
+ int node;
+ u32 flags; /* Software defined flags */
+
+ struct dmar_drhd_unit *drhd;
+ void *perf_statistic;
+};
+
+/* PCI domain-device relationship */
+struct device_domain_info {
+ struct list_head link; /* link to domain siblings */
+ u32 segment; /* PCI segment number */
+ u8 bus; /* PCI bus number */
+ u8 devfn; /* PCI devfn number */
+ u16 pfsid; /* SRIOV physical function source ID */
+ u8 pasid_supported:3;
+ u8 pasid_enabled:1;
+ u8 pri_supported:1;
+ u8 pri_enabled:1;
+ u8 ats_supported:1;
+ u8 ats_enabled:1;
+ u8 ats_qdep;
+ struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
+ struct intel_iommu *iommu; /* IOMMU used by this device */
+ struct dmar_domain *domain; /* pointer to domain */
+ struct pasid_table *pasid_table; /* pasid table */
+};
+
+static inline void __iommu_flush_cache(
+ struct intel_iommu *iommu, void *addr, int size)
+{
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(addr, size);
+}
+
+/* Convert generic struct iommu_domain to private struct dmar_domain */
+static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
+{
+ return container_of(dom, struct dmar_domain, domain);
+}
+
+/* Retrieve the domain ID which has allocated to the domain */
+static inline u16
+domain_id_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
+{
+ struct iommu_domain_info *info =
+ xa_load(&domain->iommu_array, iommu->seq_id);
+
+ return info->did;
+}
+
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-10: available
+ * 11: snoop behavior
+ * 12-63: Host physical address
+ */
+struct dma_pte {
+ u64 val;
+};
+
+static inline void dma_clear_pte(struct dma_pte *pte)
+{
+ pte->val = 0;
+}
+
+static inline u64 dma_pte_addr(struct dma_pte *pte)
+{
+#ifdef CONFIG_64BIT
+ return pte->val & VTD_PAGE_MASK & (~DMA_FL_PTE_XD);
+#else
+ /* Must have a full atomic 64-bit read */
+ return __cmpxchg64(&pte->val, 0ULL, 0ULL) &
+ VTD_PAGE_MASK & (~DMA_FL_PTE_XD);
+#endif
+}
+
+static inline bool dma_pte_present(struct dma_pte *pte)
+{
+ return (pte->val & 3) != 0;
+}
+
+static inline bool dma_pte_superpage(struct dma_pte *pte)
+{
+ return (pte->val & DMA_PTE_LARGE_PAGE);
+}
+
+static inline bool first_pte_in_page(struct dma_pte *pte)
+{
+ return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE);
+}
+
+static inline int nr_pte_to_next_page(struct dma_pte *pte)
+{
+ return first_pte_in_page(pte) ? BIT_ULL(VTD_STRIDE_SHIFT) :
+ (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte;
+}
+
+extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
+
+extern int dmar_enable_qi(struct intel_iommu *iommu);
+extern void dmar_disable_qi(struct intel_iommu *iommu);
+extern int dmar_reenable_qi(struct intel_iommu *iommu);
+extern void qi_global_iec(struct intel_iommu *iommu);
+
+extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,
+ u8 fm, u64 type);
+extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type);
+extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+ u16 qdep, u64 addr, unsigned mask);
+
+void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
+ unsigned long npages, bool ih);
+
+void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+ u32 pasid, u16 qdep, u64 addr,
+ unsigned int size_order);
+void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu,
+ u32 pasid);
+
+int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
+ unsigned int count, unsigned long options);
+/*
+ * Options used in qi_submit_sync:
+ * QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8.
+ */
+#define QI_OPT_WAIT_DRAIN BIT(0)
+
+extern int dmar_ir_support(void);
+
+void *alloc_pgtable_page(int node);
+void free_pgtable_page(void *vaddr);
+void iommu_flush_write_buffer(struct intel_iommu *iommu);
+int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev);
+struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
+
+#ifdef CONFIG_INTEL_IOMMU_SVM
+extern void intel_svm_check(struct intel_iommu *iommu);
+extern int intel_svm_enable_prq(struct intel_iommu *iommu);
+extern int intel_svm_finish_prq(struct intel_iommu *iommu);
+struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm,
+ void *drvdata);
+void intel_svm_unbind(struct iommu_sva *handle);
+u32 intel_svm_get_pasid(struct iommu_sva *handle);
+int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
+ struct iommu_page_response *msg);
+
+struct intel_svm_dev {
+ struct list_head list;
+ struct rcu_head rcu;
+ struct device *dev;
+ struct intel_iommu *iommu;
+ struct iommu_sva sva;
+ unsigned long prq_seq_number;
+ u32 pasid;
+ int users;
+ u16 did;
+ u16 dev_iotlb:1;
+ u16 sid, qdep;
+};
+
+struct intel_svm {
+ struct mmu_notifier notifier;
+ struct mm_struct *mm;
+
+ unsigned int flags;
+ u32 pasid;
+ struct list_head devs;
+};
+#else
+static inline void intel_svm_check(struct intel_iommu *iommu) {}
+#endif
+
+#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
+void intel_iommu_debugfs_init(void);
+#else
+static inline void intel_iommu_debugfs_init(void) {}
+#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
+
+extern const struct attribute_group *intel_iommu_groups[];
+bool context_present(struct context_entry *context);
+struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+ u8 devfn, int alloc);
+
+extern const struct iommu_ops intel_iommu_ops;
+
+#ifdef CONFIG_INTEL_IOMMU
+extern int iommu_calculate_agaw(struct intel_iommu *iommu);
+extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
+extern int dmar_disabled;
+extern int intel_iommu_enabled;
+#else
+static inline int iommu_calculate_agaw(struct intel_iommu *iommu)
+{
+ return 0;
+}
+static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
+{
+ return 0;
+}
+#define dmar_disabled (1)
+#define intel_iommu_enabled (0)
+#endif
+
+static inline const char *decode_prq_descriptor(char *str, size_t size,
+ u64 dw0, u64 dw1, u64 dw2, u64 dw3)
+{
+ char *buf = str;
+ int bytes;
+
+ bytes = snprintf(buf, size,
+ "rid=0x%llx addr=0x%llx %c%c%c%c%c pasid=0x%llx index=0x%llx",
+ FIELD_GET(GENMASK_ULL(31, 16), dw0),
+ FIELD_GET(GENMASK_ULL(63, 12), dw1),
+ dw1 & BIT_ULL(0) ? 'r' : '-',
+ dw1 & BIT_ULL(1) ? 'w' : '-',
+ dw0 & BIT_ULL(52) ? 'x' : '-',
+ dw0 & BIT_ULL(53) ? 'p' : '-',
+ dw1 & BIT_ULL(2) ? 'l' : '-',
+ FIELD_GET(GENMASK_ULL(51, 32), dw0),
+ FIELD_GET(GENMASK_ULL(11, 3), dw1));
+
+ /* Private Data */
+ if (dw0 & BIT_ULL(9)) {
+ size -= bytes;
+ buf += bytes;
+ snprintf(buf, size, " private=0x%llx/0x%llx\n", dw2, dw3);
+ }
+
+ return str;
+}
+
+#endif
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index a67319597884..2e9683e970f8 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -10,7 +10,6 @@
#include <linux/hpet.h>
#include <linux/pci.h>
#include <linux/irq.h>
-#include <linux/intel-iommu.h>
#include <linux/acpi.h>
#include <linux/irqdomain.h>
#include <linux/crash_dump.h>
@@ -21,6 +20,7 @@
#include <asm/irq_remapping.h>
#include <asm/pci-direct.h>
+#include "iommu.h"
#include "../irq_remapping.h"
#include "cap_audit.h"
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 17cad7c1f62d..c5e7e8b020a5 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -12,13 +12,13 @@
#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
-#include <linux/intel-iommu.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>
+#include "iommu.h"
#include "pasid.h"
/*
@@ -450,17 +450,17 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
struct pasid_entry *pte;
u16 did, pgtt;
+ spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
- if (WARN_ON(!pte))
- return;
-
- if (!pasid_pte_is_present(pte))
+ if (WARN_ON(!pte) || !pasid_pte_is_present(pte)) {
+ spin_unlock(&iommu->lock);
return;
+ }
did = pasid_get_domain_id(pte);
pgtt = pasid_pte_get_pgtt(pte);
-
intel_pasid_clear_entry(dev, pasid, fault_ignore);
+ spin_unlock(&iommu->lock);
if (!ecap_coherent(iommu->ecap))
clflush_cache_range(pte, sizeof(*pte));
@@ -496,22 +496,6 @@ static void pasid_flush_caches(struct intel_iommu *iommu,
}
}
-static inline int pasid_enable_wpe(struct pasid_entry *pte)
-{
-#ifdef CONFIG_X86
- unsigned long cr0 = read_cr0();
-
- /* CR0.WP is normally set but just to be sure */
- if (unlikely(!(cr0 & X86_CR0_WP))) {
- pr_err_ratelimited("No CPU write protect!\n");
- return -EINVAL;
- }
-#endif
- pasid_set_wpe(pte);
-
- return 0;
-};
-
/*
* Set up the scalable mode pasid table entry for first only
* translation type.
@@ -528,39 +512,52 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
return -EINVAL;
}
- pte = intel_pasid_get_entry(dev, pasid);
- if (WARN_ON(!pte))
+ if (flags & PASID_FLAG_SUPERVISOR_MODE) {
+#ifdef CONFIG_X86
+ unsigned long cr0 = read_cr0();
+
+ /* CR0.WP is normally set but just to be sure */
+ if (unlikely(!(cr0 & X86_CR0_WP))) {
+ pr_err("No CPU write protect!\n");
+ return -EINVAL;
+ }
+#endif
+ if (!ecap_srs(iommu->ecap)) {
+ pr_err("No supervisor request support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+ }
+
+ if ((flags & PASID_FLAG_FL5LP) && !cap_5lp_support(iommu->cap)) {
+ pr_err("No 5-level paging support for first-level on %s\n",
+ iommu->name);
return -EINVAL;
+ }
- /* Caller must ensure PASID entry is not in use. */
- if (pasid_pte_is_present(pte))
+ spin_lock(&iommu->lock);
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (!pte) {
+ spin_unlock(&iommu->lock);
+ return -ENODEV;
+ }
+
+ if (pasid_pte_is_present(pte)) {
+ spin_unlock(&iommu->lock);
return -EBUSY;
+ }
pasid_clear_entry(pte);
/* Setup the first level page table pointer: */
pasid_set_flptr(pte, (u64)__pa(pgd));
if (flags & PASID_FLAG_SUPERVISOR_MODE) {
- if (!ecap_srs(iommu->ecap)) {
- pr_err("No supervisor request support on %s\n",
- iommu->name);
- return -EINVAL;
- }
pasid_set_sre(pte);
- if (pasid_enable_wpe(pte))
- return -EINVAL;
-
+ pasid_set_wpe(pte);
}
- if (flags & PASID_FLAG_FL5LP) {
- if (cap_5lp_support(iommu->cap)) {
- pasid_set_flpm(pte, 1);
- } else {
- pr_err("No 5-level paging support for first-level\n");
- pasid_clear_entry(pte);
- return -EINVAL;
- }
- }
+ if (flags & PASID_FLAG_FL5LP)
+ pasid_set_flpm(pte, 1);
if (flags & PASID_FLAG_PAGE_SNOOP)
pasid_set_pgsnp(pte);
@@ -572,6 +569,8 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
/* Setup Present and PASID Granular Transfer Type: */
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
pasid_set_present(pte);
+ spin_unlock(&iommu->lock);
+
pasid_flush_caches(iommu, pte, pasid, did);
return 0;
@@ -627,17 +626,19 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
}
pgd_val = virt_to_phys(pgd);
- did = domain->iommu_did[iommu->seq_id];
+ did = domain_id_iommu(domain, iommu);
+ spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
- dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
+ spin_unlock(&iommu->lock);
return -ENODEV;
}
- /* Caller must ensure PASID entry is not in use. */
- if (pasid_pte_is_present(pte))
+ if (pasid_pte_is_present(pte)) {
+ spin_unlock(&iommu->lock);
return -EBUSY;
+ }
pasid_clear_entry(pte);
pasid_set_domain_id(pte, did);
@@ -654,6 +655,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
if (pasid != PASID_RID2PASID)
pasid_set_sre(pte);
pasid_set_present(pte);
+ spin_unlock(&iommu->lock);
+
pasid_flush_caches(iommu, pte, pasid, did);
return 0;
@@ -669,15 +672,17 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
u16 did = FLPT_DEFAULT_DID;
struct pasid_entry *pte;
+ spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
- dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
+ spin_unlock(&iommu->lock);
return -ENODEV;
}
- /* Caller must ensure PASID entry is not in use. */
- if (pasid_pte_is_present(pte))
+ if (pasid_pte_is_present(pte)) {
+ spin_unlock(&iommu->lock);
return -EBUSY;
+ }
pasid_clear_entry(pte);
pasid_set_domain_id(pte, did);
@@ -692,6 +697,8 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
*/
pasid_set_sre(pte);
pasid_set_present(pte);
+ spin_unlock(&iommu->lock);
+
pasid_flush_caches(iommu, pte, pasid, did);
return 0;
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index bf5b937848b4..20c54e50f533 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -39,6 +39,7 @@
* only and pass-through transfer modes.
*/
#define FLPT_DEFAULT_DID 1
+#define NUM_RESERVED_DID 2
/*
* The SUPERVISOR_MODE flag indicates a first level translation which
diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c
index 0e8e03252d92..94ee70ac38e3 100644
--- a/drivers/iommu/intel/perf.c
+++ b/drivers/iommu/intel/perf.c
@@ -9,8 +9,8 @@
*/
#include <linux/spinlock.h>
-#include <linux/intel-iommu.h>
+#include "iommu.h"
#include "perf.h"
static DEFINE_SPINLOCK(latency_lock);
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 7ee37d996e15..8bcfb93dda56 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -5,7 +5,6 @@
* Authors: David Woodhouse <dwmw2@infradead.org>
*/
-#include <linux/intel-iommu.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
@@ -21,11 +20,12 @@
#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>
-#include <trace/events/intel_iommu.h>
+#include "iommu.h"
#include "pasid.h"
#include "perf.h"
#include "../iommu-sva-lib.h"
+#include "trace.h"
static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, u32 pasid);
@@ -328,9 +328,9 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
unsigned int flags)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
- unsigned long iflags, sflags;
struct intel_svm_dev *sdev;
struct intel_svm *svm;
+ unsigned long sflags;
int ret = 0;
svm = pasid_private_find(mm->pasid);
@@ -394,11 +394,8 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
PASID_FLAG_SUPERVISOR_MODE : 0;
sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
- spin_lock_irqsave(&iommu->lock, iflags);
ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
FLPT_DEFAULT_DID, sflags);
- spin_unlock_irqrestore(&iommu->lock, iflags);
-
if (ret)
goto free_sdev;
@@ -544,7 +541,7 @@ static void intel_svm_drain_prq(struct device *dev, u32 pasid)
domain = info->domain;
pdev = to_pci_dev(dev);
sid = PCI_DEVID(info->bus, info->devfn);
- did = domain->iommu_did[iommu->seq_id];
+ did = domain_id_iommu(domain, iommu);
qdep = pci_ats_queue_depth(pdev);
/*
diff --git a/drivers/iommu/intel/trace.c b/drivers/iommu/intel/trace.c
index bfb6a6e37a88..117e626e3ea9 100644
--- a/drivers/iommu/intel/trace.c
+++ b/drivers/iommu/intel/trace.c
@@ -11,4 +11,4 @@
#include <linux/types.h>
#define CREATE_TRACE_POINTS
-#include <trace/events/intel_iommu.h>
+#include "trace.h"
diff --git a/drivers/iommu/intel/trace.h b/drivers/iommu/intel/trace.h
new file mode 100644
index 000000000000..93d96f93a89b
--- /dev/null
+++ b/drivers/iommu/intel/trace.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel IOMMU trace support
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM intel_iommu
+
+#if !defined(_TRACE_INTEL_IOMMU_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_INTEL_IOMMU_H
+
+#include <linux/tracepoint.h>
+
+#include "iommu.h"
+
+#define MSG_MAX 256
+
+TRACE_EVENT(qi_submit,
+ TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3),
+
+ TP_ARGS(iommu, qw0, qw1, qw2, qw3),
+
+ TP_STRUCT__entry(
+ __field(u64, qw0)
+ __field(u64, qw1)
+ __field(u64, qw2)
+ __field(u64, qw3)
+ __string(iommu, iommu->name)
+ ),
+
+ TP_fast_assign(
+ __assign_str(iommu, iommu->name);
+ __entry->qw0 = qw0;
+ __entry->qw1 = qw1;
+ __entry->qw2 = qw2;
+ __entry->qw3 = qw3;
+ ),
+
+ TP_printk("%s %s: 0x%llx 0x%llx 0x%llx 0x%llx",
+ __print_symbolic(__entry->qw0 & 0xf,
+ { QI_CC_TYPE, "cc_inv" },
+ { QI_IOTLB_TYPE, "iotlb_inv" },
+ { QI_DIOTLB_TYPE, "dev_tlb_inv" },
+ { QI_IEC_TYPE, "iec_inv" },
+ { QI_IWD_TYPE, "inv_wait" },
+ { QI_EIOTLB_TYPE, "p_iotlb_inv" },
+ { QI_PC_TYPE, "pc_inv" },
+ { QI_DEIOTLB_TYPE, "p_dev_tlb_inv" },
+ { QI_PGRP_RESP_TYPE, "page_grp_resp" }),
+ __get_str(iommu),
+ __entry->qw0, __entry->qw1, __entry->qw2, __entry->qw3
+ )
+);
+
+TRACE_EVENT(prq_report,
+ TP_PROTO(struct intel_iommu *iommu, struct device *dev,
+ u64 dw0, u64 dw1, u64 dw2, u64 dw3,
+ unsigned long seq),
+
+ TP_ARGS(iommu, dev, dw0, dw1, dw2, dw3, seq),
+
+ TP_STRUCT__entry(
+ __field(u64, dw0)
+ __field(u64, dw1)
+ __field(u64, dw2)
+ __field(u64, dw3)
+ __field(unsigned long, seq)
+ __string(iommu, iommu->name)
+ __string(dev, dev_name(dev))
+ __dynamic_array(char, buff, MSG_MAX)
+ ),
+
+ TP_fast_assign(
+ __entry->dw0 = dw0;
+ __entry->dw1 = dw1;
+ __entry->dw2 = dw2;
+ __entry->dw3 = dw3;
+ __entry->seq = seq;
+ __assign_str(iommu, iommu->name);
+ __assign_str(dev, dev_name(dev));
+ ),
+
+ TP_printk("%s/%s seq# %ld: %s",
+ __get_str(iommu), __get_str(dev), __entry->seq,
+ decode_prq_descriptor(__get_str(buff), MSG_MAX, __entry->dw0,
+ __entry->dw1, __entry->dw2, __entry->dw3)
+ )
+);
+#endif /* _TRACE_INTEL_IOMMU_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ../../drivers/iommu/intel/
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index be066c1503d3..ba3115fd0f86 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -182,14 +182,8 @@ static bool arm_v7s_is_mtk_enabled(struct io_pgtable_cfg *cfg)
(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT);
}
-static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
- struct io_pgtable_cfg *cfg)
+static arm_v7s_iopte to_mtk_iopte(phys_addr_t paddr, arm_v7s_iopte pte)
{
- arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl);
-
- if (!arm_v7s_is_mtk_enabled(cfg))
- return pte;
-
if (paddr & BIT_ULL(32))
pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
if (paddr & BIT_ULL(33))
@@ -199,6 +193,17 @@ static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
return pte;
}
+static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
+ struct io_pgtable_cfg *cfg)
+{
+ arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl);
+
+ if (arm_v7s_is_mtk_enabled(cfg))
+ return to_mtk_iopte(paddr, pte);
+
+ return pte;
+}
+
static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
struct io_pgtable_cfg *cfg)
{
@@ -240,10 +245,17 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
dma_addr_t dma;
size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg);
void *table = NULL;
+ gfp_t gfp_l1;
+
+ /*
+ * ARM_MTK_TTBR_EXT extend the translation table base support larger
+ * memory address.
+ */
+ gfp_l1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT ?
+ GFP_KERNEL : ARM_V7S_TABLE_GFP_DMA;
if (lvl == 1)
- table = (void *)__get_free_pages(
- __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size));
+ table = (void *)__get_free_pages(gfp_l1 | __GFP_ZERO, get_order(size));
else if (lvl == 2)
table = kmem_cache_zalloc(data->l2_tables, gfp);
@@ -251,7 +263,8 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
return NULL;
phys = virt_to_phys(table);
- if (phys != (arm_v7s_iopte)phys) {
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT ?
+ phys >= (1ULL << cfg->oas) : phys != (arm_v7s_iopte)phys) {
/* Doesn't fit in PTE */
dev_err(dev, "Page table does not fit in PTE: %pa", &phys);
goto out_free;
@@ -457,9 +470,14 @@ static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table,
arm_v7s_iopte curr,
struct io_pgtable_cfg *cfg)
{
+ phys_addr_t phys = virt_to_phys(table);
arm_v7s_iopte old, new;
- new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE;
+ new = phys | ARM_V7S_PTE_TYPE_TABLE;
+
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT)
+ new = to_mtk_iopte(phys, new);
+
if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
new |= ARM_V7S_ATTR_NS_TABLE;
@@ -779,6 +797,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
void *cookie)
{
struct arm_v7s_io_pgtable *data;
+ slab_flags_t slab_flag;
+ phys_addr_t paddr;
if (cfg->ias > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS))
return NULL;
@@ -788,7 +808,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
IO_PGTABLE_QUIRK_NO_PERMS |
- IO_PGTABLE_QUIRK_ARM_MTK_EXT))
+ IO_PGTABLE_QUIRK_ARM_MTK_EXT |
+ IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT))
return NULL;
/* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
@@ -796,15 +817,27 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
!(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS))
return NULL;
+ if ((cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT) &&
+ !arm_v7s_is_mtk_enabled(cfg))
+ return NULL;
+
data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return NULL;
spin_lock_init(&data->split_lock);
+
+ /*
+ * ARM_MTK_TTBR_EXT extend the translation table base support larger
+ * memory address.
+ */
+ slab_flag = cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT ?
+ 0 : ARM_V7S_TABLE_SLAB_FLAGS;
+
data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
ARM_V7S_TABLE_SIZE(2, cfg),
ARM_V7S_TABLE_SIZE(2, cfg),
- ARM_V7S_TABLE_SLAB_FLAGS, NULL);
+ slab_flag, NULL);
if (!data->l2_tables)
goto out_free_data;
@@ -850,12 +883,16 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
wmb();
/* TTBR */
- cfg->arm_v7s_cfg.ttbr = virt_to_phys(data->pgd) | ARM_V7S_TTBR_S |
- (cfg->coherent_walk ? (ARM_V7S_TTBR_NOS |
- ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
- ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) :
- (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) |
- ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC)));
+ paddr = virt_to_phys(data->pgd);
+ if (arm_v7s_is_mtk_enabled(cfg))
+ cfg->arm_v7s_cfg.ttbr = paddr | upper_32_bits(paddr);
+ else
+ cfg->arm_v7s_cfg.ttbr = paddr | ARM_V7S_TTBR_S |
+ (cfg->coherent_walk ? (ARM_V7S_TTBR_NOS |
+ ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
+ ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) :
+ (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) |
+ ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC)));
return &data->iop;
out_free_data:
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 847ad47a2dfd..f53f8b2d27a5 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -259,7 +259,8 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
return 0;
out_release:
- ops->release_device(dev);
+ if (ops->release_device)
+ ops->release_device(dev);
out_module_put:
module_put(ops->owner);
@@ -272,7 +273,7 @@ err_free:
int iommu_probe_device(struct device *dev)
{
- const struct iommu_ops *ops = dev->bus->iommu_ops;
+ const struct iommu_ops *ops;
struct iommu_group *group;
int ret;
@@ -313,6 +314,7 @@ int iommu_probe_device(struct device *dev)
mutex_unlock(&group->mutex);
iommu_group_put(group);
+ ops = dev_iommu_ops(dev);
if (ops->probe_finalize)
ops->probe_finalize(dev);
@@ -336,7 +338,8 @@ void iommu_release_device(struct device *dev)
iommu_device_unlink(dev->iommu->iommu_dev, dev);
ops = dev_iommu_ops(dev);
- ops->release_device(dev);
+ if (ops->release_device)
+ ops->release_device(dev);
iommu_group_remove_device(dev);
module_put(ops->owner);
@@ -600,7 +603,7 @@ static void iommu_group_release(struct kobject *kobj)
if (group->iommu_data_release)
group->iommu_data_release(group->iommu_data);
- ida_simple_remove(&iommu_group_ida, group->id);
+ ida_free(&iommu_group_ida, group->id);
if (group->default_domain)
iommu_domain_free(group->default_domain);
@@ -641,7 +644,7 @@ struct iommu_group *iommu_group_alloc(void)
INIT_LIST_HEAD(&group->devices);
INIT_LIST_HEAD(&group->entry);
- ret = ida_simple_get(&iommu_group_ida, 0, 0, GFP_KERNEL);
+ ret = ida_alloc(&iommu_group_ida, GFP_KERNEL);
if (ret < 0) {
kfree(group);
return ERR_PTR(ret);
@@ -651,7 +654,7 @@ struct iommu_group *iommu_group_alloc(void)
ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
NULL, "%d", group->id);
if (ret) {
- ida_simple_remove(&iommu_group_ida, group->id);
+ ida_free(&iommu_group_ida, group->id);
kobject_put(&group->kobj);
return ERR_PTR(ret);
}
@@ -2576,32 +2579,25 @@ void iommu_get_resv_regions(struct device *dev, struct list_head *list)
ops->get_resv_regions(dev, list);
}
-void iommu_put_resv_regions(struct device *dev, struct list_head *list)
-{
- const struct iommu_ops *ops = dev_iommu_ops(dev);
-
- if (ops->put_resv_regions)
- ops->put_resv_regions(dev, list);
-}
-
/**
- * generic_iommu_put_resv_regions - Reserved region driver helper
+ * iommu_put_resv_regions - release resered regions
* @dev: device for which to free reserved regions
* @list: reserved region list for device
*
- * IOMMU drivers can use this to implement their .put_resv_regions() callback
- * for simple reservations. Memory allocated for each reserved region will be
- * freed. If an IOMMU driver allocates additional resources per region, it is
- * going to have to implement a custom callback.
+ * This releases a reserved region list acquired by iommu_get_resv_regions().
*/
-void generic_iommu_put_resv_regions(struct device *dev, struct list_head *list)
+void iommu_put_resv_regions(struct device *dev, struct list_head *list)
{
struct iommu_resv_region *entry, *next;
- list_for_each_entry_safe(entry, next, list, list)
- kfree(entry);
+ list_for_each_entry_safe(entry, next, list, list) {
+ if (entry->free)
+ entry->free(dev, entry);
+ else
+ kfree(entry);
+ }
}
-EXPORT_SYMBOL(generic_iommu_put_resv_regions);
+EXPORT_SYMBOL(iommu_put_resv_regions);
struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
size_t length, int prot,
@@ -2751,19 +2747,6 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
}
EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
-bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat)
-{
- if (dev->iommu && dev->iommu->iommu_dev) {
- const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
-
- if (ops->dev_feat_enabled)
- return ops->dev_feat_enabled(dev, feat);
- }
-
- return false;
-}
-EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled);
-
/**
* iommu_sva_bind_device() - Bind a process address space to a device
* @dev: the device
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index db77aa675145..e44f565c5319 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -614,7 +614,12 @@ EXPORT_SYMBOL_GPL(reserve_iova);
* dynamic size tuning described in the paper.
*/
-#define IOVA_MAG_SIZE 128
+/*
+ * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to
+ * assure size of 'iova_magazine' to be 1024 bytes, so that no memory
+ * will be wasted.
+ */
+#define IOVA_MAG_SIZE 127
#define MAX_GLOBAL_MAGS 32 /* magazines per bin */
struct iova_magazine {
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index f09aedfdd462..6a24aa804ea3 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -394,10 +394,6 @@ static struct iommu_device *msm_iommu_probe_device(struct device *dev)
return &iommu->iommu;
}
-static void msm_iommu_release_device(struct device *dev)
-{
-}
-
static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
int ret = 0;
@@ -603,7 +599,7 @@ static int insert_iommu_master(struct device *dev,
for (sid = 0; sid < master->num_mids; sid++)
if (master->mids[sid] == spec->args[0]) {
- dev_warn(dev, "Stream ID 0x%hx repeated; ignoring\n",
+ dev_warn(dev, "Stream ID 0x%x repeated; ignoring\n",
sid);
return 0;
}
@@ -677,7 +673,6 @@ fail:
static struct iommu_ops msm_iommu_ops = {
.domain_alloc = msm_iommu_domain_alloc,
.probe_device = msm_iommu_probe_device,
- .release_device = msm_iommu_release_device,
.device_group = generic_device_group,
.pgsize_bitmap = MSM_IOMMU_PGSIZES,
.of_xlate = qcom_iommu_of_xlate,
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index bb9dd92c9898..7e363b1f24df 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -34,7 +34,6 @@
#include <dt-bindings/memory/mtk-memory-port.h>
#define REG_MMU_PT_BASE_ADDR 0x000
-#define MMU_PT_ADDR_MASK GENMASK(31, 7)
#define REG_MMU_INVALIDATE 0x020
#define F_ALL_INVLD 0x2
@@ -138,6 +137,7 @@
/* PM and clock always on. e.g. infra iommu */
#define PM_CLK_AO BIT(15)
#define IFA_IOMMU_PCIE_SUPPORT BIT(16)
+#define PGTABLE_PA_35_EN BIT(17)
#define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask) \
((((pdata)->flags) & (mask)) == (_x))
@@ -596,6 +596,9 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom,
.iommu_dev = data->dev,
};
+ if (MTK_IOMMU_HAS_FLAG(data->plat_data, PGTABLE_PA_35_EN))
+ dom->cfg.quirks |= IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT;
+
if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_4GB_MODE))
dom->cfg.oas = data->enable_4GB ? 33 : 32;
else
@@ -684,8 +687,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
goto err_unlock;
}
bank->m4u_dom = dom;
- writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK,
- bank->base + REG_MMU_PT_BASE_ADDR);
+ writel(dom->cfg.arm_v7s_cfg.ttbr, bank->base + REG_MMU_PT_BASE_ADDR);
pm_runtime_put(m4udev);
}
@@ -819,17 +821,12 @@ static void mtk_iommu_release_device(struct device *dev)
struct device *larbdev;
unsigned int larbid;
- if (!fwspec || fwspec->ops != &mtk_iommu_ops)
- return;
-
data = dev_iommu_priv_get(dev);
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
larbid = MTK_M4U_TO_LARB(fwspec->ids[0]);
larbdev = data->larb_imu[larbid].dev;
device_link_remove(dev, larbdev);
}
-
- iommu_fwspec_free(dev);
}
static int mtk_iommu_get_group_id(struct device *dev, const struct mtk_iommu_plat_data *plat_data)
@@ -933,7 +930,6 @@ static const struct iommu_ops mtk_iommu_ops = {
.device_group = mtk_iommu_device_group,
.of_xlate = mtk_iommu_of_xlate,
.get_resv_regions = mtk_iommu_get_resv_regions,
- .put_resv_regions = generic_iommu_put_resv_regions,
.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
@@ -1140,22 +1136,32 @@ static int mtk_iommu_probe(struct platform_device *pdev)
data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN);
if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_4GB_MODE)) {
- switch (data->plat_data->m4u_plat) {
- case M4U_MT2712:
- p = "mediatek,mt2712-infracfg";
- break;
- case M4U_MT8173:
- p = "mediatek,mt8173-infracfg";
- break;
- default:
- p = NULL;
+ infracfg = syscon_regmap_lookup_by_phandle(dev->of_node, "mediatek,infracfg");
+ if (IS_ERR(infracfg)) {
+ /*
+ * Legacy devicetrees will not specify a phandle to
+ * mediatek,infracfg: in that case, we use the older
+ * way to retrieve a syscon to infra.
+ *
+ * This is for retrocompatibility purposes only, hence
+ * no more compatibles shall be added to this.
+ */
+ switch (data->plat_data->m4u_plat) {
+ case M4U_MT2712:
+ p = "mediatek,mt2712-infracfg";
+ break;
+ case M4U_MT8173:
+ p = "mediatek,mt8173-infracfg";
+ break;
+ default:
+ p = NULL;
+ }
+
+ infracfg = syscon_regmap_lookup_by_compatible(p);
+ if (IS_ERR(infracfg))
+ return PTR_ERR(infracfg);
}
- infracfg = syscon_regmap_lookup_by_compatible(p);
-
- if (IS_ERR(infracfg))
- return PTR_ERR(infracfg);
-
ret = regmap_read(infracfg, REG_INFRA_MISC, &val);
if (ret)
return ret;
@@ -1204,18 +1210,16 @@ static int mtk_iommu_probe(struct platform_device *pdev)
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
ret = mtk_iommu_mm_dts_parse(dev, &match, data);
if (ret) {
- dev_err(dev, "mm dts parse fail(%d).", ret);
+ dev_err_probe(dev, ret, "mm dts parse fail\n");
goto out_runtime_disable;
}
- } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA) &&
- data->plat_data->pericfg_comp_str) {
- infracfg = syscon_regmap_lookup_by_compatible(data->plat_data->pericfg_comp_str);
- if (IS_ERR(infracfg)) {
- ret = PTR_ERR(infracfg);
+ } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA)) {
+ p = data->plat_data->pericfg_comp_str;
+ data->pericfg = syscon_regmap_lookup_by_compatible(p);
+ if (IS_ERR(data->pericfg)) {
+ ret = PTR_ERR(data->pericfg);
goto out_runtime_disable;
}
-
- data->pericfg = infracfg;
}
platform_set_drvdata(pdev, data);
@@ -1366,8 +1370,7 @@ static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev)
writel_relaxed(reg->int_control[i], base + REG_MMU_INT_CONTROL0);
writel_relaxed(reg->int_main_control[i], base + REG_MMU_INT_MAIN_CONTROL);
writel_relaxed(reg->ivrp_paddr[i], base + REG_MMU_IVRP_PADDR);
- writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK,
- base + REG_MMU_PT_BASE_ADDR);
+ writel(m4u_dom->cfg.arm_v7s_cfg.ttbr, base + REG_MMU_PT_BASE_ADDR);
} while (++i < data->plat_data->banks_num);
/*
@@ -1401,7 +1404,7 @@ static const struct mtk_iommu_plat_data mt2712_data = {
static const struct mtk_iommu_plat_data mt6779_data = {
.m4u_plat = M4U_MT6779,
.flags = HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | WR_THROT_EN |
- MTK_IOMMU_TYPE_MM,
+ MTK_IOMMU_TYPE_MM | PGTABLE_PA_35_EN,
.inv_sel_reg = REG_MMU_INV_SEL_GEN2,
.banks_num = 1,
.banks_enable = {true},
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index e1cb51b9866c..128c7a3f1778 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -532,15 +532,10 @@ static void mtk_iommu_v1_release_device(struct device *dev)
struct device *larbdev;
unsigned int larbid;
- if (!fwspec || fwspec->ops != &mtk_iommu_v1_ops)
- return;
-
data = dev_iommu_priv_get(dev);
larbid = mt2701_m4u_to_larb(fwspec->ids[0]);
larbdev = data->larb_imu[larbid].dev;
device_link_remove(dev, larbdev);
-
- iommu_fwspec_free(dev);
}
static int mtk_iommu_v1_hw_init(const struct mtk_iommu_v1_data *data)
diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index bd409bab6286..511959c8a14d 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -383,16 +383,6 @@ static struct iommu_device *sprd_iommu_probe_device(struct device *dev)
return &sdev->iommu;
}
-static void sprd_iommu_release_device(struct device *dev)
-{
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
-
- if (!fwspec || fwspec->ops != &sprd_iommu_ops)
- return;
-
- iommu_fwspec_free(dev);
-}
-
static struct iommu_group *sprd_iommu_device_group(struct device *dev)
{
struct sprd_iommu_device *sdev = dev_iommu_priv_get(dev);
@@ -417,7 +407,6 @@ static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
static const struct iommu_ops sprd_iommu_ops = {
.domain_alloc = sprd_iommu_domain_alloc,
.probe_device = sprd_iommu_probe_device,
- .release_device = sprd_iommu_release_device,
.device_group = sprd_iommu_device_group,
.of_xlate = sprd_iommu_of_xlate,
.pgsize_bitmap = ~0UL << SPRD_IOMMU_PAGE_SHIFT,
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index c54ab477b8fd..a84c63518773 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -738,8 +738,6 @@ static struct iommu_device *sun50i_iommu_probe_device(struct device *dev)
return &iommu->iommu;
}
-static void sun50i_iommu_release_device(struct device *dev) {}
-
static struct iommu_group *sun50i_iommu_device_group(struct device *dev)
{
struct sun50i_iommu *iommu = sun50i_iommu_from_dev(dev);
@@ -764,7 +762,6 @@ static const struct iommu_ops sun50i_iommu_ops = {
.domain_alloc = sun50i_iommu_domain_alloc,
.of_xlate = sun50i_iommu_of_xlate,
.probe_device = sun50i_iommu_probe_device,
- .release_device = sun50i_iommu_release_device,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = sun50i_iommu_attach_device,
.detach_dev = sun50i_iommu_detach_device,
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index a6700a40a6f8..e5ca3cf1a949 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -246,10 +246,6 @@ static struct iommu_device *gart_iommu_probe_device(struct device *dev)
return &gart_handle->iommu;
}
-static void gart_iommu_release_device(struct device *dev)
-{
-}
-
static int gart_iommu_of_xlate(struct device *dev,
struct of_phandle_args *args)
{
@@ -273,7 +269,6 @@ static void gart_iommu_sync(struct iommu_domain *domain,
static const struct iommu_ops gart_iommu_ops = {
.domain_alloc = gart_iommu_domain_alloc,
.probe_device = gart_iommu_probe_device,
- .release_device = gart_iommu_release_device,
.device_group = generic_device_group,
.pgsize_bitmap = GART_IOMMU_PGSIZES,
.of_xlate = gart_iommu_of_xlate,
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 1fea68e551f1..2a8de975fe63 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -864,8 +864,6 @@ static struct iommu_device *tegra_smmu_probe_device(struct device *dev)
return &smmu->iommu;
}
-static void tegra_smmu_release_device(struct device *dev) {}
-
static const struct tegra_smmu_group_soc *
tegra_smmu_find_group(struct tegra_smmu *smmu, unsigned int swgroup)
{
@@ -966,7 +964,6 @@ static int tegra_smmu_of_xlate(struct device *dev,
static const struct iommu_ops tegra_smmu_ops = {
.domain_alloc = tegra_smmu_domain_alloc,
.probe_device = tegra_smmu_probe_device,
- .release_device = tegra_smmu_release_device,
.device_group = tegra_smmu_device_group,
.of_xlate = tegra_smmu_of_xlate,
.pgsize_bitmap = SZ_4K,
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 25be4b822aa0..08eeafc9529f 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -788,11 +788,13 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
return 0;
}
-static int viommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int viommu_map_pages(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int prot, gfp_t gfp, size_t *mapped)
{
int ret;
u32 flags;
+ size_t size = pgsize * pgcount;
u64 end = iova + size - 1;
struct virtio_iommu_req_map map;
struct viommu_domain *vdomain = to_viommu_domain(domain);
@@ -823,17 +825,21 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova,
ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map));
if (ret)
viommu_del_mappings(vdomain, iova, end);
+ else if (mapped)
+ *mapped = size;
return ret;
}
-static size_t viommu_unmap(struct iommu_domain *domain, unsigned long iova,
- size_t size, struct iommu_iotlb_gather *gather)
+static size_t viommu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather)
{
int ret = 0;
size_t unmapped;
struct virtio_iommu_req_unmap unmap;
struct viommu_domain *vdomain = to_viommu_domain(domain);
+ size_t size = pgsize * pgcount;
unmapped = viommu_del_mappings(vdomain, iova, iova + size - 1);
if (unmapped < size)
@@ -964,7 +970,7 @@ static struct iommu_device *viommu_probe_device(struct device *dev)
return &viommu->iommu;
err_free_dev:
- generic_iommu_put_resv_regions(dev, &vdev->resv_regions);
+ iommu_put_resv_regions(dev, &vdev->resv_regions);
kfree(vdev);
return ERR_PTR(ret);
@@ -981,15 +987,9 @@ static void viommu_probe_finalize(struct device *dev)
static void viommu_release_device(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct viommu_endpoint *vdev;
-
- if (!fwspec || fwspec->ops != &viommu_ops)
- return;
-
- vdev = dev_iommu_priv_get(dev);
+ struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
- generic_iommu_put_resv_regions(dev, &vdev->resv_regions);
+ iommu_put_resv_regions(dev, &vdev->resv_regions);
kfree(vdev);
}
@@ -1013,13 +1013,12 @@ static struct iommu_ops viommu_ops = {
.release_device = viommu_release_device,
.device_group = viommu_device_group,
.get_resv_regions = viommu_get_resv_regions,
- .put_resv_regions = generic_iommu_put_resv_regions,
.of_xlate = viommu_of_xlate,
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = viommu_attach_dev,
- .map = viommu_map,
- .unmap = viommu_unmap,
+ .map_pages = viommu_map_pages,
+ .unmap_pages = viommu_unmap_pages,
.iova_to_phys = viommu_iova_to_phys,
.iotlb_sync = viommu_iotlb_sync,
.free = viommu_domain_free,