summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoerg Roedel <jroedel@suse.de>2023-01-25 11:54:58 +0100
committerJoerg Roedel <jroedel@suse.de>2023-01-25 11:54:58 +0100
commitff489fe002ab20daf0985f119682474cfda83496 (patch)
treeee367a7703f1570680524e4fc6dafc7e11a0bc09
parent5cef282e295f7cf623672470d040716d1e3eacf2 (diff)
parent429f27e36874e34727a1f8495be2ea3a7060732c (diff)
Merge branch 'iommu-memory-accounting' into core
Merge patch-set from Jason: "Let iommufd charge IOPTE allocations to the memory cgroup" Description: IOMMUFD follows the same design as KVM and uses memory cgroups to limit the amount of kernel memory a iommufd file descriptor can pin down. The various internal data structures already use GFP_KERNEL_ACCOUNT to charge its own memory. However, one of the biggest consumers of kernel memory is the IOPTEs stored under the iommu_domain and these allocations are not tracked. This series is the first step in fixing it. The iommu driver contract already includes a 'gfp' argument to the map_pages op, allowing iommufd to specify GFP_KERNEL_ACCOUNT and then having the driver allocate the IOPTE tables with that flag will capture a significant amount of the allocations. Update the iommu_map() API to pass in the GFP argument, and fix all call sites. Replace iommu_map_atomic(). Audit the "enterprise" iommu drivers to make sure they do the right thing. Intel and S390 ignore the GFP argument and always use GFP_ATOMIC. This is problematic for iommufd anyhow, so fix it. AMD and ARM SMMUv2/3 are already correct. A follow up series will be needed to capture the allocations made when the iommu_domain itself is allocated, which will complete the job. Link: https://lore.kernel.org/linux-iommu/0-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com/
-rw-r--r--arch/arm/mm/dma-mapping.c11
-rw-r--r--arch/s390/include/asm/pci_dma.h5
-rw-r--r--arch/s390/pci/pci_dma.c31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c3
-rw-r--r--drivers/gpu/drm/tegra/drm.c2
-rw-r--r--drivers/gpu/host1x/cdma.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c4
-rw-r--r--drivers/iommu/dma-iommu.c18
-rw-r--r--drivers/iommu/intel/iommu.c36
-rw-r--r--drivers/iommu/intel/iommu.h2
-rw-r--r--drivers/iommu/intel/pasid.c2
-rw-r--r--drivers/iommu/iommu.c53
-rw-r--r--drivers/iommu/iommufd/pages.c6
-rw-r--r--drivers/iommu/s390-iommu.c15
-rw-r--r--drivers/media/platform/qcom/venus/firmware.c2
-rw-r--r--drivers/net/ipa/ipa_mem.c6
-rw-r--r--drivers/net/wireless/ath/ath10k/snoc.c2
-rw-r--r--drivers/net/wireless/ath/ath11k/ahb.c4
-rw-r--r--drivers/remoteproc/remoteproc_core.c5
-rw-r--r--drivers/vfio/vfio_iommu_type1.c9
-rw-r--r--drivers/vhost/vdpa.c2
-rw-r--r--include/linux/iommu.h31
22 files changed, 126 insertions, 125 deletions
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index c135f6e37a00..8bc01071474a 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -984,7 +984,8 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size,
len = (j - i) << PAGE_SHIFT;
ret = iommu_map(mapping->domain, iova, phys, len,
- __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs));
+ __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs),
+ GFP_KERNEL);
if (ret < 0)
goto fail;
iova += len;
@@ -1207,7 +1208,8 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
prot = __dma_info_to_prot(dir, attrs);
- ret = iommu_map(mapping->domain, iova, phys, len, prot);
+ ret = iommu_map(mapping->domain, iova, phys, len, prot,
+ GFP_KERNEL);
if (ret < 0)
goto fail;
count += len >> PAGE_SHIFT;
@@ -1379,7 +1381,8 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
prot = __dma_info_to_prot(dir, attrs);
- ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot);
+ ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len,
+ prot, GFP_KERNEL);
if (ret < 0)
goto fail;
@@ -1443,7 +1446,7 @@ static dma_addr_t arm_iommu_map_resource(struct device *dev,
prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO;
- ret = iommu_map(mapping->domain, dma_addr, addr, len, prot);
+ ret = iommu_map(mapping->domain, dma_addr, addr, len, prot, GFP_KERNEL);
if (ret < 0)
goto fail;
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 91e63426bdc5..7119c04c51c5 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -186,9 +186,10 @@ static inline unsigned long *get_st_pto(unsigned long entry)
/* Prototypes */
void dma_free_seg_table(unsigned long);
-unsigned long *dma_alloc_cpu_table(void);
+unsigned long *dma_alloc_cpu_table(gfp_t gfp);
void dma_cleanup_tables(unsigned long *);
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr);
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
+ gfp_t gfp);
void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags);
extern const struct dma_map_ops s390_pci_dma_ops;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index ea478d11fbd1..2d9b01d7ca4c 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -27,11 +27,11 @@ static int zpci_refresh_global(struct zpci_dev *zdev)
zdev->iommu_pages * PAGE_SIZE);
}
-unsigned long *dma_alloc_cpu_table(void)
+unsigned long *dma_alloc_cpu_table(gfp_t gfp)
{
unsigned long *table, *entry;
- table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
+ table = kmem_cache_alloc(dma_region_table_cache, gfp);
if (!table)
return NULL;
@@ -45,11 +45,11 @@ static void dma_free_cpu_table(void *table)
kmem_cache_free(dma_region_table_cache, table);
}
-static unsigned long *dma_alloc_page_table(void)
+static unsigned long *dma_alloc_page_table(gfp_t gfp)
{
unsigned long *table, *entry;
- table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
+ table = kmem_cache_alloc(dma_page_table_cache, gfp);
if (!table)
return NULL;
@@ -63,7 +63,7 @@ static void dma_free_page_table(void *table)
kmem_cache_free(dma_page_table_cache, table);
}
-static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
+static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
{
unsigned long old_rte, rte;
unsigned long *sto;
@@ -72,7 +72,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
if (reg_entry_isvalid(rte)) {
sto = get_rt_sto(rte);
} else {
- sto = dma_alloc_cpu_table();
+ sto = dma_alloc_cpu_table(gfp);
if (!sto)
return NULL;
@@ -90,7 +90,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
return sto;
}
-static unsigned long *dma_get_page_table_origin(unsigned long *step)
+static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
{
unsigned long old_ste, ste;
unsigned long *pto;
@@ -99,7 +99,7 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step)
if (reg_entry_isvalid(ste)) {
pto = get_st_pto(ste);
} else {
- pto = dma_alloc_page_table();
+ pto = dma_alloc_page_table(gfp);
if (!pto)
return NULL;
set_st_pto(&ste, virt_to_phys(pto));
@@ -116,18 +116,19 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step)
return pto;
}
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
+ gfp_t gfp)
{
unsigned long *sto, *pto;
unsigned int rtx, sx, px;
rtx = calc_rtx(dma_addr);
- sto = dma_get_seg_table_origin(&rto[rtx]);
+ sto = dma_get_seg_table_origin(&rto[rtx], gfp);
if (!sto)
return NULL;
sx = calc_sx(dma_addr);
- pto = dma_get_page_table_origin(&sto[sx]);
+ pto = dma_get_page_table_origin(&sto[sx], gfp);
if (!pto)
return NULL;
@@ -170,7 +171,8 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
return -EINVAL;
for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+ entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
+ GFP_ATOMIC);
if (!entry) {
rc = -ENOMEM;
goto undo_cpu_trans;
@@ -186,7 +188,8 @@ undo_cpu_trans:
while (i-- > 0) {
page_addr -= PAGE_SIZE;
dma_addr -= PAGE_SIZE;
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+ entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
+ GFP_ATOMIC);
if (!entry)
break;
dma_update_cpu_trans(entry, page_addr, flags);
@@ -576,7 +579,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
spin_lock_init(&zdev->iommu_bitmap_lock);
- zdev->dma_table = dma_alloc_cpu_table();
+ zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
if (!zdev->dma_table) {
rc = -ENOMEM;
goto out;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index 648ecf5a8fbc..a4ac94a2ab57 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -475,7 +475,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
u32 offset = (r->offset + i) << imem->iommu_pgshift;
ret = iommu_map(imem->domain, offset, node->dma_addrs[i],
- PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
+ PAGE_SIZE, IOMMU_READ | IOMMU_WRITE,
+ GFP_KERNEL);
if (ret < 0) {
nvkm_error(subdev, "IOMMU mapping failure: %d\n", ret);
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 7bd2e65c2a16..6ca9f396e55b 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1057,7 +1057,7 @@ void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *dma)
*dma = iova_dma_addr(&tegra->carveout.domain, alloc);
err = iommu_map(tegra->domain, *dma, virt_to_phys(virt),
- size, IOMMU_READ | IOMMU_WRITE);
+ size, IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
if (err < 0)
goto free_iova;
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 103fda055394..4ddfcd2138c9 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -105,7 +105,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
pb->dma = iova_dma_addr(&host1x->iova, alloc);
err = iommu_map(host1x->domain, pb->dma, pb->phys, size,
- IOMMU_READ);
+ IOMMU_READ, GFP_KERNEL);
if (err)
goto iommu_free_iova;
} else {
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index c301b3be9f30..aeeaca65ace9 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -277,7 +277,7 @@ iter_chunk:
usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x",
va_start, &pa_start, size, flags);
err = iommu_map(pd->domain, va_start, pa_start,
- size, flags);
+ size, flags, GFP_KERNEL);
if (err) {
usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
va_start, &pa_start, size, err);
@@ -294,7 +294,7 @@ iter_chunk:
usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n",
va_start, &pa_start, size, flags);
err = iommu_map(pd->domain, va_start, pa_start,
- size, flags);
+ size, flags, GFP_KERNEL);
if (err) {
usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
va_start, &pa_start, size, err);
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 8e023f032b76..99b2646cb5c7 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -716,7 +716,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
if (!iova)
return DMA_MAPPING_ERROR;
- if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
+ if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) {
iommu_dma_free_iova(cookie, iova, size, NULL);
return DMA_MAPPING_ERROR;
}
@@ -825,7 +825,14 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
if (!iova)
goto out_free_pages;
- if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL))
+ /*
+ * Remove the zone/policy flags from the GFP - these are applied to the
+ * __iommu_dma_alloc_pages() but are not used for the supporting
+ * internal allocations that follow.
+ */
+ gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_COMP);
+
+ if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, gfp))
goto out_free_iova;
if (!(ioprot & IOMMU_CACHE)) {
@@ -836,7 +843,8 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
arch_dma_prep_coherent(sg_page(sg), sg->length);
}
- ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot);
+ ret = iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, ioprot,
+ gfp);
if (ret < 0 || ret < size)
goto out_free_sg;
@@ -1284,7 +1292,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
* We'll leave any physical concatenation to the IOMMU driver's
* implementation - it knows better than we do.
*/
- ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
+ ret = iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
if (ret < 0 || ret < iova_len)
goto out_free_iova;
@@ -1618,7 +1626,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
if (!iova)
goto out_free_page;
- if (iommu_map(domain, iova, msi_addr, size, prot))
+ if (iommu_map(domain, iova, msi_addr, size, prot, GFP_KERNEL))
goto out_free_iova;
INIT_LIST_HEAD(&msi_page->list);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 59df7e42fd53..a1a66798e1f0 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -362,12 +362,12 @@ static int __init intel_iommu_setup(char *str)
}
__setup("intel_iommu=", intel_iommu_setup);
-void *alloc_pgtable_page(int node)
+void *alloc_pgtable_page(int node, gfp_t gfp)
{
struct page *page;
void *vaddr = NULL;
- page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
+ page = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
if (page)
vaddr = page_address(page);
return vaddr;
@@ -612,7 +612,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
if (!alloc)
return NULL;
- context = alloc_pgtable_page(iommu->node);
+ context = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
if (!context)
return NULL;
@@ -908,7 +908,8 @@ pgtable_walk:
#endif
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
- unsigned long pfn, int *target_level)
+ unsigned long pfn, int *target_level,
+ gfp_t gfp)
{
struct dma_pte *parent, *pte;
int level = agaw_to_level(domain->agaw);
@@ -935,7 +936,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
if (!dma_pte_present(pte)) {
uint64_t pteval;
- tmp_page = alloc_pgtable_page(domain->nid);
+ tmp_page = alloc_pgtable_page(domain->nid, gfp);
if (!tmp_page)
return NULL;
@@ -1186,7 +1187,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
struct root_entry *root;
- root = (struct root_entry *)alloc_pgtable_page(iommu->node);
+ root = (struct root_entry *)alloc_pgtable_page(iommu->node, GFP_ATOMIC);
if (!root) {
pr_err("Allocating root entry for %s failed\n",
iommu->name);
@@ -2150,7 +2151,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
while (start_pfn <= end_pfn) {
if (!pte)
- pte = pfn_to_dma_pte(domain, start_pfn, &level);
+ pte = pfn_to_dma_pte(domain, start_pfn, &level,
+ GFP_ATOMIC);
if (dma_pte_present(pte)) {
dma_pte_free_pagetable(domain, start_pfn,
@@ -2172,7 +2174,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
static int
__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
- unsigned long phys_pfn, unsigned long nr_pages, int prot)
+ unsigned long phys_pfn, unsigned long nr_pages, int prot,
+ gfp_t gfp)
{
struct dma_pte *first_pte = NULL, *pte = NULL;
unsigned int largepage_lvl = 0;
@@ -2202,7 +2205,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
phys_pfn, nr_pages);
- pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
+ pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl,
+ gfp);
if (!pte)
return -ENOMEM;
first_pte = pte;
@@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
return __domain_mapping(domain, first_vpfn,
first_vpfn, last_vpfn - first_vpfn + 1,
- DMA_PTE_READ|DMA_PTE_WRITE);
+ DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL);
}
static int md_domain_init(struct dmar_domain *domain, int guest_width);
@@ -2676,7 +2680,7 @@ static int copy_context_table(struct intel_iommu *iommu,
if (!old_ce)
goto out;
- new_ce = alloc_pgtable_page(iommu->node);
+ new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL);
if (!new_ce)
goto out_unmap;
@@ -4136,7 +4140,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
domain->max_addr = 0;
/* always allocate the top pgd */
- domain->pgd = alloc_pgtable_page(domain->nid);
+ domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
if (!domain->pgd)
return -ENOMEM;
domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
@@ -4298,7 +4302,7 @@ static int intel_iommu_map(struct iommu_domain *domain,
the low bits of hpa would take us onto the next page */
size = aligned_nrpages(hpa, size);
return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
- hpa >> VTD_PAGE_SHIFT, size, prot);
+ hpa >> VTD_PAGE_SHIFT, size, prot, gfp);
}
static int intel_iommu_map_pages(struct iommu_domain *domain,
@@ -4333,7 +4337,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
/* Cope with horrid API which requires us to unmap more than the
size argument if it happens to be a large-page mapping. */
- BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
+ BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
+ GFP_ATOMIC));
if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
size = VTD_PAGE_SIZE << level_to_offset_bits(level);
@@ -4392,7 +4397,8 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
int level = 0;
u64 phys = 0;
- pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
+ pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
+ GFP_ATOMIC);
if (pte && dma_pte_present(pte))
phys = dma_pte_addr(pte) +
(iova & (BIT_MASK(level_to_offset_bits(level) +
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 06e61e474856..ca9a035e0110 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -737,7 +737,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
extern int dmar_ir_support(void);
-void *alloc_pgtable_page(int node);
+void *alloc_pgtable_page(int node, gfp_t gfp);
void free_pgtable_page(void *vaddr);
void iommu_flush_write_buffer(struct intel_iommu *iommu);
struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index fb3c7020028d..c5bf74e9372d 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -200,7 +200,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
retry:
entries = get_pasid_table_from_pde(&dir[dir_index]);
if (!entries) {
- entries = alloc_pgtable_page(info->iommu->node);
+ entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC);
if (!entries)
return NULL;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9135540d7d59..42591766266d 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -954,7 +954,7 @@ map_end:
if (map_size) {
ret = iommu_map(domain, addr - map_size,
addr - map_size, map_size,
- entry->prot);
+ entry->prot, GFP_KERNEL);
if (ret)
goto out;
map_size = 0;
@@ -2354,34 +2354,27 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
-static int _iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+int iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
const struct iommu_domain_ops *ops = domain->ops;
int ret;
+ might_sleep_if(gfpflags_allow_blocking(gfp));
+
+ /* Discourage passing strange GFP flags */
+ if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
+ __GFP_HIGHMEM)))
+ return -EINVAL;
+
ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
if (ret == 0 && ops->iotlb_sync_map)
ops->iotlb_sync_map(domain, iova, size);
return ret;
}
-
-int iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
-{
- might_sleep();
- return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);
-}
EXPORT_SYMBOL_GPL(iommu_map);
-int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
-{
- return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC);
-}
-EXPORT_SYMBOL_GPL(iommu_map_atomic);
-
static size_t __iommu_unmap_pages(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
@@ -2471,9 +2464,9 @@ size_t iommu_unmap_fast(struct iommu_domain *domain,
}
EXPORT_SYMBOL_GPL(iommu_unmap_fast);
-static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
- struct scatterlist *sg, unsigned int nents, int prot,
- gfp_t gfp)
+ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot,
+ gfp_t gfp)
{
const struct iommu_domain_ops *ops = domain->ops;
size_t len = 0, mapped = 0;
@@ -2481,6 +2474,13 @@ static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
unsigned int i = 0;
int ret;
+ might_sleep_if(gfpflags_allow_blocking(gfp));
+
+ /* Discourage passing strange GFP flags */
+ if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
+ __GFP_HIGHMEM)))
+ return -EINVAL;
+
while (i <= nents) {
phys_addr_t s_phys = sg_phys(sg);
@@ -2520,21 +2520,8 @@ out_err:
return ret;
}
-
-ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
- struct scatterlist *sg, unsigned int nents, int prot)
-{
- might_sleep();
- return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_KERNEL);
-}
EXPORT_SYMBOL_GPL(iommu_map_sg);
-ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova,
- struct scatterlist *sg, unsigned int nents, int prot)
-{
- return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
-}
-
/**
* report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
* @domain: the iommu domain where the fault has happened
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index 1e1d3509efae..f8d92c9bb65b 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -456,7 +456,8 @@ static int batch_iommu_map_small(struct iommu_domain *domain,
size % PAGE_SIZE);
while (size) {
- rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot);
+ rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot,
+ GFP_KERNEL_ACCOUNT);
if (rc)
goto err_unmap;
iova += PAGE_SIZE;
@@ -500,7 +501,8 @@ static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain,
else
rc = iommu_map(domain, iova,
PFN_PHYS(batch->pfns[cur]) + page_offset,
- next_iova - iova, area->iommu_prot);
+ next_iova - iova, area->iommu_prot,
+ GFP_KERNEL_ACCOUNT);
if (rc)
goto err_unmap;
iova = next_iova;
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 5591dab99446..6849644e2892 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -52,7 +52,7 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
if (!s390_domain)
return NULL;
- s390_domain->dma_table = dma_alloc_cpu_table();
+ s390_domain->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
if (!s390_domain->dma_table) {
kfree(s390_domain);
return NULL;
@@ -257,7 +257,8 @@ static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
phys_addr_t pa, dma_addr_t dma_addr,
- unsigned long nr_pages, int flags)
+ unsigned long nr_pages, int flags,
+ gfp_t gfp)
{
phys_addr_t page_addr = pa & PAGE_MASK;
unsigned long *entry;
@@ -265,7 +266,8 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
int rc;
for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+ entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
+ gfp);
if (unlikely(!entry)) {
rc = -ENOMEM;
goto undo_cpu_trans;
@@ -281,7 +283,7 @@ undo_cpu_trans:
while (i-- > 0) {
dma_addr -= PAGE_SIZE;
entry = dma_walk_cpu_trans(s390_domain->dma_table,
- dma_addr);
+ dma_addr, gfp);
if (!entry)
break;
dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
@@ -298,7 +300,8 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
int rc = 0;
for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+ entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
+ GFP_ATOMIC);
if (unlikely(!entry)) {
rc = -EINVAL;
break;
@@ -336,7 +339,7 @@ static int s390_iommu_map_pages(struct iommu_domain *domain,
flags |= ZPCI_TABLE_PROTECTED;
rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
- pgcount, flags);
+ pgcount, flags, gfp);
if (!rc)
*mapped = size;
diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c
index 142d4c74017c..07d4dceb5e72 100644
--- a/drivers/media/platform/qcom/venus/firmware.c
+++ b/drivers/media/platform/qcom/venus/firmware.c
@@ -158,7 +158,7 @@ static int venus_boot_no_tz(struct venus_core *core, phys_addr_t mem_phys,
core->fw.mapped_mem_size = mem_size;
ret = iommu_map(iommu, VENUS_FW_START_ADDR, mem_phys, mem_size,
- IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV);
+ IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV, GFP_KERNEL);
if (ret) {
dev_err(dev, "could not map video firmware region\n");
return ret;
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 9ec5af323f73..991a7d39f066 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -466,7 +466,8 @@ static int ipa_imem_init(struct ipa *ipa, unsigned long addr, size_t size)
size = PAGE_ALIGN(size + addr - phys);
iova = phys; /* We just want a direct mapping */
- ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE);
+ ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE,
+ GFP_KERNEL);
if (ret)
return ret;
@@ -574,7 +575,8 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size)
size = PAGE_ALIGN(size + addr - phys);
iova = phys; /* We just want a direct mapping */
- ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE);
+ ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE,
+ GFP_KERNEL);
if (ret)
return ret;
diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c
index cfcb759a87de..9a82f0336d95 100644
--- a/drivers/net/wireless/ath/ath10k/snoc.c
+++ b/drivers/net/wireless/ath/ath10k/snoc.c
@@ -1639,7 +1639,7 @@ static int ath10k_fw_init(struct ath10k *ar)
ret = iommu_map(iommu_dom, ar_snoc->fw.fw_start_addr,
ar->msa.paddr, ar->msa.mem_size,
- IOMMU_READ | IOMMU_WRITE);
+ IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
if (ret) {
ath10k_err(ar, "failed to map firmware region: %d\n", ret);
goto err_iommu_detach;
diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c
index d34a4d6325b2..df8fdc7067f9 100644
--- a/drivers/net/wireless/ath/ath11k/ahb.c
+++ b/drivers/net/wireless/ath/ath11k/ahb.c
@@ -1021,7 +1021,7 @@ static int ath11k_ahb_fw_resources_init(struct ath11k_base *ab)
ret = iommu_map(iommu_dom, ab_ahb->fw.msa_paddr,
ab_ahb->fw.msa_paddr, ab_ahb->fw.msa_size,
- IOMMU_READ | IOMMU_WRITE);
+ IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
if (ret) {
ath11k_err(ab, "failed to map firmware region: %d\n", ret);
goto err_iommu_detach;
@@ -1029,7 +1029,7 @@ static int ath11k_ahb_fw_resources_init(struct ath11k_base *ab)
ret = iommu_map(iommu_dom, ab_ahb->fw.ce_paddr,
ab_ahb->fw.ce_paddr, ab_ahb->fw.ce_size,
- IOMMU_READ | IOMMU_WRITE);
+ IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
if (ret) {
ath11k_err(ab, "failed to map firmware CE region: %d\n", ret);
goto err_iommu_unmap;
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 1cd4815a6dd1..80072b6b6283 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -643,7 +643,8 @@ static int rproc_handle_devmem(struct rproc *rproc, void *ptr,
if (!mapping)
return -ENOMEM;
- ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len, rsc->flags);
+ ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len, rsc->flags,
+ GFP_KERNEL);
if (ret) {
dev_err(dev, "failed to map devmem: %d\n", ret);
goto out;
@@ -737,7 +738,7 @@ static int rproc_alloc_carveout(struct rproc *rproc,
}
ret = iommu_map(rproc->domain, mem->da, dma, mem->len,
- mem->flags);
+ mem->flags, GFP_KERNEL);
if (ret) {
dev_err(dev, "iommu_map failed: %d\n", ret);
goto free_mapping;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 23c24fe98c00..e14f86a8ef52 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1480,7 +1480,8 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
list_for_each_entry(d, &iommu->domain_list, next) {
ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
- npage << PAGE_SHIFT, prot | IOMMU_CACHE);
+ npage << PAGE_SHIFT, prot | IOMMU_CACHE,
+ GFP_KERNEL);
if (ret)
goto unwind;
@@ -1777,8 +1778,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
size = npage << PAGE_SHIFT;
}
- ret = iommu_map(domain->domain, iova, phys,
- size, dma->prot | IOMMU_CACHE);
+ ret = iommu_map(domain->domain, iova, phys, size,
+ dma->prot | IOMMU_CACHE, GFP_KERNEL);
if (ret) {
if (!dma->iommu_mapped) {
vfio_unpin_pages_remote(dma, iova,
@@ -1866,7 +1867,7 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain)
return;
ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
- IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE);
+ IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE, GFP_KERNEL);
if (!ret) {
size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index ec32f785dfde..fd1536de5b1d 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -792,7 +792,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
r = ops->set_map(vdpa, asid, iotlb);
} else {
r = iommu_map(v->domain, iova, pa, size,
- perm_to_iommu_flags(perm));
+ perm_to_iommu_flags(perm), GFP_KERNEL);
}
if (r) {
vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a8063f26ff69..3589d1b8f922 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -471,19 +471,15 @@ extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain,
extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot);
-extern int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot);
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size);
extern size_t iommu_unmap_fast(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather);
extern ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
- struct scatterlist *sg, unsigned int nents, int prot);
-extern ssize_t iommu_map_sg_atomic(struct iommu_domain *domain,
- unsigned long iova, struct scatterlist *sg,
- unsigned int nents, int prot);
+ struct scatterlist *sg, unsigned int nents,
+ int prot, gfp_t gfp);
extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
extern void iommu_set_fault_handler(struct iommu_domain *domain,
iommu_fault_handler_t handler, void *token);
@@ -777,14 +773,7 @@ static inline struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
}
static inline int iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
-{
- return -ENODEV;
-}
-
-static inline int iommu_map_atomic(struct iommu_domain *domain,
- unsigned long iova, phys_addr_t paddr,
- size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
return -ENODEV;
}
@@ -804,14 +793,7 @@ static inline size_t iommu_unmap_fast(struct iommu_domain *domain,
static inline ssize_t iommu_map_sg(struct iommu_domain *domain,
unsigned long iova, struct scatterlist *sg,
- unsigned int nents, int prot)
-{
- return -ENODEV;
-}
-
-static inline ssize_t iommu_map_sg_atomic(struct iommu_domain *domain,
- unsigned long iova, struct scatterlist *sg,
- unsigned int nents, int prot)
+ unsigned int nents, int prot, gfp_t gfp)
{
return -ENODEV;
}
@@ -1122,7 +1104,8 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
static inline size_t iommu_map_sgtable(struct iommu_domain *domain,
unsigned long iova, struct sg_table *sgt, int prot)
{
- return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot);
+ return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot,
+ GFP_KERNEL);
}
#ifdef CONFIG_IOMMU_DEBUGFS