diff options
author | Vasant Hegde <vasant.hegde@amd.com> | 2022-08-25 06:39:36 +0000 |
---|---|---|
committer | Joerg Roedel <jroedel@suse.de> | 2022-09-07 16:12:35 +0200 |
commit | aaac38f614871df252aa7459647bf68d42f7c3e7 (patch) | |
tree | d3ffddd3ebef1fb2fa6fd4a0c1365f75eb58bb44 /drivers/iommu | |
parent | be1af02b277417be735d8513195e5ba1bc3c3a3d (diff) |
iommu/amd: Initial support for AMD IOMMU v2 page table
Introduce IO page table framework support for AMD IOMMU v2 page table.
This patch implements 4 level page table within iommu amd driver and
supports 4K/2M/1G page sizes.
Signed-off-by: Vasant Hegde <vasant.hegde@amd.com>
Link: https://lore.kernel.org/r/20220825063939.8360-7-vasant.hegde@amd.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Diffstat (limited to 'drivers/iommu')
-rw-r--r-- | drivers/iommu/amd/Makefile | 2 | ||||
-rw-r--r-- | drivers/iommu/amd/amd_iommu_types.h | 5 | ||||
-rw-r--r-- | drivers/iommu/amd/io_pgtable_v2.c | 415 | ||||
-rw-r--r-- | drivers/iommu/io-pgtable.c | 1 |
4 files changed, 421 insertions, 2 deletions
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index a935f8f4b974..773d8aa00283 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 5b1019dab328..660c1f044912 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -276,6 +276,8 @@ * 512GB Pages are not supported due to a hardware bug */ #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) +/* 4K, 2MB, 1G page sizes are supported */ +#define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 21) | (1ULL << 30)) /* Bit value definition for dte irq remapping fields*/ #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) @@ -526,7 +528,8 @@ struct amd_io_pgtable { struct io_pgtable iop; int mode; u64 *root; - atomic64_t pt_root; /* pgtable root and pgtable mode */ + atomic64_t pt_root; /* pgtable root and pgtable mode */ + u64 *pgd; /* v2 pgtable pgd pointer */ }; /* diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c new file mode 100644 index 000000000000..8638ddf6fb3b --- /dev/null +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CPU-agnostic AMD IO page table v2 allocator. + * + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> + * Author: Vasant Hegde <vasant.hegde@amd.com> + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include <linux/bitops.h> +#include <linux/io-pgtable.h> +#include <linux/kernel.h> + +#include <asm/barrier.h> + +#include "amd_iommu_types.h" +#include "amd_iommu.h" + +#define IOMMU_PAGE_PRESENT BIT_ULL(0) /* Is present */ +#define IOMMU_PAGE_RW BIT_ULL(1) /* Writeable */ +#define IOMMU_PAGE_USER BIT_ULL(2) /* Userspace addressable */ +#define IOMMU_PAGE_PWT BIT_ULL(3) /* Page write through */ +#define IOMMU_PAGE_PCD BIT_ULL(4) /* Page cache disabled */ +#define IOMMU_PAGE_ACCESS BIT_ULL(5) /* Was accessed (updated by IOMMU) */ +#define IOMMU_PAGE_DIRTY BIT_ULL(6) /* Was written to (updated by IOMMU) */ +#define IOMMU_PAGE_PSE BIT_ULL(7) /* Page Size Extensions */ +#define IOMMU_PAGE_NX BIT_ULL(63) /* No execute */ + +#define MAX_PTRS_PER_PAGE 512 + +#define IOMMU_PAGE_SIZE_2M BIT_ULL(21) +#define IOMMU_PAGE_SIZE_1G BIT_ULL(30) + + +static inline int get_pgtable_level(void) +{ + /* 5 level page table is not supported */ + return PAGE_MODE_4_LEVEL; +} + +static inline bool is_large_pte(u64 pte) +{ + return (pte & IOMMU_PAGE_PSE); +} + +static inline void *alloc_pgtable_page(void) +{ + return (void *)get_zeroed_page(GFP_KERNEL); +} + +static inline u64 set_pgtable_attr(u64 *page) +{ + u64 prot; + + prot = IOMMU_PAGE_PRESENT | IOMMU_PAGE_RW | IOMMU_PAGE_USER; + prot |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY; + + return (iommu_virt_to_phys(page) | prot); +} + +static inline void *get_pgtable_pte(u64 pte) +{ + return iommu_phys_to_virt(pte & PM_ADDR_MASK); +} + +static u64 set_pte_attr(u64 paddr, u64 pg_size, int prot) +{ + u64 pte; + + pte = __sme_set(paddr & PM_ADDR_MASK); + pte |= IOMMU_PAGE_PRESENT | IOMMU_PAGE_USER; + pte |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY; + + if (prot & IOMMU_PROT_IW) + pte |= IOMMU_PAGE_RW; + + /* Large page */ + if (pg_size == IOMMU_PAGE_SIZE_1G || pg_size == IOMMU_PAGE_SIZE_2M) + pte |= IOMMU_PAGE_PSE; + + return pte; +} + +static inline u64 get_alloc_page_size(u64 size) +{ + if (size >= IOMMU_PAGE_SIZE_1G) + return IOMMU_PAGE_SIZE_1G; + + if (size >= IOMMU_PAGE_SIZE_2M) + return IOMMU_PAGE_SIZE_2M; + + return PAGE_SIZE; +} + +static inline int page_size_to_level(u64 pg_size) +{ + if (pg_size == IOMMU_PAGE_SIZE_1G) + return PAGE_MODE_3_LEVEL; + if (pg_size == IOMMU_PAGE_SIZE_2M) + return PAGE_MODE_2_LEVEL; + + return PAGE_MODE_1_LEVEL; +} + +static inline void free_pgtable_page(u64 *pt) +{ + free_page((unsigned long)pt); +} + +static void free_pgtable(u64 *pt, int level) +{ + u64 *p; + int i; + + for (i = 0; i < MAX_PTRS_PER_PAGE; i++) { + /* PTE present? */ + if (!IOMMU_PTE_PRESENT(pt[i])) + continue; + + if (is_large_pte(pt[i])) + continue; + + /* + * Free the next level. No need to look at l1 tables here since + * they can only contain leaf PTEs; just free them directly. + */ + p = get_pgtable_pte(pt[i]); + if (level > 2) + free_pgtable(p, level - 1); + else + free_pgtable_page(p); + } + + free_pgtable_page(pt); +} + +/* Allocate page table */ +static u64 *v2_alloc_pte(u64 *pgd, unsigned long iova, + unsigned long pg_size, bool *updated) +{ + u64 *pte, *page; + int level, end_level; + + level = get_pgtable_level() - 1; + end_level = page_size_to_level(pg_size); + pte = &pgd[PM_LEVEL_INDEX(level, iova)]; + iova = PAGE_SIZE_ALIGN(iova, PAGE_SIZE); + + while (level >= end_level) { + u64 __pte, __npte; + + __pte = *pte; + + if (IOMMU_PTE_PRESENT(__pte) && is_large_pte(__pte)) { + /* Unmap large pte */ + cmpxchg64(pte, *pte, 0ULL); + *updated = true; + continue; + } + + if (!IOMMU_PTE_PRESENT(__pte)) { + page = alloc_pgtable_page(); + if (!page) + return NULL; + + __npte = set_pgtable_attr(page); + /* pte could have been changed somewhere. */ + if (cmpxchg64(pte, __pte, __npte) != __pte) + free_pgtable_page(page); + else if (IOMMU_PTE_PRESENT(__pte)) + *updated = true; + + continue; + } + + level -= 1; + pte = get_pgtable_pte(__pte); + pte = &pte[PM_LEVEL_INDEX(level, iova)]; + } + + /* Tear down existing pte entries */ + if (IOMMU_PTE_PRESENT(*pte)) { + u64 *__pte; + + *updated = true; + __pte = get_pgtable_pte(*pte); + cmpxchg64(pte, *pte, 0ULL); + if (pg_size == IOMMU_PAGE_SIZE_1G) + free_pgtable(__pte, end_level - 1); + else if (pg_size == IOMMU_PAGE_SIZE_2M) + free_pgtable_page(__pte); + } + + return pte; +} + +/* + * This function checks if there is a PTE for a given dma address. + * If there is one, it returns the pointer to it. + */ +static u64 *fetch_pte(struct amd_io_pgtable *pgtable, + unsigned long iova, unsigned long *page_size) +{ + u64 *pte; + int level; + + level = get_pgtable_level() - 1; + pte = &pgtable->pgd[PM_LEVEL_INDEX(level, iova)]; + /* Default page size is 4K */ + *page_size = PAGE_SIZE; + + while (level) { + /* Not present */ + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + /* Walk to the next level */ + pte = get_pgtable_pte(*pte); + pte = &pte[PM_LEVEL_INDEX(level - 1, iova)]; + + /* Large page */ + if (is_large_pte(*pte)) { + if (level == PAGE_MODE_3_LEVEL) + *page_size = IOMMU_PAGE_SIZE_1G; + else if (level == PAGE_MODE_2_LEVEL) + *page_size = IOMMU_PAGE_SIZE_2M; + else + return NULL; /* Wrongly set PSE bit in PTE */ + + break; + } + + level -= 1; + } + + return pte; +} + +static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) +{ + struct protection_domain *pdom = io_pgtable_ops_to_domain(ops); + struct io_pgtable_cfg *cfg = &pdom->iop.iop.cfg; + u64 *pte; + unsigned long map_size; + unsigned long mapped_size = 0; + unsigned long o_iova = iova; + size_t size = pgcount << __ffs(pgsize); + int count = 0; + int ret = 0; + bool updated = false; + + if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize) || !pgcount) + return -EINVAL; + + if (!(prot & IOMMU_PROT_MASK)) + return -EINVAL; + + while (mapped_size < size) { + map_size = get_alloc_page_size(pgsize); + pte = v2_alloc_pte(pdom->iop.pgd, iova, map_size, &updated); + if (!pte) { + ret = -EINVAL; + goto out; + } + + *pte = set_pte_attr(paddr, map_size, prot); + + count++; + iova += map_size; + paddr += map_size; + mapped_size += map_size; + } + +out: + if (updated) { + if (count > 1) + amd_iommu_flush_tlb(&pdom->domain, 0); + else + amd_iommu_flush_page(&pdom->domain, 0, o_iova); + } + + if (mapped) + *mapped += mapped_size; + + return ret; +} + +static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops, + unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &pgtable->iop.cfg; + unsigned long unmap_size; + unsigned long unmapped = 0; + size_t size = pgcount << __ffs(pgsize); + u64 *pte; + + if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount)) + return 0; + + while (unmapped < size) { + pte = fetch_pte(pgtable, iova, &unmap_size); + if (!pte) + return unmapped; + + *pte = 0ULL; + + iova = (iova & ~(unmap_size - 1)) + unmap_size; + unmapped += unmap_size; + } + + return unmapped; +} + +static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long offset_mask, pte_pgsize; + u64 *pte, __pte; + + pte = fetch_pte(pgtable, iova, &pte_pgsize); + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + return 0; + + offset_mask = pte_pgsize - 1; + __pte = __sme_clr(*pte & PM_ADDR_MASK); + + return (__pte & ~offset_mask) | (iova & offset_mask); +} + +/* + * ---------------------------------------------------- + */ +static void v2_tlb_flush_all(void *cookie) +{ +} + +static void v2_tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void v2_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) +{ +} + +static const struct iommu_flush_ops v2_flush_ops = { + .tlb_flush_all = v2_tlb_flush_all, + .tlb_flush_walk = v2_tlb_flush_walk, + .tlb_add_page = v2_tlb_add_page, +}; + +static void v2_free_pgtable(struct io_pgtable *iop) +{ + struct protection_domain *pdom; + struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); + + pdom = container_of(pgtable, struct protection_domain, iop); + if (!(pdom->flags & PD_IOMMUV2_MASK)) + return; + + /* + * Make changes visible to IOMMUs. No need to clear gcr3 entry + * as gcr3 table is already freed. + */ + amd_iommu_domain_update(pdom); + + /* Free page table */ + free_pgtable(pgtable->pgd, get_pgtable_level()); +} + +static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); + struct protection_domain *pdom = (struct protection_domain *)cookie; + int ret; + + pgtable->pgd = alloc_pgtable_page(); + if (!pgtable->pgd) + return NULL; + + ret = amd_iommu_domain_set_gcr3(&pdom->domain, 0, iommu_virt_to_phys(pgtable->pgd)); + if (ret) + goto err_free_pgd; + + pgtable->iop.ops.map_pages = iommu_v2_map_pages; + pgtable->iop.ops.unmap_pages = iommu_v2_unmap_pages; + pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys; + + cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2, + cfg->ias = IOMMU_IN_ADDR_BIT_SIZE, + cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, + cfg->tlb = &v2_flush_ops; + + return &pgtable->iop; + +err_free_pgd: + free_pgtable_page(pgtable->pgd); + + return NULL; +} + +struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = { + .alloc = v2_alloc_pgtable, + .free = v2_free_pgtable, +}; diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index f4bfcef98297..ac02a45ed798 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -27,6 +27,7 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { #endif #ifdef CONFIG_AMD_IOMMU [AMD_IOMMU_V1] = &io_pgtable_amd_iommu_v1_init_fns, + [AMD_IOMMU_V2] = &io_pgtable_amd_iommu_v2_init_fns, #endif }; |