diff options
author | Benjamin Tissoires <benjamin.tissoires@redhat.com> | 2022-10-05 10:21:55 +0100 |
---|---|---|
committer | Benjamin Tissoires <benjamin.tissoires@redhat.com> | 2022-10-05 10:21:55 +0100 |
commit | edd1533d3ccd82dd5d600986d27d524e6be4c5fd (patch) | |
tree | 1ac5ae82ea63114d5c13212e2819531e4507f800 /drivers/iommu/amd | |
parent | 7d8fe4cfc54b5fb2093e12cffa8ca74d3c88e0fa (diff) | |
parent | 98d67f250472cdd0f8d083830be3ec9dbb0c65a8 (diff) |
Merge branch 'for-6.1/logitech' into for-linus
- Add hanlding of all Bluetooth HID++ devices and fixes in hid++
(Bastien Nocera)
Diffstat (limited to 'drivers/iommu/amd')
-rw-r--r-- | drivers/iommu/amd/amd_iommu.h | 18 | ||||
-rw-r--r-- | drivers/iommu/amd/amd_iommu_types.h | 186 | ||||
-rw-r--r-- | drivers/iommu/amd/init.c | 942 | ||||
-rw-r--r-- | drivers/iommu/amd/io_pgtable.c | 6 | ||||
-rw-r--r-- | drivers/iommu/amd/iommu.c | 585 | ||||
-rw-r--r-- | drivers/iommu/amd/iommu_v2.c | 67 | ||||
-rw-r--r-- | drivers/iommu/amd/quirks.c | 4 |
7 files changed, 1133 insertions, 675 deletions
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 1ab31074f5b3..84e5bb1bf01b 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -13,12 +13,13 @@ extern irqreturn_t amd_iommu_int_thread(int irq, void *data); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); -extern void amd_iommu_apply_erratum_63(u16 devid); +extern void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid); extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu); extern int amd_iommu_init_devices(void); extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); extern int amd_iommu_init_api(void); +extern void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid); #ifdef CONFIG_AMD_IOMMU_DEBUGFS void amd_iommu_debugfs_setup(struct amd_iommu *iommu); @@ -114,10 +115,17 @@ void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) amd_iommu_domain_set_pt_root(domain, 0); } +static inline int get_pci_sbdf_id(struct pci_dev *pdev) +{ + int seg = pci_domain_nr(pdev->bus); + u16 devid = pci_dev_id(pdev); + + return PCI_SEG_DEVID_TO_SBDF(seg, devid); +} extern bool translation_pre_enabled(struct amd_iommu *iommu); extern bool amd_iommu_is_attach_deferred(struct device *dev); -extern int __init add_special_device(u8 type, u8 id, u16 *devid, +extern int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line); #ifdef CONFIG_DMI @@ -128,4 +136,10 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { } extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); +extern struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); + +extern u64 amd_iommu_efr; +extern u64 amd_iommu_efr2; + +extern bool amd_iommu_snp_en; #endif diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 72d0f5e2f651..5b1019dab328 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -67,6 +67,7 @@ #define MMIO_INTCAPXT_EVT_OFFSET 0x0170 #define MMIO_INTCAPXT_PPR_OFFSET 0x0178 #define MMIO_INTCAPXT_GALOG_OFFSET 0x0180 +#define MMIO_EXT_FEATURES2 0x01A0 #define MMIO_CMD_HEAD_OFFSET 0x2000 #define MMIO_CMD_TAIL_OFFSET 0x2008 #define MMIO_EVT_HEAD_OFFSET 0x2010 @@ -102,6 +103,12 @@ #define FEATURE_GLXVAL_SHIFT 14 #define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT) +/* Extended Feature 2 Bits */ +#define FEATURE_SNPAVICSUP_SHIFT 5 +#define FEATURE_SNPAVICSUP_MASK (0x07ULL << FEATURE_SNPAVICSUP_SHIFT) +#define FEATURE_SNPAVICSUP_GAM(x) \ + ((x & FEATURE_SNPAVICSUP_MASK) >> FEATURE_SNPAVICSUP_SHIFT == 0x1) + /* Note: * The current driver only support 16-bit PASID. * Currently, hardware only implement upto 16-bit PASID @@ -143,27 +150,28 @@ #define EVENT_FLAG_I 0x008 /* feature control bits */ -#define CONTROL_IOMMU_EN 0x00ULL -#define CONTROL_HT_TUN_EN 0x01ULL -#define CONTROL_EVT_LOG_EN 0x02ULL -#define CONTROL_EVT_INT_EN 0x03ULL -#define CONTROL_COMWAIT_EN 0x04ULL -#define CONTROL_INV_TIMEOUT 0x05ULL -#define CONTROL_PASSPW_EN 0x08ULL -#define CONTROL_RESPASSPW_EN 0x09ULL -#define CONTROL_COHERENT_EN 0x0aULL -#define CONTROL_ISOC_EN 0x0bULL -#define CONTROL_CMDBUF_EN 0x0cULL -#define CONTROL_PPRLOG_EN 0x0dULL -#define CONTROL_PPRINT_EN 0x0eULL -#define CONTROL_PPR_EN 0x0fULL -#define CONTROL_GT_EN 0x10ULL -#define CONTROL_GA_EN 0x11ULL -#define CONTROL_GAM_EN 0x19ULL -#define CONTROL_GALOG_EN 0x1CULL -#define CONTROL_GAINT_EN 0x1DULL -#define CONTROL_XT_EN 0x32ULL -#define CONTROL_INTCAPXT_EN 0x33ULL +#define CONTROL_IOMMU_EN 0 +#define CONTROL_HT_TUN_EN 1 +#define CONTROL_EVT_LOG_EN 2 +#define CONTROL_EVT_INT_EN 3 +#define CONTROL_COMWAIT_EN 4 +#define CONTROL_INV_TIMEOUT 5 +#define CONTROL_PASSPW_EN 8 +#define CONTROL_RESPASSPW_EN 9 +#define CONTROL_COHERENT_EN 10 +#define CONTROL_ISOC_EN 11 +#define CONTROL_CMDBUF_EN 12 +#define CONTROL_PPRLOG_EN 13 +#define CONTROL_PPRINT_EN 14 +#define CONTROL_PPR_EN 15 +#define CONTROL_GT_EN 16 +#define CONTROL_GA_EN 17 +#define CONTROL_GAM_EN 25 +#define CONTROL_GALOG_EN 28 +#define CONTROL_GAINT_EN 29 +#define CONTROL_XT_EN 50 +#define CONTROL_INTCAPXT_EN 51 +#define CONTROL_SNPAVIC_EN 61 #define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT) #define CTRL_INV_TO_NONE 0 @@ -445,8 +453,6 @@ struct irq_remap_table { u32 *table; }; -extern struct irq_remap_table **irq_lookup_table; - /* Interrupt remapping feature used? */ extern bool amd_iommu_irq_remap; @@ -456,6 +462,16 @@ extern bool amdr_ivrs_remap_support; /* kmem_cache to get tables with 128 byte alignement */ extern struct kmem_cache *amd_iommu_irq_cache; +#define PCI_SBDF_TO_SEGID(sbdf) (((sbdf) >> 16) & 0xffff) +#define PCI_SBDF_TO_DEVID(sbdf) ((sbdf) & 0xffff) +#define PCI_SEG_DEVID_TO_SBDF(seg, devid) ((((u32)(seg) & 0xffff) << 16) | \ + ((devid) & 0xffff)) + +/* Make iterating over all pci segment easier */ +#define for_each_pci_segment(pci_seg) \ + list_for_each_entry((pci_seg), &amd_iommu_pci_seg_list, list) +#define for_each_pci_segment_safe(pci_seg, next) \ + list_for_each_entry_safe((pci_seg), (next), &amd_iommu_pci_seg_list, list) /* * Make iterating over all IOMMUs easier */ @@ -478,13 +494,14 @@ extern struct kmem_cache *amd_iommu_irq_cache; struct amd_iommu_fault { u64 address; /* IO virtual address of the fault*/ u32 pasid; /* Address space identifier */ - u16 device_id; /* Originating PCI device id */ + u32 sbdf; /* Originating PCI device id */ u16 tag; /* PPR tag */ u16 flags; /* Fault flags */ }; +struct amd_iommu; struct iommu_domain; struct irq_domain; struct amd_irte_ops; @@ -531,6 +548,75 @@ struct protection_domain { }; /* + * This structure contains information about one PCI segment in the system. + */ +struct amd_iommu_pci_seg { + /* List with all PCI segments in the system */ + struct list_head list; + + /* List of all available dev_data structures */ + struct llist_head dev_data_list; + + /* PCI segment number */ + u16 id; + + /* Largest PCI device id we expect translation requests for */ + u16 last_bdf; + + /* Size of the device table */ + u32 dev_table_size; + + /* Size of the alias table */ + u32 alias_table_size; + + /* Size of the rlookup table */ + u32 rlookup_table_size; + + /* + * device table virtual address + * + * Pointer to the per PCI segment device table. + * It is indexed by the PCI device id or the HT unit id and contains + * information about the domain the device belongs to as well as the + * page table root pointer. + */ + struct dev_table_entry *dev_table; + + /* + * The rlookup iommu table is used to find the IOMMU which is + * responsible for a specific device. It is indexed by the PCI + * device id. + */ + struct amd_iommu **rlookup_table; + + /* + * This table is used to find the irq remapping table for a given + * device id quickly. + */ + struct irq_remap_table **irq_lookup_table; + + /* + * Pointer to a device table which the content of old device table + * will be copied to. It's only be used in kdump kernel. + */ + struct dev_table_entry *old_dev_tbl_cpy; + + /* + * The alias table is a driver specific data structure which contains the + * mappings of the PCI device ids to the actual requestor ids on the IOMMU. + * More than one device can share the same requestor id. + */ + u16 *alias_table; + + /* + * A list of required unity mappings we find in ACPI. It is not locked + * because as runtime it is only read. It is created at ACPI table + * parsing time. + */ + struct list_head unity_map; +}; + +/* * Structure where we save information about one hardware AMD IOMMU in the * system. */ @@ -567,6 +653,9 @@ struct amd_iommu { /* Extended features */ u64 features; + /* Extended features 2 */ + u64 features2; + /* IOMMUv2 */ bool is_iommu_v2; @@ -581,7 +670,7 @@ struct amd_iommu { u16 cap_ptr; /* pci domain of this IOMMU */ - u16 pci_seg; + struct amd_iommu_pci_seg *pci_seg; /* start of exclusion range of that IOMMU */ u64 exclusion_start; @@ -666,8 +755,8 @@ struct acpihid_map_entry { struct list_head list; u8 uid[ACPIHID_UID_LEN]; u8 hid[ACPIHID_HID_LEN]; - u16 devid; - u16 root_devid; + u32 devid; + u32 root_devid; bool cmd_line; struct iommu_group *group; }; @@ -675,7 +764,7 @@ struct acpihid_map_entry { struct devid_map { struct list_head list; u8 id; - u16 devid; + u32 devid; bool cmd_line; }; @@ -689,7 +778,7 @@ struct iommu_dev_data { struct list_head list; /* For domain->dev_list */ struct llist_node dev_data_list; /* For global dev_data_list */ struct protection_domain *domain; /* Domain the device is bound to */ - struct pci_dev *pdev; + struct device *dev; u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ struct { @@ -710,6 +799,12 @@ extern struct list_head hpet_map; extern struct list_head acpihid_map; /* + * List with all PCI segments in the system. This list is not locked because + * it is only written at driver initialization time + */ +extern struct list_head amd_iommu_pci_seg_list; + +/* * List with all IOMMUs in the system. This list is not locked because it is * only written and read at driver initialization or suspend time */ @@ -749,38 +844,12 @@ struct unity_map_entry { }; /* - * List of all unity mappings. It is not locked because as runtime it is only - * read. It is created at ACPI table parsing time. - */ -extern struct list_head amd_iommu_unity_map; - -/* * Data structures for device handling */ -/* - * Device table used by hardware. Read and write accesses by software are - * locked with the amd_iommu_pd_table lock. - */ -extern struct dev_table_entry *amd_iommu_dev_table; - -/* - * Alias table to find requestor ids to device ids. Not locked because only - * read on runtime. - */ -extern u16 *amd_iommu_alias_table; - -/* - * Reverse lookup table to find the IOMMU which translates a specific device. - */ -extern struct amd_iommu **amd_iommu_rlookup_table; - /* size of the dma_ops aperture as power of 2 */ extern unsigned amd_iommu_aperture_order; -/* largest PCI device id we expect translation requests for */ -extern u16 amd_iommu_last_bdf; - /* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; @@ -913,6 +982,7 @@ struct irq_2_irte { struct amd_ir_data { u32 cached_ga_tag; + struct amd_iommu *iommu; struct irq_2_irte irq_2_irte; struct msi_msg msi_entry; void *entry; /* Pointer to union irte or struct irte_ga */ @@ -930,9 +1000,9 @@ struct amd_ir_data { struct amd_irte_ops { void (*prepare)(void *, u32, bool, u8, u32, int); - void (*activate)(void *, u16, u16); - void (*deactivate)(void *, u16, u16); - void (*set_affinity)(void *, u16, u16, u8, u32); + void (*activate)(struct amd_iommu *iommu, void *, u16, u16); + void (*deactivate)(struct amd_iommu *iommu, void *, u16, u16); + void (*set_affinity)(struct amd_iommu *iommu, void *, u16, u16, u8, u32); void *(*get)(struct irq_remap_table *, int); void (*set_allocated)(struct irq_remap_table *, int); bool (*is_allocated)(struct irq_remap_table *, int); diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 1d08f87e734b..fdc642362c14 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -84,6 +84,10 @@ #define ACPI_DEVFLAG_ATSDIS 0x10000000 #define LOOP_TIMEOUT 2000000 + +#define IVRS_GET_SBDF_ID(seg, bus, dev, fd) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ + | ((dev & 0x1f) << 3) | (fn & 0x7)) + /* * ACPI table definitions * @@ -110,7 +114,7 @@ struct ivhd_header { /* Following only valid on IVHD type 11h and 40h */ u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */ - u64 res; + u64 efr_reg2; } __attribute__((packed)); /* @@ -141,7 +145,8 @@ struct ivmd_header { u16 length; u16 devid; u16 aux; - u64 resv; + u16 pci_seg; + u8 resv[6]; u64 range_start; u64 range_length; } __attribute__((packed)); @@ -159,11 +164,15 @@ static bool amd_iommu_disabled __initdata; static bool amd_iommu_force_enable __initdata; static int amd_iommu_target_ivhd_type; -u16 amd_iommu_last_bdf; /* largest PCI device id we have - to handle */ -LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings - we find in ACPI */ +/* Global EFR and EFR2 registers */ +u64 amd_iommu_efr; +u64 amd_iommu_efr2; +/* SNP is enabled on the system? */ +bool amd_iommu_snp_en; +EXPORT_SYMBOL(amd_iommu_snp_en); + +LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ @@ -186,47 +195,11 @@ bool amdr_ivrs_remap_support __read_mostly; bool amd_iommu_force_isolation __read_mostly; /* - * Pointer to the device table which is shared by all AMD IOMMUs - * it is indexed by the PCI device id or the HT unit id and contains - * information about the domain the device belongs to as well as the - * page table root pointer. - */ -struct dev_table_entry *amd_iommu_dev_table; -/* - * Pointer to a device table which the content of old device table - * will be copied to. It's only be used in kdump kernel. - */ -static struct dev_table_entry *old_dev_tbl_cpy; - -/* - * The alias table is a driver specific data structure which contains the - * mappings of the PCI device ids to the actual requestor ids on the IOMMU. - * More than one device can share the same requestor id. - */ -u16 *amd_iommu_alias_table; - -/* - * The rlookup table is used to find the IOMMU which is responsible - * for a specific device. It is also indexed by the PCI device id. - */ -struct amd_iommu **amd_iommu_rlookup_table; - -/* - * This table is used to find the irq remapping table for a given device id - * quickly. - */ -struct irq_remap_table **irq_lookup_table; - -/* * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap * to know which ones are already in use. */ unsigned long *amd_iommu_pd_alloc_bitmap; -static u32 dev_table_size; /* size of the device table */ -static u32 alias_table_size; /* size of the alias table */ -static u32 rlookup_table_size; /* size if the rlookup table */ - enum iommu_init_state { IOMMU_START_STATE, IOMMU_IVRS_DETECTED, @@ -256,7 +229,7 @@ static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); -static void init_device_table_dma(void); +static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg); static bool amd_iommu_pre_enabled = true; @@ -281,16 +254,10 @@ static void init_translation_status(struct amd_iommu *iommu) iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; } -static inline void update_last_devid(u16 devid) -{ - if (devid > amd_iommu_last_bdf) - amd_iommu_last_bdf = devid; -} - -static inline unsigned long tbl_size(int entry_size) +static inline unsigned long tbl_size(int entry_size, int last_bdf) { unsigned shift = PAGE_SHIFT + - get_order(((int)amd_iommu_last_bdf + 1) * entry_size); + get_order((last_bdf + 1) * entry_size); return 1UL << shift; } @@ -300,21 +267,46 @@ int amd_iommu_get_num_iommus(void) return amd_iommus_present; } -#ifdef CONFIG_IRQ_REMAP -static bool check_feature_on_all_iommus(u64 mask) +/* + * Iterate through all the IOMMUs to get common EFR + * masks among all IOMMUs and warn if found inconsistency. + */ +static void get_global_efr(void) { - bool ret = false; struct amd_iommu *iommu; for_each_iommu(iommu) { - ret = iommu_feature(iommu, mask); - if (!ret) - return false; + u64 tmp = iommu->features; + u64 tmp2 = iommu->features2; + + if (list_is_first(&iommu->list, &amd_iommu_list)) { + amd_iommu_efr = tmp; + amd_iommu_efr2 = tmp2; + continue; + } + + if (amd_iommu_efr == tmp && + amd_iommu_efr2 == tmp2) + continue; + + pr_err(FW_BUG + "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n", + tmp, tmp2, amd_iommu_efr, amd_iommu_efr2, + iommu->index, iommu->pci_seg->id, + PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid), + PCI_FUNC(iommu->devid)); + + amd_iommu_efr &= tmp; + amd_iommu_efr2 &= tmp2; } - return true; + pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2); +} + +static bool check_feature_on_all_iommus(u64 mask) +{ + return !!(amd_iommu_efr & mask); } -#endif /* * For IVHD type 0x11/0x40, EFR is also available via IVHD. @@ -324,8 +316,10 @@ static bool check_feature_on_all_iommus(u64 mask) static void __init early_iommu_features_init(struct amd_iommu *iommu, struct ivhd_header *h) { - if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) + if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) { iommu->features = h->efr_reg; + iommu->features2 = h->efr_reg2; + } if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP) amdr_ivrs_remap_support = true; } @@ -399,7 +393,7 @@ static void iommu_set_cwwb_range(struct amd_iommu *iommu) u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem); u64 entry = start & PM_ADDR_MASK; - if (!iommu_feature(iommu, FEATURE_SNP)) + if (!check_feature_on_all_iommus(FEATURE_SNP)) return; /* Note: @@ -421,10 +415,12 @@ static void iommu_set_cwwb_range(struct amd_iommu *iommu) static void iommu_set_device_table(struct amd_iommu *iommu) { u64 entry; + u32 dev_table_size = iommu->pci_seg->dev_table_size; + void *dev_table = (void *)get_dev_table(iommu); BUG_ON(iommu->mmio_base == NULL); - entry = iommu_virt_to_phys(amd_iommu_dev_table); + entry = iommu_virt_to_phys(dev_table); entry |= (dev_table_size >> 12) - 1; memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, &entry, sizeof(entry)); @@ -557,6 +553,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) { u8 *p = (void *)h, *end = (void *)h; struct ivhd_entry *dev; + int last_devid = -EINVAL; u32 ivhd_size = get_ivhd_header_size(h); @@ -573,14 +570,14 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) switch (dev->type) { case IVHD_DEV_ALL: /* Use maximum BDF value for DEV_ALL */ - update_last_devid(0xffff); - break; + return 0xffff; case IVHD_DEV_SELECT: case IVHD_DEV_RANGE_END: case IVHD_DEV_ALIAS: case IVHD_DEV_EXT_SELECT: /* all the above subfield types refer to device ids */ - update_last_devid(dev->devid); + if (dev->devid > last_devid) + last_devid = dev->devid; break; default: break; @@ -590,7 +587,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) WARN_ON(p != end); - return 0; + return last_devid; } static int __init check_ivrs_checksum(struct acpi_table_header *table) @@ -614,38 +611,125 @@ static int __init check_ivrs_checksum(struct acpi_table_header *table) * id which we need to handle. This is the first of three functions which parse * the ACPI table. So we check the checksum here. */ -static int __init find_last_devid_acpi(struct acpi_table_header *table) +static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg) { u8 *p = (u8 *)table, *end = (u8 *)table; struct ivhd_header *h; + int last_devid, last_bdf = 0; p += IVRS_HEADER_LENGTH; end += table->length; while (p < end) { h = (struct ivhd_header *)p; - if (h->type == amd_iommu_target_ivhd_type) { - int ret = find_last_devid_from_ivhd(h); - - if (ret) - return ret; + if (h->pci_seg == pci_seg && + h->type == amd_iommu_target_ivhd_type) { + last_devid = find_last_devid_from_ivhd(h); + + if (last_devid < 0) + return -EINVAL; + if (last_devid > last_bdf) + last_bdf = last_devid; } p += h->length; } WARN_ON(p != end); - return 0; + return last_bdf; } /**************************************************************************** * * The following functions belong to the code path which parses the ACPI table * the second time. In this ACPI parsing iteration we allocate IOMMU specific - * data structures, initialize the device/alias/rlookup table and also - * basically initialize the hardware. + * data structures, initialize the per PCI segment device/alias/rlookup table + * and also basically initialize the hardware. * ****************************************************************************/ +/* Allocate per PCI segment device table */ +static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) +{ + pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32, + get_order(pci_seg->dev_table_size)); + if (!pci_seg->dev_table) + return -ENOMEM; + + return 0; +} + +static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) +{ + free_pages((unsigned long)pci_seg->dev_table, + get_order(pci_seg->dev_table_size)); + pci_seg->dev_table = NULL; +} + +/* Allocate per PCI segment IOMMU rlookup table. */ +static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) +{ + pci_seg->rlookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(pci_seg->rlookup_table_size)); + if (pci_seg->rlookup_table == NULL) + return -ENOMEM; + + return 0; +} + +static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) +{ + free_pages((unsigned long)pci_seg->rlookup_table, + get_order(pci_seg->rlookup_table_size)); + pci_seg->rlookup_table = NULL; +} + +static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) +{ + pci_seg->irq_lookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(pci_seg->rlookup_table_size)); + kmemleak_alloc(pci_seg->irq_lookup_table, + pci_seg->rlookup_table_size, 1, GFP_KERNEL); + if (pci_seg->irq_lookup_table == NULL) + return -ENOMEM; + + return 0; +} + +static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) +{ + kmemleak_free(pci_seg->irq_lookup_table); + free_pages((unsigned long)pci_seg->irq_lookup_table, + get_order(pci_seg->rlookup_table_size)); + pci_seg->irq_lookup_table = NULL; +} + +static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) +{ + int i; + + pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(pci_seg->alias_table_size)); + if (!pci_seg->alias_table) + return -ENOMEM; + + /* + * let all alias entries point to itself + */ + for (i = 0; i <= pci_seg->last_bdf; ++i) + pci_seg->alias_table[i] = i; + + return 0; +} + +static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) +{ + free_pages((unsigned long)pci_seg->alias_table, + get_order(pci_seg->alias_table_size)); + pci_seg->alias_table = NULL; +} + /* * Allocates the command buffer. This buffer is per AMD IOMMU. We can * write commands to that buffer later and the IOMMU will execute them @@ -724,7 +808,7 @@ static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, void *buf = (void *)__get_free_pages(gfp, order); if (buf && - iommu_feature(iommu, FEATURE_SNP) && + check_feature_on_all_iommus(FEATURE_SNP) && set_memory_4k((unsigned long)buf, (1 << order))) { free_pages((unsigned long)buf, order); buf = NULL; @@ -815,20 +899,15 @@ static void free_ga_log(struct amd_iommu *iommu) #endif } +#ifdef CONFIG_IRQ_REMAP static int iommu_ga_log_enable(struct amd_iommu *iommu) { -#ifdef CONFIG_IRQ_REMAP u32 status, i; u64 entry; if (!iommu->ga_log) return -EINVAL; - /* Check if already running */ - status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); - if (WARN_ON(status & (MMIO_STATUS_GALOG_RUN_MASK))) - return 0; - entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, &entry, sizeof(entry)); @@ -852,13 +931,12 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) if (WARN_ON(i >= LOOP_TIMEOUT)) return -EINVAL; -#endif /* CONFIG_IRQ_REMAP */ + return 0; } static int iommu_init_ga_log(struct amd_iommu *iommu) { -#ifdef CONFIG_IRQ_REMAP if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) return 0; @@ -876,10 +954,8 @@ static int iommu_init_ga_log(struct amd_iommu *iommu) err_out: free_ga_log(iommu); return -EINVAL; -#else - return 0; -#endif /* CONFIG_IRQ_REMAP */ } +#endif /* CONFIG_IRQ_REMAP */ static int __init alloc_cwwb_sem(struct amd_iommu *iommu) { @@ -916,56 +992,59 @@ static void iommu_enable_gt(struct amd_iommu *iommu) } /* sets a specific bit in the device table entry. */ -static void set_dev_entry_bit(u16 devid, u8 bit) +static void __set_dev_entry_bit(struct dev_table_entry *dev_table, + u16 devid, u8 bit) { int i = (bit >> 6) & 0x03; int _bit = bit & 0x3f; - amd_iommu_dev_table[devid].data[i] |= (1UL << _bit); + dev_table[devid].data[i] |= (1UL << _bit); +} + +static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) +{ + struct dev_table_entry *dev_table = get_dev_table(iommu); + + return __set_dev_entry_bit(dev_table, devid, bit); } -static int get_dev_entry_bit(u16 devid, u8 bit) +static int __get_dev_entry_bit(struct dev_table_entry *dev_table, + u16 devid, u8 bit) { int i = (bit >> 6) & 0x03; int _bit = bit & 0x3f; - return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit; + return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit; } +static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) +{ + struct dev_table_entry *dev_table = get_dev_table(iommu); -static bool copy_device_table(void) + return __get_dev_entry_bit(dev_table, devid, bit); +} + +static bool __copy_device_table(struct amd_iommu *iommu) { - u64 int_ctl, int_tab_len, entry = 0, last_entry = 0; + u64 int_ctl, int_tab_len, entry = 0; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; struct dev_table_entry *old_devtb = NULL; u32 lo, hi, devid, old_devtb_size; phys_addr_t old_devtb_phys; - struct amd_iommu *iommu; u16 dom_id, dte_v, irq_v; gfp_t gfp_flag; u64 tmp; - if (!amd_iommu_pre_enabled) - return false; - - pr_warn("Translation is already enabled - trying to copy translation structures\n"); - for_each_iommu(iommu) { - /* All IOMMUs should use the same device table with the same size */ - lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); - hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); - entry = (((u64) hi) << 32) + lo; - if (last_entry && last_entry != entry) { - pr_err("IOMMU:%d should use the same dev table as others!\n", - iommu->index); - return false; - } - last_entry = entry; + /* Each IOMMU use separate device table with the same size */ + lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); + hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); + entry = (((u64) hi) << 32) + lo; - old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; - if (old_devtb_size != dev_table_size) { - pr_err("The device table size of IOMMU:%d is not expected!\n", - iommu->index); - return false; - } + old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; + if (old_devtb_size != pci_seg->dev_table_size) { + pr_err("The device table size of IOMMU:%d is not expected!\n", + iommu->index); + return false; } /* @@ -981,38 +1060,38 @@ static bool copy_device_table(void) } old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel()) ? (__force void *)ioremap_encrypted(old_devtb_phys, - dev_table_size) - : memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB); + pci_seg->dev_table_size) + : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB); if (!old_devtb) return false; gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32; - old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, - get_order(dev_table_size)); - if (old_dev_tbl_cpy == NULL) { + pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, + get_order(pci_seg->dev_table_size)); + if (pci_seg->old_dev_tbl_cpy == NULL) { pr_err("Failed to allocate memory for copying old device table!\n"); memunmap(old_devtb); return false; } - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { - old_dev_tbl_cpy[devid] = old_devtb[devid]; + for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { + pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid]; dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; if (dte_v && dom_id) { - old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; - old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; + pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; + pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; __set_bit(dom_id, amd_iommu_pd_alloc_bitmap); /* If gcr3 table existed, mask it out */ if (old_devtb[devid].data[0] & DTE_FLAG_GV) { tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; - old_dev_tbl_cpy[devid].data[1] &= ~tmp; + pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp; tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A; tmp |= DTE_FLAG_GV; - old_dev_tbl_cpy[devid].data[0] &= ~tmp; + pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp; } } @@ -1027,7 +1106,7 @@ static bool copy_device_table(void) return false; } - old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; + pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; } } memunmap(old_devtb); @@ -1035,21 +1114,42 @@ static bool copy_device_table(void) return true; } -void amd_iommu_apply_erratum_63(u16 devid) +static bool copy_device_table(void) { - int sysmgt; + struct amd_iommu *iommu; + struct amd_iommu_pci_seg *pci_seg; - sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) | - (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1); + if (!amd_iommu_pre_enabled) + return false; - if (sysmgt == 0x01) - set_dev_entry_bit(devid, DEV_ENTRY_IW); + pr_warn("Translation is already enabled - trying to copy translation structures\n"); + + /* + * All IOMMUs within PCI segment shares common device table. + * Hence copy device table only once per PCI segment. + */ + for_each_pci_segment(pci_seg) { + for_each_iommu(iommu) { + if (pci_seg->id != iommu->pci_seg->id) + continue; + if (!__copy_device_table(iommu)) + return false; + break; + } + } + + return true; } -/* Writes the specific IOMMU for a device into the rlookup table */ -static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) +void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid) { - amd_iommu_rlookup_table[devid] = iommu; + int sysmgt; + + sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) | + (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1); + + if (sysmgt == 0x01) + set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW); } /* @@ -1060,26 +1160,26 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, u16 devid, u32 flags, u32 ext_flags) { if (flags & ACPI_DEVFLAG_INITPASS) - set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS); if (flags & ACPI_DEVFLAG_EXTINT) - set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS); if (flags & ACPI_DEVFLAG_NMI) - set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS); if (flags & ACPI_DEVFLAG_SYSMGT1) - set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1); if (flags & ACPI_DEVFLAG_SYSMGT2) - set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2); if (flags & ACPI_DEVFLAG_LINT0) - set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS); if (flags & ACPI_DEVFLAG_LINT1) - set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS); - amd_iommu_apply_erratum_63(devid); + amd_iommu_apply_erratum_63(iommu, devid); - set_iommu_for_device(iommu, devid); + amd_iommu_set_rlookup_table(iommu, devid); } -int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line) +int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line) { struct devid_map *entry; struct list_head *list; @@ -1116,7 +1216,7 @@ int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line) return 0; } -static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid, +static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid, bool cmd_line) { struct acpihid_map_entry *entry; @@ -1195,10 +1295,11 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, { u8 *p = (u8 *)h; u8 *end = p, flags = 0; - u16 devid = 0, devid_start = 0, devid_to = 0; + u16 devid = 0, devid_start = 0, devid_to = 0, seg_id; u32 dev_i, ext_flags = 0; bool alias = false; struct ivhd_entry *e; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; u32 ivhd_size; int ret; @@ -1230,19 +1331,21 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, while (p < end) { e = (struct ivhd_entry *)p; + seg_id = pci_seg->id; + switch (e->type) { case IVHD_DEV_ALL: DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); - for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i) + for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i) set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); break; case IVHD_DEV_SELECT: - DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x " + DUMP_printk(" DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x " "flags: %02x\n", - PCI_BUS_NUM(e->devid), + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags); @@ -1253,8 +1356,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_SELECT_RANGE_START: DUMP_printk(" DEV_SELECT_RANGE_START\t " - "devid: %02x:%02x.%x flags: %02x\n", - PCI_BUS_NUM(e->devid), + "devid: %04x:%02x:%02x.%x flags: %02x\n", + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags); @@ -1266,9 +1369,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_ALIAS: - DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x " + DUMP_printk(" DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x " "flags: %02x devid_to: %02x:%02x.%x\n", - PCI_BUS_NUM(e->devid), + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, @@ -1280,18 +1383,18 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, devid_to = e->ext >> 8; set_dev_entry_from_acpi(iommu, devid , e->flags, 0); set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); - amd_iommu_alias_table[devid] = devid_to; + pci_seg->alias_table[devid] = devid_to; break; case IVHD_DEV_ALIAS_RANGE: DUMP_printk(" DEV_ALIAS_RANGE\t\t " - "devid: %02x:%02x.%x flags: %02x " - "devid_to: %02x:%02x.%x\n", - PCI_BUS_NUM(e->devid), + "devid: %04x:%02x:%02x.%x flags: %02x " + "devid_to: %04x:%02x:%02x.%x\n", + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, - PCI_BUS_NUM(e->ext >> 8), + seg_id, PCI_BUS_NUM(e->ext >> 8), PCI_SLOT(e->ext >> 8), PCI_FUNC(e->ext >> 8)); @@ -1303,9 +1406,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_EXT_SELECT: - DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x " + DUMP_printk(" DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x " "flags: %02x ext: %08x\n", - PCI_BUS_NUM(e->devid), + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, e->ext); @@ -1317,8 +1420,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_EXT_SELECT_RANGE: DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " - "%02x:%02x.%x flags: %02x ext: %08x\n", - PCI_BUS_NUM(e->devid), + "%04x:%02x:%02x.%x flags: %02x ext: %08x\n", + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, e->ext); @@ -1330,15 +1433,15 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_RANGE_END: - DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", - PCI_BUS_NUM(e->devid), + DUMP_printk(" DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n", + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid)); devid = e->devid; for (dev_i = devid_start; dev_i <= devid; ++dev_i) { if (alias) { - amd_iommu_alias_table[dev_i] = devid_to; + pci_seg->alias_table[dev_i] = devid_to; set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags); } @@ -1349,11 +1452,11 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_SPECIAL: { u8 handle, type; const char *var; - u16 devid; + u32 devid; int ret; handle = e->ext & 0xff; - devid = (e->ext >> 8) & 0xffff; + devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8)); type = (e->ext >> 24) & 0xff; if (type == IVHD_SPECIAL_IOAPIC) @@ -1363,9 +1466,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, else var = "UNKNOWN"; - DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n", + DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n", var, (int)handle, - PCI_BUS_NUM(devid), + seg_id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); @@ -1383,7 +1486,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; } case IVHD_DEV_ACPI_HID: { - u16 devid; + u32 devid; u8 hid[ACPIHID_HID_LEN]; u8 uid[ACPIHID_UID_LEN]; int ret; @@ -1426,9 +1529,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; } - devid = e->devid; - DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n", - hid, uid, + devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid); + DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n", + hid, uid, seg_id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); @@ -1458,6 +1561,74 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, return 0; } +/* Allocate PCI segment data structure */ +static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, + struct acpi_table_header *ivrs_base) +{ + struct amd_iommu_pci_seg *pci_seg; + int last_bdf; + + /* + * First parse ACPI tables to find the largest Bus/Dev/Func we need to + * handle in this PCI segment. Upon this information the shared data + * structures for the PCI segments in the system will be allocated. + */ + last_bdf = find_last_devid_acpi(ivrs_base, id); + if (last_bdf < 0) + return NULL; + + pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL); + if (pci_seg == NULL) + return NULL; + + pci_seg->last_bdf = last_bdf; + DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); + pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf); + pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf); + pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf); + + pci_seg->id = id; + init_llist_head(&pci_seg->dev_data_list); + INIT_LIST_HEAD(&pci_seg->unity_map); + list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list); + + if (alloc_dev_table(pci_seg)) + return NULL; + if (alloc_alias_table(pci_seg)) + return NULL; + if (alloc_rlookup_table(pci_seg)) + return NULL; + + return pci_seg; +} + +static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id, + struct acpi_table_header *ivrs_base) +{ + struct amd_iommu_pci_seg *pci_seg; + + for_each_pci_segment(pci_seg) { + if (pci_seg->id == id) + return pci_seg; + } + + return alloc_pci_segment(id, ivrs_base); +} + +static void __init free_pci_segments(void) +{ + struct amd_iommu_pci_seg *pci_seg, *next; + + for_each_pci_segment_safe(pci_seg, next) { + list_del(&pci_seg->list); + free_irq_lookup_table(pci_seg); + free_rlookup_table(pci_seg); + free_alias_table(pci_seg); + free_dev_table(pci_seg); + kfree(pci_seg); + } +} + static void __init free_iommu_one(struct amd_iommu *iommu) { free_cwwb_sem(iommu); @@ -1542,9 +1713,15 @@ static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) * together and also allocates the command buffer and programs the * hardware. It does NOT enable the IOMMU. This is done afterwards. */ -static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) +static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, + struct acpi_table_header *ivrs_base) { - int ret; + struct amd_iommu_pci_seg *pci_seg; + + pci_seg = get_pci_segment(h->pci_seg, ivrs_base); + if (pci_seg == NULL) + return -ENOMEM; + iommu->pci_seg = pci_seg; raw_spin_lock_init(&iommu->lock); iommu->cmd_sem_val = 0; @@ -1566,7 +1743,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) */ iommu->devid = h->devid; iommu->cap_ptr = h->cap_ptr; - iommu->pci_seg = h->pci_seg; iommu->mmio_phys = h->mmio_phys; switch (h->type) { @@ -1621,6 +1797,13 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->mmio_base) return -ENOMEM; + return init_iommu_from_acpi(iommu, h); +} + +static int __init init_iommu_one_late(struct amd_iommu *iommu) +{ + int ret; + if (alloc_cwwb_sem(iommu)) return -ENOMEM; @@ -1642,10 +1825,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (amd_iommu_pre_enabled) amd_iommu_pre_enabled = translation_pre_enabled(iommu); - ret = init_iommu_from_acpi(iommu, h); - if (ret) - return ret; - if (amd_iommu_irq_remap) { ret = amd_iommu_create_irq_domain(iommu); if (ret) @@ -1656,7 +1835,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) * Make sure IOMMU is not considered to translate itself. The IVRS * table tells us so, but this is a lie! */ - amd_iommu_rlookup_table[iommu->devid] = NULL; + iommu->pci_seg->rlookup_table[iommu->devid] = NULL; return 0; } @@ -1701,15 +1880,16 @@ static int __init init_iommu_all(struct acpi_table_header *table) end += table->length; p += IVRS_HEADER_LENGTH; + /* Phase 1: Process all IVHD blocks */ while (p < end) { h = (struct ivhd_header *)p; if (*p == amd_iommu_target_ivhd_type) { - DUMP_printk("device: %02x:%02x.%01x cap: %04x " - "seg: %d flags: %01x info %04x\n", - PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid), - PCI_FUNC(h->devid), h->cap_ptr, - h->pci_seg, h->flags, h->info); + DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x " + "flags: %01x info %04x\n", + h->pci_seg, PCI_BUS_NUM(h->devid), + PCI_SLOT(h->devid), PCI_FUNC(h->devid), + h->cap_ptr, h->flags, h->info); DUMP_printk(" mmio-addr: %016llx\n", h->mmio_phys); @@ -1717,7 +1897,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) if (iommu == NULL) return -ENOMEM; - ret = init_iommu_one(iommu, h); + ret = init_iommu_one(iommu, h, table); if (ret) return ret; } @@ -1726,6 +1906,16 @@ static int __init init_iommu_all(struct acpi_table_header *table) } WARN_ON(p != end); + /* Phase 2 : Early feature support check */ + get_global_efr(); + + /* Phase 3 : Enabling IOMMU features */ + for_each_iommu(iommu) { + ret = init_iommu_one_late(iommu); + if (ret) + return ret; + } + return 0; } @@ -1762,7 +1952,7 @@ static ssize_t amd_iommu_show_features(struct device *dev, char *buf) { struct amd_iommu *iommu = dev_to_amd_iommu(dev); - return sprintf(buf, "%llx\n", iommu->features); + return sprintf(buf, "%llx:%llx\n", iommu->features2, iommu->features); } static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); @@ -1789,16 +1979,18 @@ static const struct attribute_group *amd_iommu_groups[] = { */ static void __init late_iommu_features_init(struct amd_iommu *iommu) { - u64 features; + u64 features, features2; if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) return; /* read extended feature bits */ features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); + features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2); if (!iommu->features) { iommu->features = features; + iommu->features2 = features2; return; } @@ -1806,9 +1998,13 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu) * Sanity check and warn if EFR values from * IVHD and MMIO conflict. */ - if (features != iommu->features) - pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx).\n", - features, iommu->features); + if (features != iommu->features || + features2 != iommu->features2) { + pr_warn(FW_WARN + "EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n", + features, iommu->features, + features2, iommu->features2); + } } static int __init iommu_init_pci(struct amd_iommu *iommu) @@ -1816,7 +2012,8 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) int cap_ptr = iommu->cap_ptr; int ret; - iommu->dev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(iommu->devid), + iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, + PCI_BUS_NUM(iommu->devid), iommu->devid & 0xff); if (!iommu->dev) return -ENODEV; @@ -1863,10 +2060,6 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) return -ENOMEM; - ret = iommu_init_ga_log(iommu); - if (ret) - return ret; - if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { pr_info("Using strict mode due to virtualization\n"); iommu_set_dma_strict(); @@ -1879,7 +2072,8 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) int i, j; iommu->root_pdev = - pci_get_domain_bus_and_slot(0, iommu->dev->bus->number, + pci_get_domain_bus_and_slot(iommu->pci_seg->id, + iommu->dev->bus->number, PCI_DEVFN(0, 0)); /* @@ -1906,8 +2100,11 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) amd_iommu_erratum_746_workaround(iommu); amd_iommu_ats_write_check_workaround(iommu); - iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev, + ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev, amd_iommu_groups, "ivhd%d", iommu->index); + if (ret) + return ret; + iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); return pci_enable_device(iommu->dev); @@ -1928,7 +2125,7 @@ static void print_iommu_info(void) pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - pr_info("Extended features (%#llx):", iommu->features); + pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2); for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { if (iommu_feature(iommu, (1ULL << i))) @@ -1938,13 +2135,14 @@ static void print_iommu_info(void) if (iommu->features & FEATURE_GAM_VAPIC) pr_cont(" GA_vAPIC"); + if (iommu->features & FEATURE_SNP) + pr_cont(" SNP"); + pr_cont("\n"); } } if (irq_remapping_enabled) { pr_info("Interrupt remapping enabled\n"); - if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) - pr_info("Virtual APIC enabled\n"); if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) pr_info("X2APIC enabled\n"); } @@ -1953,6 +2151,7 @@ static void print_iommu_info(void) static int __init amd_iommu_init_pci(void) { struct amd_iommu *iommu; + struct amd_iommu_pci_seg *pci_seg; int ret; for_each_iommu(iommu) { @@ -1983,7 +2182,8 @@ static int __init amd_iommu_init_pci(void) goto out; } - init_device_table_dma(); + for_each_pci_segment(pci_seg) + init_device_table_dma(pci_seg); for_each_iommu(iommu) iommu_flush_all_caches(iommu); @@ -2232,9 +2432,6 @@ enable_faults: if (iommu->ppr_log != NULL) iommu_feature_enable(iommu, CONTROL_PPRINT_EN); - - iommu_ga_log_enable(iommu); - return 0; } @@ -2249,19 +2446,28 @@ enable_faults: static void __init free_unity_maps(void) { struct unity_map_entry *entry, *next; + struct amd_iommu_pci_seg *p, *pci_seg; - list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) { - list_del(&entry->list); - kfree(entry); + for_each_pci_segment_safe(pci_seg, p) { + list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) { + list_del(&entry->list); + kfree(entry); + } } } /* called for unity map ACPI definition */ -static int __init init_unity_map_range(struct ivmd_header *m) +static int __init init_unity_map_range(struct ivmd_header *m, + struct acpi_table_header *ivrs_base) { struct unity_map_entry *e = NULL; + struct amd_iommu_pci_seg *pci_seg; char *s; + pci_seg = get_pci_segment(m->pci_seg, ivrs_base); + if (pci_seg == NULL) + return -ENOMEM; + e = kzalloc(sizeof(*e), GFP_KERNEL); if (e == NULL) return -ENOMEM; @@ -2277,7 +2483,7 @@ static int __init init_unity_map_range(struct ivmd_header *m) case ACPI_IVMD_TYPE_ALL: s = "IVMD_TYPE_ALL\t\t"; e->devid_start = 0; - e->devid_end = amd_iommu_last_bdf; + e->devid_end = pci_seg->last_bdf; break; case ACPI_IVMD_TYPE_RANGE: s = "IVMD_TYPE_RANGE\t\t"; @@ -2299,14 +2505,16 @@ static int __init init_unity_map_range(struct ivmd_header *m) if (m->flags & IVMD_FLAG_EXCL_RANGE) e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1; - DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x" - " range_start: %016llx range_end: %016llx flags: %x\n", s, + DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: " + "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx" + " flags: %x\n", s, m->pci_seg, PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start), - PCI_FUNC(e->devid_start), PCI_BUS_NUM(e->devid_end), + PCI_FUNC(e->devid_start), m->pci_seg, + PCI_BUS_NUM(e->devid_end), PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), e->address_start, e->address_end, m->flags); - list_add_tail(&e->list, &amd_iommu_unity_map); + list_add_tail(&e->list, &pci_seg->unity_map); return 0; } @@ -2323,7 +2531,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table) while (p < end) { m = (struct ivmd_header *)p; if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE)) - init_unity_map_range(m); + init_unity_map_range(m, table); p += m->length; } @@ -2334,35 +2542,48 @@ static int __init init_memory_definitions(struct acpi_table_header *table) /* * Init the device table to not allow DMA access for devices */ -static void init_device_table_dma(void) +static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) { u32 devid; + struct dev_table_entry *dev_table = pci_seg->dev_table; - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { - set_dev_entry_bit(devid, DEV_ENTRY_VALID); - set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); + if (dev_table == NULL) + return; + + for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { + __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID); + if (!amd_iommu_snp_en) + __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION); } } -static void __init uninit_device_table_dma(void) +static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg) { u32 devid; + struct dev_table_entry *dev_table = pci_seg->dev_table; + + if (dev_table == NULL) + return; - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { - amd_iommu_dev_table[devid].data[0] = 0ULL; - amd_iommu_dev_table[devid].data[1] = 0ULL; + for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { + dev_table[devid].data[0] = 0ULL; + dev_table[devid].data[1] = 0ULL; } } static void init_device_table(void) { + struct amd_iommu_pci_seg *pci_seg; u32 devid; if (!amd_iommu_irq_remap) return; - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) - set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN); + for_each_pci_segment(pci_seg) { + for (devid = 0; devid <= pci_seg->last_bdf; ++devid) + __set_dev_entry_bit(pci_seg->dev_table, + devid, DEV_ENTRY_IRQ_TBL_EN); + } } static void iommu_init_flags(struct amd_iommu *iommu) @@ -2440,8 +2661,6 @@ static void iommu_enable_ga(struct amd_iommu *iommu) #ifdef CONFIG_IRQ_REMAP switch (amd_iommu_guest_ir) { case AMD_IOMMU_GUEST_IR_VAPIC: - iommu_feature_enable(iommu, CONTROL_GAM_EN); - fallthrough; case AMD_IOMMU_GUEST_IR_LEGACY_GA: iommu_feature_enable(iommu, CONTROL_GA_EN); iommu->irte_ops = &irte_128_ops; @@ -2478,7 +2697,7 @@ static void early_enable_iommu(struct amd_iommu *iommu) static void early_enable_iommus(void) { struct amd_iommu *iommu; - + struct amd_iommu_pci_seg *pci_seg; if (!copy_device_table()) { /* @@ -2488,9 +2707,14 @@ static void early_enable_iommus(void) */ if (amd_iommu_pre_enabled) pr_err("Failed to copy DEV table from previous kernel.\n"); - if (old_dev_tbl_cpy != NULL) - free_pages((unsigned long)old_dev_tbl_cpy, - get_order(dev_table_size)); + + for_each_pci_segment(pci_seg) { + if (pci_seg->old_dev_tbl_cpy != NULL) { + free_pages((unsigned long)pci_seg->old_dev_tbl_cpy, + get_order(pci_seg->dev_table_size)); + pci_seg->old_dev_tbl_cpy = NULL; + } + } for_each_iommu(iommu) { clear_translation_pre_enabled(iommu); @@ -2498,9 +2722,13 @@ static void early_enable_iommus(void) } } else { pr_info("Copied DEV table from previous kernel.\n"); - free_pages((unsigned long)amd_iommu_dev_table, - get_order(dev_table_size)); - amd_iommu_dev_table = old_dev_tbl_cpy; + + for_each_pci_segment(pci_seg) { + free_pages((unsigned long)pci_seg->dev_table, + get_order(pci_seg->dev_table_size)); + pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; + } + for_each_iommu(iommu) { iommu_disable_command_buffer(iommu); iommu_disable_event_buffer(iommu); @@ -2512,19 +2740,6 @@ static void early_enable_iommus(void) iommu_flush_all_caches(iommu); } } - -#ifdef CONFIG_IRQ_REMAP - /* - * Note: We have already checked GASup from IVRS table. - * Now, we need to make sure that GAMSup is set. - */ - if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && - !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) - amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; - - if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) - amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); -#endif } static void enable_iommus_v2(void) @@ -2537,10 +2752,72 @@ static void enable_iommus_v2(void) } } +static void enable_iommus_vapic(void) +{ +#ifdef CONFIG_IRQ_REMAP + u32 status, i; + struct amd_iommu *iommu; + + for_each_iommu(iommu) { + /* + * Disable GALog if already running. It could have been enabled + * in the previous boot before kdump. + */ + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) + continue; + + iommu_feature_disable(iommu, CONTROL_GALOG_EN); + iommu_feature_disable(iommu, CONTROL_GAINT_EN); + + /* + * Need to set and poll check the GALOGRun bit to zero before + * we can set/ modify GA Log registers safely. + */ + for (i = 0; i < LOOP_TIMEOUT; ++i) { + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) + break; + udelay(10); + } + + if (WARN_ON(i >= LOOP_TIMEOUT)) + return; + } + + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && + !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) { + amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; + return; + } + + if (amd_iommu_snp_en && + !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) { + pr_warn("Force to disable Virtual APIC due to SNP\n"); + amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; + return; + } + + /* Enabling GAM and SNPAVIC support */ + for_each_iommu(iommu) { + if (iommu_init_ga_log(iommu) || + iommu_ga_log_enable(iommu)) + return; + + iommu_feature_enable(iommu, CONTROL_GAM_EN); + if (amd_iommu_snp_en) + iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN); + } + + amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); + pr_info("Virtual APIC enabled\n"); +#endif +} + static void enable_iommus(void) { early_enable_iommus(); - + enable_iommus_vapic(); enable_iommus_v2(); } @@ -2590,27 +2867,11 @@ static struct syscore_ops amd_iommu_syscore_ops = { static void __init free_iommu_resources(void) { - kmemleak_free(irq_lookup_table); - free_pages((unsigned long)irq_lookup_table, - get_order(rlookup_table_size)); - irq_lookup_table = NULL; - kmem_cache_destroy(amd_iommu_irq_cache); amd_iommu_irq_cache = NULL; - free_pages((unsigned long)amd_iommu_rlookup_table, - get_order(rlookup_table_size)); - amd_iommu_rlookup_table = NULL; - - free_pages((unsigned long)amd_iommu_alias_table, - get_order(alias_table_size)); - amd_iommu_alias_table = NULL; - - free_pages((unsigned long)amd_iommu_dev_table, - get_order(dev_table_size)); - amd_iommu_dev_table = NULL; - free_iommu_all(); + free_pci_segments(); } /* SB IOAPIC is always on this device in AMD systems */ @@ -2709,7 +2970,7 @@ static void __init ivinfo_init(void *ivrs) static int __init early_amd_iommu_init(void) { struct acpi_table_header *ivrs_base; - int i, remap_cache_sz, ret; + int remap_cache_sz, ret; acpi_status status; if (!amd_iommu_detected) @@ -2737,42 +2998,8 @@ static int __init early_amd_iommu_init(void) amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); - /* - * First parse ACPI tables to find the largest Bus/Dev/Func - * we need to handle. Upon this information the shared data - * structures for the IOMMUs in the system will be allocated - */ - ret = find_last_devid_acpi(ivrs_base); - if (ret) - goto out; - - dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); - alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); - rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); - /* Device table - directly used by all IOMMUs */ ret = -ENOMEM; - amd_iommu_dev_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO | GFP_DMA32, - get_order(dev_table_size)); - if (amd_iommu_dev_table == NULL) - goto out; - - /* - * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the - * IOMMU see for that device - */ - amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL, - get_order(alias_table_size)); - if (amd_iommu_alias_table == NULL) - goto out; - - /* IOMMU rlookup table - find the IOMMU for a specific device */ - amd_iommu_rlookup_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(rlookup_table_size)); - if (amd_iommu_rlookup_table == NULL) - goto out; amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, @@ -2781,12 +3008,6 @@ static int __init early_amd_iommu_init(void) goto out; /* - * let all alias entries point to itself - */ - for (i = 0; i <= amd_iommu_last_bdf; ++i) - amd_iommu_alias_table[i] = i; - - /* * never allocate domain 0 because its used as the non-allocated and * error value placeholder */ @@ -2808,6 +3029,7 @@ static int __init early_amd_iommu_init(void) amd_iommu_irq_remap = check_ioapic_information(); if (amd_iommu_irq_remap) { + struct amd_iommu_pci_seg *pci_seg; /* * Interrupt remapping enabled, create kmem_cache for the * remapping tables. @@ -2824,13 +3046,10 @@ static int __init early_amd_iommu_init(void) if (!amd_iommu_irq_cache) goto out; - irq_lookup_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(rlookup_table_size)); - kmemleak_alloc(irq_lookup_table, rlookup_table_size, - 1, GFP_KERNEL); - if (!irq_lookup_table) - goto out; + for_each_pci_segment(pci_seg) { + if (alloc_irq_lookup_table(pci_seg)) + goto out; + } } ret = init_memory_definitions(ivrs_base); @@ -2937,6 +3156,7 @@ static int __init state_next(void) register_syscore_ops(&amd_iommu_syscore_ops); ret = amd_iommu_init_pci(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; + enable_iommus_vapic(); enable_iommus_v2(); break; case IOMMU_PCI_INIT: @@ -2967,8 +3187,11 @@ static int __init state_next(void) free_iommu_resources(); } else { struct amd_iommu *iommu; + struct amd_iommu_pci_seg *pci_seg; + + for_each_pci_segment(pci_seg) + uninit_device_table_dma(pci_seg); - uninit_device_table_dma(); for_each_iommu(iommu) iommu_flush_all_caches(iommu); } @@ -3161,15 +3384,17 @@ static int __init parse_amd_iommu_options(char *str) static int __init parse_ivrs_ioapic(char *str) { - unsigned int bus, dev, fn; + u32 seg = 0, bus, dev, fn; int ret, id, i; - u16 devid; + u32 devid; ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - pr_err("Invalid command line: ivrs_ioapic%s\n", str); - return 1; + ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); + if (ret != 5) { + pr_err("Invalid command line: ivrs_ioapic%s\n", str); + return 1; + } } if (early_ioapic_map_size == EARLY_MAP_SIZE) { @@ -3178,7 +3403,7 @@ static int __init parse_ivrs_ioapic(char *str) return 1; } - devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); cmdline_maps = true; i = early_ioapic_map_size++; @@ -3191,15 +3416,17 @@ static int __init parse_ivrs_ioapic(char *str) static int __init parse_ivrs_hpet(char *str) { - unsigned int bus, dev, fn; + u32 seg = 0, bus, dev, fn; int ret, id, i; - u16 devid; + u32 devid; ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - pr_err("Invalid command line: ivrs_hpet%s\n", str); - return 1; + ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); + if (ret != 5) { + pr_err("Invalid command line: ivrs_hpet%s\n", str); + return 1; + } } if (early_hpet_map_size == EARLY_MAP_SIZE) { @@ -3208,7 +3435,7 @@ static int __init parse_ivrs_hpet(char *str) return 1; } - devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); cmdline_maps = true; i = early_hpet_map_size++; @@ -3221,15 +3448,18 @@ static int __init parse_ivrs_hpet(char *str) static int __init parse_ivrs_acpihid(char *str) { - u32 bus, dev, fn; + u32 seg = 0, bus, dev, fn; char *hid, *uid, *p; char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0}; int ret, i; ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid); if (ret != 4) { - pr_err("Invalid command line: ivrs_acpihid(%s)\n", str); - return 1; + ret = sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid); + if (ret != 5) { + pr_err("Invalid command line: ivrs_acpihid(%s)\n", str); + return 1; + } } p = acpiid; @@ -3244,8 +3474,7 @@ static int __init parse_ivrs_acpihid(char *str) i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); - early_acpihid_map[i].devid = - ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); early_acpihid_map[i].cmd_line = true; return 1; @@ -3260,7 +3489,12 @@ __setup("ivrs_acpihid", parse_ivrs_acpihid); bool amd_iommu_v2_supported(void) { - return amd_iommu_v2_present; + /* + * Since DTE[Mode]=0 is prohibited on SNP-enabled system + * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without + * setting up IOMMUv1 page table. + */ + return amd_iommu_v2_present && !amd_iommu_snp_en; } EXPORT_SYMBOL(amd_iommu_v2_supported); @@ -3363,3 +3597,41 @@ int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); } + +#ifdef CONFIG_AMD_MEM_ENCRYPT +int amd_iommu_snp_enable(void) +{ + /* + * The SNP support requires that IOMMU must be enabled, and is + * not configured in the passthrough mode. + */ + if (no_iommu || iommu_default_passthrough()) { + pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported"); + return -EINVAL; + } + + /* + * Prevent enabling SNP after IOMMU_ENABLED state because this process + * affect how IOMMU driver sets up data structures and configures + * IOMMU hardware. + */ + if (init_state > IOMMU_ENABLED) { + pr_err("SNP: Too late to enable SNP for IOMMU.\n"); + return -EINVAL; + } + + amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP); + if (!amd_iommu_snp_en) + return -EINVAL; + + pr_info("SNP enabled\n"); + + /* Enforce IOMMU v1 pagetable when SNP is enabled. */ + if (amd_iommu_pgtable != AMD_IOMMU_V1) { + pr_warn("Force to using AMD IOMMU v1 page table due to SNP\n"); + amd_iommu_pgtable = AMD_IOMMU_V1; + } + + return 0; +} +#endif diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 6608d1717574..7d4b61e5db47 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -258,7 +258,7 @@ static u64 *alloc_pte(struct protection_domain *domain, __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page)); /* pte could have been changed somewhere. */ - if (cmpxchg64(pte, __pte, __npte) != __pte) + if (!try_cmpxchg64(pte, &__pte, __npte)) free_page((unsigned long)page); else if (IOMMU_PTE_PRESENT(__pte)) *updated = true; @@ -341,10 +341,8 @@ static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist) u64 *pt; int mode; - while (cmpxchg64(pte, pteval, 0) != pteval) { + while (!try_cmpxchg64(pte, &pteval, 0)) pr_warn("AMD-Vi: IOMMU pte changed since we read it\n"); - pteval = *pte; - } if (!IOMMU_PTE_PRESENT(pteval)) return; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 840831d5d2ad..65b8e4fd8217 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -62,9 +62,6 @@ static DEFINE_SPINLOCK(pd_bitmap_lock); -/* List of all available dev_data structures */ -static LLIST_HEAD(dev_data_list); - LIST_HEAD(ioapic_map); LIST_HEAD(hpet_map); LIST_HEAD(acpihid_map); @@ -95,13 +92,6 @@ static void detach_device(struct device *dev); * ****************************************************************************/ -static inline u16 get_pci_device_id(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - - return pci_dev_id(pdev); -} - static inline int get_acpihid_device_id(struct device *dev, struct acpihid_map_entry **entry) { @@ -122,16 +112,74 @@ static inline int get_acpihid_device_id(struct device *dev, return -EINVAL; } -static inline int get_device_id(struct device *dev) +static inline int get_device_sbdf_id(struct device *dev) { - int devid; + int sbdf; if (dev_is_pci(dev)) - devid = get_pci_device_id(dev); + sbdf = get_pci_sbdf_id(to_pci_dev(dev)); else - devid = get_acpihid_device_id(dev, NULL); + sbdf = get_acpihid_device_id(dev, NULL); + + return sbdf; +} + +struct dev_table_entry *get_dev_table(struct amd_iommu *iommu) +{ + struct dev_table_entry *dev_table; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; + + BUG_ON(pci_seg == NULL); + dev_table = pci_seg->dev_table; + BUG_ON(dev_table == NULL); + + return dev_table; +} + +static inline u16 get_device_segment(struct device *dev) +{ + u16 seg; + + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + + seg = pci_domain_nr(pdev->bus); + } else { + u32 devid = get_acpihid_device_id(dev, NULL); + + seg = PCI_SBDF_TO_SEGID(devid); + } + + return seg; +} + +/* Writes the specific IOMMU for a device into the PCI segment rlookup table */ +void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid) +{ + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; + + pci_seg->rlookup_table[devid] = iommu; +} + +static struct amd_iommu *__rlookup_amd_iommu(u16 seg, u16 devid) +{ + struct amd_iommu_pci_seg *pci_seg; + + for_each_pci_segment(pci_seg) { + if (pci_seg->id == seg) + return pci_seg->rlookup_table[devid]; + } + return NULL; +} - return devid; +static struct amd_iommu *rlookup_amd_iommu(struct device *dev) +{ + u16 seg = get_device_segment(dev); + int devid = get_device_sbdf_id(dev); + + if (devid < 0) + return NULL; + return __rlookup_amd_iommu(seg, PCI_SBDF_TO_DEVID(devid)); } static struct protection_domain *to_pdomain(struct iommu_domain *dom) @@ -139,9 +187,10 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } -static struct iommu_dev_data *alloc_dev_data(u16 devid) +static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid) { struct iommu_dev_data *dev_data; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); if (!dev_data) @@ -151,19 +200,20 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) dev_data->devid = devid; ratelimit_default_init(&dev_data->rs); - llist_add(&dev_data->dev_data_list, &dev_data_list); + llist_add(&dev_data->dev_data_list, &pci_seg->dev_data_list); return dev_data; } -static struct iommu_dev_data *search_dev_data(u16 devid) +static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid) { struct iommu_dev_data *dev_data; struct llist_node *node; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; - if (llist_empty(&dev_data_list)) + if (llist_empty(&pci_seg->dev_data_list)) return NULL; - node = dev_data_list.first; + node = pci_seg->dev_data_list.first; llist_for_each_entry(dev_data, node, dev_data_list) { if (dev_data->devid == devid) return dev_data; @@ -174,67 +224,74 @@ static struct iommu_dev_data *search_dev_data(u16 devid) static int clone_alias(struct pci_dev *pdev, u16 alias, void *data) { + struct amd_iommu *iommu; + struct dev_table_entry *dev_table; u16 devid = pci_dev_id(pdev); if (devid == alias) return 0; - amd_iommu_rlookup_table[alias] = - amd_iommu_rlookup_table[devid]; - memcpy(amd_iommu_dev_table[alias].data, - amd_iommu_dev_table[devid].data, - sizeof(amd_iommu_dev_table[alias].data)); + iommu = rlookup_amd_iommu(&pdev->dev); + if (!iommu) + return 0; + + amd_iommu_set_rlookup_table(iommu, alias); + dev_table = get_dev_table(iommu); + memcpy(dev_table[alias].data, + dev_table[devid].data, + sizeof(dev_table[alias].data)); return 0; } -static void clone_aliases(struct pci_dev *pdev) +static void clone_aliases(struct amd_iommu *iommu, struct device *dev) { - if (!pdev) + struct pci_dev *pdev; + + if (!dev_is_pci(dev)) return; + pdev = to_pci_dev(dev); /* * The IVRS alias stored in the alias table may not be * part of the PCI DMA aliases if it's bus differs * from the original device. */ - clone_alias(pdev, amd_iommu_alias_table[pci_dev_id(pdev)], NULL); + clone_alias(pdev, iommu->pci_seg->alias_table[pci_dev_id(pdev)], NULL); pci_for_each_dma_alias(pdev, clone_alias, NULL); } -static struct pci_dev *setup_aliases(struct device *dev) +static void setup_aliases(struct amd_iommu *iommu, struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; u16 ivrs_alias; /* For ACPI HID devices, there are no aliases */ if (!dev_is_pci(dev)) - return NULL; + return; /* * Add the IVRS alias to the pci aliases if it is on the same * bus. The IVRS table may know about a quirk that we don't. */ - ivrs_alias = amd_iommu_alias_table[pci_dev_id(pdev)]; + ivrs_alias = pci_seg->alias_table[pci_dev_id(pdev)]; if (ivrs_alias != pci_dev_id(pdev) && PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) pci_add_dma_alias(pdev, ivrs_alias & 0xff, 1); - clone_aliases(pdev); - - return pdev; + clone_aliases(iommu, dev); } -static struct iommu_dev_data *find_dev_data(u16 devid) +static struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid) { struct iommu_dev_data *dev_data; - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - dev_data = search_dev_data(devid); + dev_data = search_dev_data(iommu, devid); if (dev_data == NULL) { - dev_data = alloc_dev_data(devid); + dev_data = alloc_dev_data(iommu, devid); if (!dev_data) return NULL; @@ -296,42 +353,49 @@ static bool pci_iommuv2_capable(struct pci_dev *pdev) */ static bool check_device(struct device *dev) { - int devid; + struct amd_iommu_pci_seg *pci_seg; + struct amd_iommu *iommu; + int devid, sbdf; if (!dev) return false; - devid = get_device_id(dev); - if (devid < 0) + sbdf = get_device_sbdf_id(dev); + if (sbdf < 0) return false; + devid = PCI_SBDF_TO_DEVID(sbdf); - /* Out of our scope? */ - if (devid > amd_iommu_last_bdf) + iommu = rlookup_amd_iommu(dev); + if (!iommu) return false; - if (amd_iommu_rlookup_table[devid] == NULL) + /* Out of our scope? */ + pci_seg = iommu->pci_seg; + if (devid > pci_seg->last_bdf) return false; return true; } -static int iommu_init_device(struct device *dev) +static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) { struct iommu_dev_data *dev_data; - int devid; + int devid, sbdf; if (dev_iommu_priv_get(dev)) return 0; - devid = get_device_id(dev); - if (devid < 0) - return devid; + sbdf = get_device_sbdf_id(dev); + if (sbdf < 0) + return sbdf; - dev_data = find_dev_data(devid); + devid = PCI_SBDF_TO_DEVID(sbdf); + dev_data = find_dev_data(iommu, devid); if (!dev_data) return -ENOMEM; - dev_data->pdev = setup_aliases(dev); + dev_data->dev = dev; + setup_aliases(iommu, dev); /* * By default we use passthrough mode for IOMMUv2 capable device. @@ -341,9 +405,6 @@ static int iommu_init_device(struct device *dev) */ if ((iommu_default_passthrough() || !amd_iommu_force_isolation) && dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) { - struct amd_iommu *iommu; - - iommu = amd_iommu_rlookup_table[dev_data->devid]; dev_data->iommu_v2 = iommu->is_iommu_v2; } @@ -352,18 +413,21 @@ static int iommu_init_device(struct device *dev) return 0; } -static void iommu_ignore_device(struct device *dev) +static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev) { - int devid; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; + struct dev_table_entry *dev_table = get_dev_table(iommu); + int devid, sbdf; - devid = get_device_id(dev); - if (devid < 0) + sbdf = get_device_sbdf_id(dev); + if (sbdf < 0) return; - amd_iommu_rlookup_table[devid] = NULL; - memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); + devid = PCI_SBDF_TO_DEVID(sbdf); + pci_seg->rlookup_table[devid] = NULL; + memset(&dev_table[devid], 0, sizeof(struct dev_table_entry)); - setup_aliases(dev); + setup_aliases(iommu, dev); } static void amd_iommu_uninit_device(struct device *dev) @@ -391,13 +455,13 @@ static void amd_iommu_uninit_device(struct device *dev) * ****************************************************************************/ -static void dump_dte_entry(u16 devid) +static void dump_dte_entry(struct amd_iommu *iommu, u16 devid) { int i; + struct dev_table_entry *dev_table = get_dev_table(iommu); for (i = 0; i < 4; ++i) - pr_err("DTE[%d]: %016llx\n", i, - amd_iommu_dev_table[devid].data[i]); + pr_err("DTE[%d]: %016llx\n", i, dev_table[devid].data[i]); } static void dump_command(unsigned long phys_addr) @@ -409,7 +473,7 @@ static void dump_command(unsigned long phys_addr) pr_err("CMD[%d]: %08x\n", i, cmd->data[i]); } -static void amd_iommu_report_rmp_hw_error(volatile u32 *event) +static void amd_iommu_report_rmp_hw_error(struct amd_iommu *iommu, volatile u32 *event) { struct iommu_dev_data *dev_data = NULL; int devid, vmg_tag, flags; @@ -421,7 +485,7 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event) flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; spa = ((u64)event[3] << 32) | (event[2] & 0xFFFFFFF8); - pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid), devid & 0xff); if (pdev) dev_data = dev_iommu_priv_get(&pdev->dev); @@ -432,8 +496,8 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event) vmg_tag, spa, flags); } } else { - pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%04x:%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), vmg_tag, spa, flags); } @@ -441,7 +505,7 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event) pci_dev_put(pdev); } -static void amd_iommu_report_rmp_fault(volatile u32 *event) +static void amd_iommu_report_rmp_fault(struct amd_iommu *iommu, volatile u32 *event) { struct iommu_dev_data *dev_data = NULL; int devid, flags_rmp, vmg_tag, flags; @@ -454,7 +518,7 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event) flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; gpa = ((u64)event[3] << 32) | event[2]; - pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid), devid & 0xff); if (pdev) dev_data = dev_iommu_priv_get(&pdev->dev); @@ -465,8 +529,8 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event) vmg_tag, gpa, flags_rmp, flags); } } else { - pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%04x:%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), vmg_tag, gpa, flags_rmp, flags); } @@ -480,13 +544,14 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event) #define IS_WRITE_REQUEST(flags) \ ((flags) & EVENT_FLAG_RW) -static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, +static void amd_iommu_report_page_fault(struct amd_iommu *iommu, + u16 devid, u16 domain_id, u64 address, int flags) { struct iommu_dev_data *dev_data = NULL; struct pci_dev *pdev; - pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid), devid & 0xff); if (pdev) dev_data = dev_iommu_priv_get(&pdev->dev); @@ -511,8 +576,8 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, domain_id, address, flags); } } else { - pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%04x:%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), domain_id, address, flags); } @@ -549,26 +614,26 @@ retry: } if (type == EVENT_TYPE_IO_FAULT) { - amd_iommu_report_page_fault(devid, pasid, address, flags); + amd_iommu_report_page_fault(iommu, devid, pasid, address, flags); return; } switch (type) { case EVENT_TYPE_ILL_DEV: - dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); - dump_dte_entry(devid); + dump_dte_entry(iommu, devid); break; case EVENT_TYPE_DEV_TAB_ERR: - dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " + dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%04x:%02x:%02x.%x " "address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address, flags); break; case EVENT_TYPE_PAGE_TAB_ERR: - dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%04x:%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; case EVENT_TYPE_ILL_CMD: @@ -580,26 +645,26 @@ retry: address, flags); break; case EVENT_TYPE_IOTLB_INV_TO: - dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%llx]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%04x:%02x:%02x.%x address=0x%llx]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address); break; case EVENT_TYPE_INV_DEV_REQ: - dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; case EVENT_TYPE_RMP_FAULT: - amd_iommu_report_rmp_fault(event); + amd_iommu_report_rmp_fault(iommu, event); break; case EVENT_TYPE_RMP_HW_ERR: - amd_iommu_report_rmp_hw_error(event); + amd_iommu_report_rmp_hw_error(iommu, event); break; case EVENT_TYPE_INV_PPR_REQ: pasid = PPR_PASID(*((u64 *)__evt)); tag = event[1] & 0x03FF; - dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags, tag); break; default: @@ -636,7 +701,7 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) fault.address = raw[1]; fault.pasid = PPR_PASID(raw[0]); - fault.device_id = PPR_DEVID(raw[0]); + fault.sbdf = PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, PPR_DEVID(raw[0])); fault.tag = PPR_TAG(raw[0]); fault.flags = PPR_FLAGS(raw[0]); @@ -1125,8 +1190,9 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) static void amd_iommu_flush_dte_all(struct amd_iommu *iommu) { u32 devid; + u16 last_bdf = iommu->pci_seg->last_bdf; - for (devid = 0; devid <= 0xffff; ++devid) + for (devid = 0; devid <= last_bdf; ++devid) iommu_flush_dte(iommu, devid); iommu_completion_wait(iommu); @@ -1139,8 +1205,9 @@ static void amd_iommu_flush_dte_all(struct amd_iommu *iommu) static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu) { u32 dom_id; + u16 last_bdf = iommu->pci_seg->last_bdf; - for (dom_id = 0; dom_id <= 0xffff; ++dom_id) { + for (dom_id = 0; dom_id <= last_bdf; ++dom_id) { struct iommu_cmd cmd; build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, dom_id, 1); @@ -1183,8 +1250,9 @@ static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid) static void amd_iommu_flush_irt_all(struct amd_iommu *iommu) { u32 devid; + u16 last_bdf = iommu->pci_seg->last_bdf; - for (devid = 0; devid <= MAX_DEV_TABLE_ENTRIES; devid++) + for (devid = 0; devid <= last_bdf; devid++) iommu_flush_irt(iommu, devid); iommu_completion_wait(iommu); @@ -1212,7 +1280,9 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data, int qdep; qdep = dev_data->ats.qdep; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + return -EINVAL; build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size); @@ -1232,20 +1302,28 @@ static int device_flush_dte_alias(struct pci_dev *pdev, u16 alias, void *data) static int device_flush_dte(struct iommu_dev_data *dev_data) { struct amd_iommu *iommu; + struct pci_dev *pdev = NULL; + struct amd_iommu_pci_seg *pci_seg; u16 alias; int ret; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + return -EINVAL; - if (dev_data->pdev) - ret = pci_for_each_dma_alias(dev_data->pdev, + if (dev_is_pci(dev_data->dev)) + pdev = to_pci_dev(dev_data->dev); + + if (pdev) + ret = pci_for_each_dma_alias(pdev, device_flush_dte_alias, iommu); else ret = iommu_flush_dte(iommu, dev_data->devid); if (ret) return ret; - alias = amd_iommu_alias_table[dev_data->devid]; + pci_seg = iommu->pci_seg; + alias = pci_seg->alias_table[dev_data->devid]; if (alias != dev_data->devid) { ret = iommu_flush_dte(iommu, alias); if (ret) @@ -1461,28 +1539,35 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } -static void set_dte_entry(u16 devid, struct protection_domain *domain, - bool ats, bool ppr) +static void set_dte_entry(struct amd_iommu *iommu, u16 devid, + struct protection_domain *domain, bool ats, bool ppr) { u64 pte_root = 0; u64 flags = 0; u32 old_domid; + struct dev_table_entry *dev_table = get_dev_table(iommu); if (domain->iop.mode != PAGE_MODE_NONE) pte_root = iommu_virt_to_phys(domain->iop.root); pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; - pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; - flags = amd_iommu_dev_table[devid].data[1]; + pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V; + + /* + * When SNP is enabled, Only set TV bit when IOMMU + * page translation is in use. + */ + if (!amd_iommu_snp_en || (domain->id != 0)) + pte_root |= DTE_FLAG_TV; + + flags = dev_table[devid].data[1]; if (ats) flags |= DTE_FLAG_IOTLB; if (ppr) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - if (iommu_feature(iommu, FEATURE_EPHSUP)) pte_root |= 1ULL << DEV_ENTRY_PPR; } @@ -1516,9 +1601,9 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, flags &= ~DEV_DOMID_MASK; flags |= domain->id; - old_domid = amd_iommu_dev_table[devid].data[1] & DEV_DOMID_MASK; - amd_iommu_dev_table[devid].data[1] = flags; - amd_iommu_dev_table[devid].data[0] = pte_root; + old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK; + dev_table[devid].data[1] = flags; + dev_table[devid].data[0] = pte_root; /* * A kdump kernel might be replacing a domain ID that was copied from @@ -1526,19 +1611,23 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, * entries for the old domain ID that is being overwritten */ if (old_domid) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - amd_iommu_flush_tlb_domid(iommu, old_domid); } } -static void clear_dte_entry(u16 devid) +static void clear_dte_entry(struct amd_iommu *iommu, u16 devid) { + struct dev_table_entry *dev_table = get_dev_table(iommu); + /* remove entry from the device table seen by the hardware */ - amd_iommu_dev_table[devid].data[0] = DTE_FLAG_V | DTE_FLAG_TV; - amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK; + dev_table[devid].data[0] = DTE_FLAG_V; + + if (!amd_iommu_snp_en) + dev_table[devid].data[0] |= DTE_FLAG_TV; + + dev_table[devid].data[1] &= DTE_FLAG_MASK; - amd_iommu_apply_erratum_63(devid); + amd_iommu_apply_erratum_63(iommu, devid); } static void do_attach(struct iommu_dev_data *dev_data, @@ -1547,7 +1636,9 @@ static void do_attach(struct iommu_dev_data *dev_data, struct amd_iommu *iommu; bool ats; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + return; ats = dev_data->ats.enabled; /* Update data structures */ @@ -1559,9 +1650,9 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_cnt += 1; /* Update device table */ - set_dte_entry(dev_data->devid, domain, + set_dte_entry(iommu, dev_data->devid, domain, ats, dev_data->iommu_v2); - clone_aliases(dev_data->pdev); + clone_aliases(iommu, dev_data->dev); device_flush_dte(dev_data); } @@ -1571,13 +1662,15 @@ static void do_detach(struct iommu_dev_data *dev_data) struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + return; /* Update data structures */ dev_data->domain = NULL; list_del(&dev_data->list); - clear_dte_entry(dev_data->devid); - clone_aliases(dev_data->pdev); + clear_dte_entry(iommu, dev_data->devid); + clone_aliases(iommu, dev_data->dev); /* Flush the DTE entry */ device_flush_dte(dev_data); @@ -1749,23 +1842,24 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) { struct iommu_device *iommu_dev; struct amd_iommu *iommu; - int ret, devid; + int ret; if (!check_device(dev)) return ERR_PTR(-ENODEV); - devid = get_device_id(dev); - iommu = amd_iommu_rlookup_table[devid]; + iommu = rlookup_amd_iommu(dev); + if (!iommu) + return ERR_PTR(-ENODEV); if (dev_iommu_priv_get(dev)) return &iommu->iommu; - ret = iommu_init_device(dev); + ret = iommu_init_device(iommu, dev); if (ret) { if (ret != -ENOTSUPP) dev_err(dev, "Failed to initialize - trying to proceed anyway\n"); iommu_dev = ERR_PTR(ret); - iommu_ignore_device(dev); + iommu_ignore_device(iommu, dev); } else { amd_iommu_set_pci_msi_domain(dev, iommu); iommu_dev = &iommu->iommu; @@ -1785,13 +1879,14 @@ static void amd_iommu_probe_finalize(struct device *dev) static void amd_iommu_release_device(struct device *dev) { - int devid = get_device_id(dev); struct amd_iommu *iommu; if (!check_device(dev)) return; - iommu = amd_iommu_rlookup_table[devid]; + iommu = rlookup_amd_iommu(dev); + if (!iommu) + return; amd_iommu_uninit_device(dev); iommu_completion_wait(iommu); @@ -1816,9 +1911,13 @@ static void update_device_table(struct protection_domain *domain) struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { - set_dte_entry(dev_data->devid, domain, + struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev); + + if (!iommu) + continue; + set_dte_entry(iommu, dev_data->devid, domain, dev_data->ats.enabled, dev_data->iommu_v2); - clone_aliases(dev_data->pdev); + clone_aliases(iommu, dev_data->dev); } } @@ -1969,6 +2068,13 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) { struct protection_domain *domain; + /* + * Since DTE[Mode]=0 is prohibited on SNP-enabled system, + * default to use IOMMU_DOMAIN_DMA[_FQ]. + */ + if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY)) + return NULL; + domain = protection_domain_alloc(type); if (!domain) return NULL; @@ -2004,7 +2110,6 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); - int devid = get_device_id(dev); struct amd_iommu *iommu; if (!check_device(dev)) @@ -2013,7 +2118,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, if (dev_data->domain != NULL) detach_device(dev); - iommu = amd_iommu_rlookup_table[devid]; + iommu = rlookup_amd_iommu(dev); if (!iommu) return; @@ -2040,7 +2145,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, dev_data = dev_iommu_priv_get(dev); dev_data->defer_attach = false; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev); if (!iommu) return -EINVAL; @@ -2169,13 +2274,21 @@ static void amd_iommu_get_resv_regions(struct device *dev, { struct iommu_resv_region *region; struct unity_map_entry *entry; - int devid; + struct amd_iommu *iommu; + struct amd_iommu_pci_seg *pci_seg; + int devid, sbdf; - devid = get_device_id(dev); - if (devid < 0) + sbdf = get_device_sbdf_id(dev); + if (sbdf < 0) + return; + + devid = PCI_SBDF_TO_DEVID(sbdf); + iommu = rlookup_amd_iommu(dev); + if (!iommu) return; + pci_seg = iommu->pci_seg; - list_for_each_entry(entry, &amd_iommu_unity_map, list) { + list_for_each_entry(entry, &pci_seg->unity_map, list) { int type, prot = 0; size_t length; @@ -2280,7 +2393,6 @@ const struct iommu_ops amd_iommu_ops = { .probe_finalize = amd_iommu_probe_finalize, .device_group = amd_iommu_device_group, .get_resv_regions = amd_iommu_get_resv_regions, - .put_resv_regions = generic_iommu_put_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, .pgsize_bitmap = AMD_IOMMU_PGSIZES, .def_domain_type = amd_iommu_def_domain_type, @@ -2419,8 +2531,9 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid, continue; qdep = dev_data->ats.qdep; - iommu = amd_iommu_rlookup_table[dev_data->devid]; - + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + continue; build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid, qdep, address, size); @@ -2582,7 +2695,9 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, struct iommu_cmd cmd; dev_data = dev_iommu_priv_get(&pdev->dev); - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(&pdev->dev); + if (!iommu) + return -ENODEV; build_complete_ppr(&cmd, dev_data->devid, pasid, status, tag, dev_data->pri_tlp); @@ -2644,30 +2759,35 @@ EXPORT_SYMBOL(amd_iommu_device_info); static struct irq_chip amd_ir_chip; static DEFINE_SPINLOCK(iommu_table_lock); -static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) +static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid, + struct irq_remap_table *table) { u64 dte; + struct dev_table_entry *dev_table = get_dev_table(iommu); - dte = amd_iommu_dev_table[devid].data[2]; + dte = dev_table[devid].data[2]; dte &= ~DTE_IRQ_PHYS_ADDR_MASK; dte |= iommu_virt_to_phys(table->table); dte |= DTE_IRQ_REMAP_INTCTL; dte |= DTE_INTTABLEN; dte |= DTE_IRQ_REMAP_ENABLE; - amd_iommu_dev_table[devid].data[2] = dte; + dev_table[devid].data[2] = dte; } -static struct irq_remap_table *get_irq_table(u16 devid) +static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid) { struct irq_remap_table *table; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; - if (WARN_ONCE(!amd_iommu_rlookup_table[devid], - "%s: no iommu for devid %x\n", __func__, devid)) + if (WARN_ONCE(!pci_seg->rlookup_table[devid], + "%s: no iommu for devid %x:%x\n", + __func__, pci_seg->id, devid)) return NULL; - table = irq_lookup_table[devid]; - if (WARN_ONCE(!table, "%s: no table for devid %x\n", __func__, devid)) + table = pci_seg->irq_lookup_table[devid]; + if (WARN_ONCE(!table, "%s: no table for devid %x:%x\n", + __func__, pci_seg->id, devid)) return NULL; return table; @@ -2700,8 +2820,10 @@ static struct irq_remap_table *__alloc_irq_table(void) static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid, struct irq_remap_table *table) { - irq_lookup_table[devid] = table; - set_dte_irq_entry(devid, table); + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; + + pci_seg->irq_lookup_table[devid] = table; + set_dte_irq_entry(iommu, devid, table); iommu_flush_dte(iommu, devid); } @@ -2709,35 +2831,38 @@ static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias, void *data) { struct irq_remap_table *table = data; + struct amd_iommu_pci_seg *pci_seg; + struct amd_iommu *iommu = rlookup_amd_iommu(&pdev->dev); - irq_lookup_table[alias] = table; - set_dte_irq_entry(alias, table); + if (!iommu) + return -EINVAL; - iommu_flush_dte(amd_iommu_rlookup_table[alias], alias); + pci_seg = iommu->pci_seg; + pci_seg->irq_lookup_table[alias] = table; + set_dte_irq_entry(iommu, alias, table); + iommu_flush_dte(pci_seg->rlookup_table[alias], alias); return 0; } -static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev) +static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu, + u16 devid, struct pci_dev *pdev) { struct irq_remap_table *table = NULL; struct irq_remap_table *new_table = NULL; - struct amd_iommu *iommu; + struct amd_iommu_pci_seg *pci_seg; unsigned long flags; u16 alias; spin_lock_irqsave(&iommu_table_lock, flags); - iommu = amd_iommu_rlookup_table[devid]; - if (!iommu) - goto out_unlock; - - table = irq_lookup_table[devid]; + pci_seg = iommu->pci_seg; + table = pci_seg->irq_lookup_table[devid]; if (table) goto out_unlock; - alias = amd_iommu_alias_table[devid]; - table = irq_lookup_table[alias]; + alias = pci_seg->alias_table[devid]; + table = pci_seg->irq_lookup_table[alias]; if (table) { set_remap_table_entry(iommu, devid, table); goto out_wait; @@ -2751,11 +2876,11 @@ static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev) spin_lock_irqsave(&iommu_table_lock, flags); - table = irq_lookup_table[devid]; + table = pci_seg->irq_lookup_table[devid]; if (table) goto out_unlock; - table = irq_lookup_table[alias]; + table = pci_seg->irq_lookup_table[alias]; if (table) { set_remap_table_entry(iommu, devid, table); goto out_wait; @@ -2786,18 +2911,14 @@ out_unlock: return table; } -static int alloc_irq_index(u16 devid, int count, bool align, - struct pci_dev *pdev) +static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count, + bool align, struct pci_dev *pdev) { struct irq_remap_table *table; int index, c, alignment = 1; unsigned long flags; - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - if (!iommu) - return -ENODEV; - - table = alloc_irq_table(devid, pdev); + table = alloc_irq_table(iommu, devid, pdev); if (!table) return -ENODEV; @@ -2836,20 +2957,15 @@ out: return index; } -static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte, - struct amd_ir_data *data) +static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index, + struct irte_ga *irte, struct amd_ir_data *data) { bool ret; struct irq_remap_table *table; - struct amd_iommu *iommu; unsigned long flags; struct irte_ga *entry; - iommu = amd_iommu_rlookup_table[devid]; - if (iommu == NULL) - return -EINVAL; - - table = get_irq_table(devid); + table = get_irq_table(iommu, devid); if (!table) return -ENOMEM; @@ -2880,17 +2996,13 @@ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte, return 0; } -static int modify_irte(u16 devid, int index, union irte *irte) +static int modify_irte(struct amd_iommu *iommu, + u16 devid, int index, union irte *irte) { struct irq_remap_table *table; - struct amd_iommu *iommu; unsigned long flags; - iommu = amd_iommu_rlookup_table[devid]; - if (iommu == NULL) - return -EINVAL; - - table = get_irq_table(devid); + table = get_irq_table(iommu, devid); if (!table) return -ENOMEM; @@ -2904,17 +3016,12 @@ static int modify_irte(u16 devid, int index, union irte *irte) return 0; } -static void free_irte(u16 devid, int index) +static void free_irte(struct amd_iommu *iommu, u16 devid, int index) { struct irq_remap_table *table; - struct amd_iommu *iommu; unsigned long flags; - iommu = amd_iommu_rlookup_table[devid]; - if (iommu == NULL) - return; - - table = get_irq_table(devid); + table = get_irq_table(iommu, devid); if (!table) return; @@ -2956,49 +3063,49 @@ static void irte_ga_prepare(void *entry, irte->lo.fields_remap.valid = 1; } -static void irte_activate(void *entry, u16 devid, u16 index) +static void irte_activate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) { union irte *irte = (union irte *) entry; irte->fields.valid = 1; - modify_irte(devid, index, irte); + modify_irte(iommu, devid, index, irte); } -static void irte_ga_activate(void *entry, u16 devid, u16 index) +static void irte_ga_activate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) { struct irte_ga *irte = (struct irte_ga *) entry; irte->lo.fields_remap.valid = 1; - modify_irte_ga(devid, index, irte, NULL); + modify_irte_ga(iommu, devid, index, irte, NULL); } -static void irte_deactivate(void *entry, u16 devid, u16 index) +static void irte_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) { union irte *irte = (union irte *) entry; irte->fields.valid = 0; - modify_irte(devid, index, irte); + modify_irte(iommu, devid, index, irte); } -static void irte_ga_deactivate(void *entry, u16 devid, u16 index) +static void irte_ga_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) { struct irte_ga *irte = (struct irte_ga *) entry; irte->lo.fields_remap.valid = 0; - modify_irte_ga(devid, index, irte, NULL); + modify_irte_ga(iommu, devid, index, irte, NULL); } -static void irte_set_affinity(void *entry, u16 devid, u16 index, +static void irte_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index, u8 vector, u32 dest_apicid) { union irte *irte = (union irte *) entry; irte->fields.vector = vector; irte->fields.destination = dest_apicid; - modify_irte(devid, index, irte); + modify_irte(iommu, devid, index, irte); } -static void irte_ga_set_affinity(void *entry, u16 devid, u16 index, +static void irte_ga_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index, u8 vector, u32 dest_apicid) { struct irte_ga *irte = (struct irte_ga *) entry; @@ -3009,7 +3116,7 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index, APICID_TO_IRTE_DEST_LO(dest_apicid); irte->hi.fields.destination = APICID_TO_IRTE_DEST_HI(dest_apicid); - modify_irte_ga(devid, index, irte, NULL); + modify_irte_ga(iommu, devid, index, irte, NULL); } } @@ -3068,7 +3175,7 @@ static int get_devid(struct irq_alloc_info *info) return get_hpet_devid(info->devid); case X86_IRQ_ALLOC_TYPE_PCI_MSI: case X86_IRQ_ALLOC_TYPE_PCI_MSIX: - return get_device_id(msi_desc_to_dev(info->desc)); + return get_device_sbdf_id(msi_desc_to_dev(info->desc)); default: WARN_ON_ONCE(1); return -1; @@ -3097,7 +3204,7 @@ static void irq_remapping_prepare_irte(struct amd_ir_data *data, int devid, int index, int sub_handle) { struct irq_2_irte *irte_info = &data->irq_2_irte; - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + struct amd_iommu *iommu = data->iommu; if (!iommu) return; @@ -3148,8 +3255,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, struct irq_alloc_info *info = arg; struct irq_data *irq_data; struct amd_ir_data *data = NULL; + struct amd_iommu *iommu; struct irq_cfg *cfg; - int i, ret, devid; + int i, ret, devid, seg, sbdf; int index; if (!info) @@ -3165,8 +3273,14 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI) info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; - devid = get_devid(info); - if (devid < 0) + sbdf = get_devid(info); + if (sbdf < 0) + return -EINVAL; + + seg = PCI_SBDF_TO_SEGID(sbdf); + devid = PCI_SBDF_TO_DEVID(sbdf); + iommu = __rlookup_amd_iommu(seg, devid); + if (!iommu) return -EINVAL; ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); @@ -3175,9 +3289,8 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) { struct irq_remap_table *table; - struct amd_iommu *iommu; - table = alloc_irq_table(devid, NULL); + table = alloc_irq_table(iommu, devid, NULL); if (table) { if (!table->min_index) { /* @@ -3185,7 +3298,6 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, * interrupts. */ table->min_index = 32; - iommu = amd_iommu_rlookup_table[devid]; for (i = 0; i < 32; ++i) iommu->irte_ops->set_allocated(table, i); } @@ -3198,10 +3310,10 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, info->type == X86_IRQ_ALLOC_TYPE_PCI_MSIX) { bool align = (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI); - index = alloc_irq_index(devid, nr_irqs, align, + index = alloc_irq_index(iommu, devid, nr_irqs, align, msi_desc_to_pci_dev(info->desc)); } else { - index = alloc_irq_index(devid, nr_irqs, false, NULL); + index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL); } if (index < 0) { @@ -3233,6 +3345,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, goto out_free_data; } + data->iommu = iommu; irq_data->hwirq = (devid << 16) + i; irq_data->chip_data = data; irq_data->chip = &amd_ir_chip; @@ -3249,7 +3362,7 @@ out_free_data: kfree(irq_data->chip_data); } for (i = 0; i < nr_irqs; i++) - free_irte(devid, index + i); + free_irte(iommu, devid, index + i); out_free_parent: irq_domain_free_irqs_common(domain, virq, nr_irqs); return ret; @@ -3268,7 +3381,7 @@ static void irq_remapping_free(struct irq_domain *domain, unsigned int virq, if (irq_data && irq_data->chip_data) { data = irq_data->chip_data; irte_info = &data->irq_2_irte; - free_irte(irte_info->devid, irte_info->index); + free_irte(data->iommu, irte_info->devid, irte_info->index); kfree(data->entry); kfree(data); } @@ -3286,13 +3399,13 @@ static int irq_remapping_activate(struct irq_domain *domain, { struct amd_ir_data *data = irq_data->chip_data; struct irq_2_irte *irte_info = &data->irq_2_irte; - struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid]; + struct amd_iommu *iommu = data->iommu; struct irq_cfg *cfg = irqd_cfg(irq_data); if (!iommu) return 0; - iommu->irte_ops->activate(data->entry, irte_info->devid, + iommu->irte_ops->activate(iommu, data->entry, irte_info->devid, irte_info->index); amd_ir_update_irte(irq_data, iommu, data, irte_info, cfg); return 0; @@ -3303,10 +3416,10 @@ static void irq_remapping_deactivate(struct irq_domain *domain, { struct amd_ir_data *data = irq_data->chip_data; struct irq_2_irte *irte_info = &data->irq_2_irte; - struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid]; + struct amd_iommu *iommu = data->iommu; if (iommu) - iommu->irte_ops->deactivate(data->entry, irte_info->devid, + iommu->irte_ops->deactivate(iommu, data->entry, irte_info->devid, irte_info->index); } @@ -3326,8 +3439,8 @@ static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec, if (devid < 0) return 0; + iommu = __rlookup_amd_iommu((devid >> 16), (devid & 0xffff)); - iommu = amd_iommu_rlookup_table[devid]; return iommu && iommu->ir_domain == d; } @@ -3361,7 +3474,7 @@ int amd_iommu_activate_guest_mode(void *data) entry->hi.fields.vector = ir_data->ga_vector; entry->lo.fields_vapic.ga_tag = ir_data->ga_tag; - return modify_irte_ga(ir_data->irq_2_irte.devid, + return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, ir_data->irq_2_irte.index, entry, ir_data); } EXPORT_SYMBOL(amd_iommu_activate_guest_mode); @@ -3391,7 +3504,7 @@ int amd_iommu_deactivate_guest_mode(void *data) entry->hi.fields.destination = APICID_TO_IRTE_DEST_HI(cfg->dest_apicid); - return modify_irte_ga(ir_data->irq_2_irte.devid, + return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, ir_data->irq_2_irte.index, entry, ir_data); } EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode); @@ -3399,12 +3512,16 @@ EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode); static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) { int ret; - struct amd_iommu *iommu; struct amd_iommu_pi_data *pi_data = vcpu_info; struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data; struct amd_ir_data *ir_data = data->chip_data; struct irq_2_irte *irte_info = &ir_data->irq_2_irte; - struct iommu_dev_data *dev_data = search_dev_data(irte_info->devid); + struct iommu_dev_data *dev_data; + + if (ir_data->iommu == NULL) + return -EINVAL; + + dev_data = search_dev_data(ir_data->iommu, irte_info->devid); /* Note: * This device has never been set up for guest mode. @@ -3426,10 +3543,6 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) pi_data->is_guest_mode = false; } - iommu = amd_iommu_rlookup_table[irte_info->devid]; - if (iommu == NULL) - return -EINVAL; - pi_data->prev_ga_tag = ir_data->cached_ga_tag; if (pi_data->is_guest_mode) { ir_data->ga_root_ptr = (pi_data->base >> 12); @@ -3463,7 +3576,7 @@ static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu, * Atomically updates the IRTE with the new destination, vector * and flushes the interrupt entry cache. */ - iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid, + iommu->irte_ops->set_affinity(iommu, ir_data->entry, irte_info->devid, irte_info->index, cfg->vector, cfg->dest_apicid); } @@ -3475,7 +3588,7 @@ static int amd_ir_set_affinity(struct irq_data *data, struct irq_2_irte *irte_info = &ir_data->irq_2_irte; struct irq_cfg *cfg = irqd_cfg(data); struct irq_data *parent = data->parent_data; - struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid]; + struct amd_iommu *iommu = ir_data->iommu; int ret; if (!iommu) @@ -3545,11 +3658,11 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data) !ref || !entry || !entry->lo.fields_vapic.guest_mode) return 0; - iommu = amd_iommu_rlookup_table[devid]; + iommu = ir_data->iommu; if (!iommu) return -ENODEV; - table = get_irq_table(devid); + table = get_irq_table(iommu, devid); if (!table) return -ENODEV; diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index afb3efd565b7..696d5555be57 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -51,7 +51,7 @@ struct pasid_state { struct device_state { struct list_head list; - u16 devid; + u32 sbdf; atomic_t count; struct pci_dev *pdev; struct pasid_state **states; @@ -83,35 +83,25 @@ static struct workqueue_struct *iommu_wq; static void free_pasid_states(struct device_state *dev_state); -static u16 device_id(struct pci_dev *pdev) -{ - u16 devid; - - devid = pdev->bus->number; - devid = (devid << 8) | pdev->devfn; - - return devid; -} - -static struct device_state *__get_device_state(u16 devid) +static struct device_state *__get_device_state(u32 sbdf) { struct device_state *dev_state; list_for_each_entry(dev_state, &state_list, list) { - if (dev_state->devid == devid) + if (dev_state->sbdf == sbdf) return dev_state; } return NULL; } -static struct device_state *get_device_state(u16 devid) +static struct device_state *get_device_state(u32 sbdf) { struct device_state *dev_state; unsigned long flags; spin_lock_irqsave(&state_lock, flags); - dev_state = __get_device_state(devid); + dev_state = __get_device_state(sbdf); if (dev_state != NULL) atomic_inc(&dev_state->count); spin_unlock_irqrestore(&state_lock, flags); @@ -528,15 +518,16 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) unsigned long flags; struct fault *fault; bool finish; - u16 tag, devid; + u16 tag, devid, seg_id; int ret; iommu_fault = data; tag = iommu_fault->tag & 0x1ff; finish = (iommu_fault->tag >> 9) & 1; - devid = iommu_fault->device_id; - pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + seg_id = PCI_SBDF_TO_SEGID(iommu_fault->sbdf); + devid = PCI_SBDF_TO_DEVID(iommu_fault->sbdf); + pdev = pci_get_domain_bus_and_slot(seg_id, PCI_BUS_NUM(devid), devid & 0xff); if (!pdev) return -ENODEV; @@ -550,7 +541,7 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) goto out; } - dev_state = get_device_state(iommu_fault->device_id); + dev_state = get_device_state(iommu_fault->sbdf); if (dev_state == NULL) goto out; @@ -609,7 +600,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, struct pasid_state *pasid_state; struct device_state *dev_state; struct mm_struct *mm; - u16 devid; + u32 sbdf; int ret; might_sleep(); @@ -617,8 +608,8 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, if (!amd_iommu_v2_supported()) return -ENODEV; - devid = device_id(pdev); - dev_state = get_device_state(devid); + sbdf = get_pci_sbdf_id(pdev); + dev_state = get_device_state(sbdf); if (dev_state == NULL) return -EINVAL; @@ -692,15 +683,15 @@ void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid) { struct pasid_state *pasid_state; struct device_state *dev_state; - u16 devid; + u32 sbdf; might_sleep(); if (!amd_iommu_v2_supported()) return; - devid = device_id(pdev); - dev_state = get_device_state(devid); + sbdf = get_pci_sbdf_id(pdev); + dev_state = get_device_state(sbdf); if (dev_state == NULL) return; @@ -742,7 +733,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) struct iommu_group *group; unsigned long flags; int ret, tmp; - u16 devid; + u32 sbdf; might_sleep(); @@ -759,7 +750,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) if (pasids <= 0 || pasids > (PASID_MASK + 1)) return -EINVAL; - devid = device_id(pdev); + sbdf = get_pci_sbdf_id(pdev); dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL); if (dev_state == NULL) @@ -768,7 +759,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) spin_lock_init(&dev_state->lock); init_waitqueue_head(&dev_state->wq); dev_state->pdev = pdev; - dev_state->devid = devid; + dev_state->sbdf = sbdf; tmp = pasids; for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) @@ -806,7 +797,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) spin_lock_irqsave(&state_lock, flags); - if (__get_device_state(devid) != NULL) { + if (__get_device_state(sbdf) != NULL) { spin_unlock_irqrestore(&state_lock, flags); ret = -EBUSY; goto out_free_domain; @@ -838,16 +829,16 @@ void amd_iommu_free_device(struct pci_dev *pdev) { struct device_state *dev_state; unsigned long flags; - u16 devid; + u32 sbdf; if (!amd_iommu_v2_supported()) return; - devid = device_id(pdev); + sbdf = get_pci_sbdf_id(pdev); spin_lock_irqsave(&state_lock, flags); - dev_state = __get_device_state(devid); + dev_state = __get_device_state(sbdf); if (dev_state == NULL) { spin_unlock_irqrestore(&state_lock, flags); return; @@ -867,18 +858,18 @@ int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, { struct device_state *dev_state; unsigned long flags; - u16 devid; + u32 sbdf; int ret; if (!amd_iommu_v2_supported()) return -ENODEV; - devid = device_id(pdev); + sbdf = get_pci_sbdf_id(pdev); spin_lock_irqsave(&state_lock, flags); ret = -EINVAL; - dev_state = __get_device_state(devid); + dev_state = __get_device_state(sbdf); if (dev_state == NULL) goto out_unlock; @@ -898,18 +889,18 @@ int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, { struct device_state *dev_state; unsigned long flags; - u16 devid; + u32 sbdf; int ret; if (!amd_iommu_v2_supported()) return -ENODEV; - devid = device_id(pdev); + sbdf = get_pci_sbdf_id(pdev); spin_lock_irqsave(&state_lock, flags); ret = -EINVAL; - dev_state = __get_device_state(devid); + dev_state = __get_device_state(sbdf); if (dev_state == NULL) goto out_unlock; diff --git a/drivers/iommu/amd/quirks.c b/drivers/iommu/amd/quirks.c index 5120ce4fdce3..79dbb8f33b47 100644 --- a/drivers/iommu/amd/quirks.c +++ b/drivers/iommu/amd/quirks.c @@ -15,7 +15,7 @@ struct ivrs_quirk_entry { u8 id; - u16 devid; + u32 devid; }; enum { @@ -49,7 +49,7 @@ static int __init ivrs_ioapic_quirk_cb(const struct dmi_system_id *d) const struct ivrs_quirk_entry *i; for (i = d->driver_data; i->id != 0 && i->devid != 0; i++) - add_special_device(IVHD_SPECIAL_IOAPIC, i->id, (u16 *)&i->devid, 0); + add_special_device(IVHD_SPECIAL_IOAPIC, i->id, (u32 *)&i->devid, 0); return 0; } |