author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-11-21 12:40:50 -0800
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-11-21 12:40:50 -0800
commit | 341d041daae52cd5f014f68c1c7d9039db818fca (patch)
tree | 87630919e6b6b456c8b7d32c2c5d52de6264b247 /drivers/iommu/arm
parent | 51ae62a12c242e49229db23b96d03ecc15efc0d1 (diff)
parent | 6d026e6d48cd2a95407c8fdd8d6187b871401c23 (diff)
Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd
Pull iommufd updates from Jason Gunthorpe:
"Several new features and uAPI for iommufd:
- IOMMU_IOAS_MAP_FILE allows passing in a file descriptor as the
backing memory for an iommu mapping. To date VFIO/iommufd have used
VMAs and pin_user_pages(); this now allows using memfds and
memfd_pin_folios(). Notably this creates a pure folio path from the
memfd to the iommu page table where memory is never broken down to
PAGE_SIZE.
- IOMMU_IOAS_CHANGE_PROCESS moves the pinned page accounting between
two processes. Combined with the above this allows iommufd to
support a VMM restart using exec(), where something like qemu would
exec() a new version of itself and fd-pass the memfds/iommufd/etc
to the new process. The memfd allows DMA access to the memory to
continue while the new process is getting set up, and the
CHANGE_PROCESS updates all the accounting.
- Support for fault reporting to userspace on non-PRI HW, such as ARM
stall-mode embedded devices.
- IOMMU_VIOMMU_ALLOC introduces the concept of a HW/driver backed
virtual iommu. This will be used by VMMs to access hardware
features that are contained within a VM. The first use is to
inform the kernel of the virtual SID to physical SID mapping when
issuing SID-based invalidation on ARM. Further uses will tie in HW
features that are directly accessed by the VM, such as invalidation
queue assignment and others.
- IOMMU_VDEVICE_ALLOC informs the kernel about the mapping of virtual
device to physical device within a VIOMMU. Minimally this is used
to translate VM issued cache invalidation commands from virtual to
physical device IDs.
- Enhancements to IOMMU_HWPT_INVALIDATE and IOMMU_HWPT_ALLOC to work
with the VIOMMU.
- ARM SMMUv3 support for nested translation. Using the VIOMMU and
VDEVICE the driver can model this HW's behavior for nested
translation. This includes a shared branch from Will"
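To make the first two items above concrete, here is a minimal userspace sketch of the memfd-backed mapping and the exec() handover. Only the ioctl names (IOMMU_IOAS_MAP_FILE, IOMMU_IOAS_CHANGE_PROCESS) and the memfd_pin_folios() behaviour come from this pull; the request-struct field names used below are assumptions and need to be checked against include/uapi/linux/iommufd.h.

#define _GNU_SOURCE
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/iommufd.h>

/* Map a memfd into an IOAS; folios are pinned directly, no VMA involved. */
static int map_memfd_into_ioas(int iommufd, __u32 ioas_id, size_t len, __u64 iova)
{
	int memfd = memfd_create("guest-ram", MFD_CLOEXEC);

	if (memfd < 0 || ftruncate(memfd, len) < 0)
		return -1;

	/* Assumed field names for the new IOMMU_IOAS_MAP_FILE request */
	struct iommu_ioas_map_file map = {
		.size = sizeof(map),
		.flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_READABLE |
			 IOMMU_IOAS_MAP_WRITEABLE,
		.ioas_id = ioas_id,
		.fd = memfd,
		.start = 0,		/* offset into the memfd */
		.length = len,
		.iova = iova,
	};

	return ioctl(iommufd, IOMMU_IOAS_MAP_FILE, &map);
}

/*
 * After the VMM exec()s a new version of itself and receives the
 * memfds/iommufd over fd-passing, the new process moves the pinned-page
 * accounting onto itself. struct iommu_ioas_change_process is assumed to
 * carry only a size field.
 */
static int adopt_pinned_accounting(int iommufd)
{
	struct iommu_ioas_change_process cmd = { .size = sizeof(cmd) };

	return ioctl(iommufd, IOMMU_IOAS_CHANGE_PROCESS, &cmd);
}

DMA keeps flowing through the memfd-backed mappings while the old process exits and the new one calls adopt_pinned_accounting().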
* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd: (51 commits)
iommu/arm-smmu-v3: Import IOMMUFD module namespace
iommufd: IOMMU_IOAS_CHANGE_PROCESS selftest
iommufd: Add IOMMU_IOAS_CHANGE_PROCESS
iommufd: Lock all IOAS objects
iommufd: Export do_update_pinned
iommu/arm-smmu-v3: Support IOMMU_HWPT_INVALIDATE using a VIOMMU object
iommu/arm-smmu-v3: Allow ATS for IOMMU_DOMAIN_NESTED
iommu/arm-smmu-v3: Use S2FWB for NESTED domains
iommu/arm-smmu-v3: Support IOMMU_DOMAIN_NESTED
iommu/arm-smmu-v3: Support IOMMU_VIOMMU_ALLOC
Documentation: userspace-api: iommufd: Update vDEVICE
iommufd/selftest: Add vIOMMU coverage for IOMMU_HWPT_INVALIDATE ioctl
iommufd/selftest: Add IOMMU_TEST_OP_DEV_CHECK_CACHE test command
iommufd/selftest: Add mock_viommu_cache_invalidate
iommufd/viommu: Add iommufd_viommu_find_dev helper
iommu: Add iommu_copy_struct_from_full_user_array helper
iommufd: Allow hwpt_id to carry viommu_id for IOMMU_HWPT_INVALIDATE
iommu/viommu: Add cache_invalidate to iommufd_viommu_ops
iommufd/selftest: Add IOMMU_VDEVICE_ALLOC test coverage
iommufd/viommu: Add IOMMUFD_OBJ_VDEVICE and IOMMU_VDEVICE_ALLOC ioctl
...
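The VIOMMU/VDEVICE/nested-HWPT objects described in the pull message stack as follows; the new arm-smmu-v3-iommufd.c file in the diffstat below implements the kernel side. This is a hedged sketch only: IOMMU_VIOMMU_TYPE_ARM_SMMUV3, IOMMU_HWPT_DATA_ARM_SMMUV3 and struct iommu_hwpt_arm_smmuv3 appear in the diff, but the other request-struct fields and the helper names are assumptions to verify against include/uapi/linux/iommufd.h.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

struct vsmmu_setup {		/* illustrative helper, not part of the uAPI */
	__u32 viommu_id;
	__u32 nested_hwpt_id;
};

static int setup_nested_smmuv3(int iommufd, __u32 dev_id, __u32 s2_hwpt_id,
			       __u64 guest_sid, const __le64 vste[2],
			       struct vsmmu_setup *out)
{
	/* 1. HW/driver-backed virtual SMMU on top of the S2 nesting parent */
	struct iommu_viommu_alloc viommu = {
		.size = sizeof(viommu),
		.type = IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
		.dev_id = dev_id,
		.hwpt_id = s2_hwpt_id,	/* an IOMMU_HWPT_ALLOC_NEST_PARENT HWPT */
	};
	if (ioctl(iommufd, IOMMU_VIOMMU_ALLOC, &viommu))
		return -1;

	/* 2. Report the virtual SID the VM uses for this device */
	struct iommu_vdevice_alloc vdev = {
		.size = sizeof(vdev),
		.viommu_id = viommu.out_viommu_id,
		.dev_id = dev_id,
		.virt_id = guest_sid,
	};
	if (ioctl(iommufd, IOMMU_VDEVICE_ALLOC, &vdev))
		return -1;

	/* 3. Nested S1 HWPT carrying the guest's STE words (the vSTE) */
	struct iommu_hwpt_arm_smmuv3 ste_data = { .ste = { vste[0], vste[1] } };
	struct iommu_hwpt_alloc hwpt = {
		.size = sizeof(hwpt),
		.dev_id = dev_id,
		.pt_id = viommu.out_viommu_id,	/* assumed: nest under the vIOMMU */
		.data_type = IOMMU_HWPT_DATA_ARM_SMMUV3,
		.data_len = sizeof(ste_data),
		.data_uptr = (__u64)(uintptr_t)&ste_data,
	};
	if (ioctl(iommufd, IOMMU_HWPT_ALLOC, &hwpt))
		return -1;

	out->viommu_id = viommu.out_viommu_id;
	out->nested_hwpt_id = hwpt.out_hwpt_id;
	return 0;
}

The nested HWPT is then attached to the device as usual; the driver turns the vSTE into a physical STE (arm_smmu_make_nested_domain_ste() in the diff below).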
Diffstat (limited to 'drivers/iommu/arm')
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/Makefile | 1
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 401
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 139
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 92
-rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu.c | 16
5 files changed, 585 insertions, 64 deletions
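Guest CMDQ maintenance then rides IOMMU_HWPT_INVALIDATE, whose id field may now carry a vIOMMU id ("Allow hwpt_id to carry viommu_id" above). A hedged sketch of forwarding a batch of guest commands follows; struct iommu_viommu_arm_smmuv3_invalidate and IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 come from the diff below, while the iommu_hwpt_invalidate field names are assumptions.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

/* Forward n raw guest CMDQ entries; the driver fixes up VMID and vSID->SID. */
static int forward_guest_invalidations(int iommufd, __u32 viommu_id,
				       struct iommu_viommu_arm_smmuv3_invalidate *cmds,
				       __u32 n)
{
	struct iommu_hwpt_invalidate inv = {
		.size = sizeof(inv),
		.hwpt_id = viommu_id,	/* hwpt_id carrying a viommu_id */
		.data_type = IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3,
		.entry_len = sizeof(*cmds),
		.entry_num = n,
		.data_uptr = (__u64)(uintptr_t)cmds,
	};

	if (ioctl(iommufd, IOMMU_HWPT_INVALIDATE, &inv))
		return -1;
	return (int)inv.entry_num;	/* entries actually consumed */
}

The kernel side of this path is arm_vsmmu_cache_invalidate() and arm_vsmmu_convert_user_cmd() in the new file below.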
diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile index dc98c88b48c8..493a659cc66b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/Makefile +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o arm_smmu_v3-y := arm-smmu-v3.o +arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_IOMMUFD) += arm-smmu-v3-iommufd.o arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o arm_smmu_v3-$(CONFIG_TEGRA241_CMDQV) += tegra241-cmdqv.o diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c new file mode 100644 index 000000000000..6cc14d82399f --- /dev/null +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -0,0 +1,401 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + */ + +#include <uapi/linux/iommufd.h> + +#include "arm-smmu-v3.h" + +void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type) +{ + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct iommu_hw_info_arm_smmuv3 *info; + u32 __iomem *base_idr; + unsigned int i; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + base_idr = master->smmu->base + ARM_SMMU_IDR0; + for (i = 0; i <= 5; i++) + info->idr[i] = readl_relaxed(base_idr + i); + info->iidr = readl_relaxed(master->smmu->base + ARM_SMMU_IIDR); + info->aidr = readl_relaxed(master->smmu->base + ARM_SMMU_AIDR); + + *length = sizeof(*info); + *type = IOMMU_HW_INFO_TYPE_ARM_SMMUV3; + + return info; +} + +static void arm_smmu_make_nested_cd_table_ste( + struct arm_smmu_ste *target, struct arm_smmu_master *master, + struct arm_smmu_nested_domain *nested_domain, bool ats_enabled) +{ + arm_smmu_make_s2_domain_ste( + target, master, nested_domain->vsmmu->s2_parent, ats_enabled); + + target->data[0] = cpu_to_le64(STRTAB_STE_0_V | + FIELD_PREP(STRTAB_STE_0_CFG, + STRTAB_STE_0_CFG_NESTED)); + target->data[0] |= nested_domain->ste[0] & + ~cpu_to_le64(STRTAB_STE_0_CFG); + target->data[1] |= nested_domain->ste[1]; +} + +/* + * Create a physical STE from the virtual STE that userspace provided when it + * created the nested domain. Using the vSTE userspace can request: + * - Non-valid STE + * - Abort STE + * - Bypass STE (install the S2, no CD table) + * - CD table STE (install the S2 and the userspace CD table) + */ +static void arm_smmu_make_nested_domain_ste( + struct arm_smmu_ste *target, struct arm_smmu_master *master, + struct arm_smmu_nested_domain *nested_domain, bool ats_enabled) +{ + unsigned int cfg = + FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0])); + + /* + * Userspace can request a non-valid STE through the nesting interface. + * We relay that into an abort physical STE with the intention that + * C_BAD_STE for this SID can be generated to userspace. 
+ */ + if (!(nested_domain->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) + cfg = STRTAB_STE_0_CFG_ABORT; + + switch (cfg) { + case STRTAB_STE_0_CFG_S1_TRANS: + arm_smmu_make_nested_cd_table_ste(target, master, nested_domain, + ats_enabled); + break; + case STRTAB_STE_0_CFG_BYPASS: + arm_smmu_make_s2_domain_ste(target, master, + nested_domain->vsmmu->s2_parent, + ats_enabled); + break; + case STRTAB_STE_0_CFG_ABORT: + default: + arm_smmu_make_abort_ste(target); + break; + } +} + +static int arm_smmu_attach_dev_nested(struct iommu_domain *domain, + struct device *dev) +{ + struct arm_smmu_nested_domain *nested_domain = + to_smmu_nested_domain(domain); + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_attach_state state = { + .master = master, + .old_domain = iommu_get_domain_for_dev(dev), + .ssid = IOMMU_NO_PASID, + }; + struct arm_smmu_ste ste; + int ret; + + if (nested_domain->vsmmu->smmu != master->smmu) + return -EINVAL; + if (arm_smmu_ssids_in_use(&master->cd_table)) + return -EBUSY; + + mutex_lock(&arm_smmu_asid_lock); + /* + * The VM has to control the actual ATS state at the PCI device because + * we forward the invalidations directly from the VM. If the VM doesn't + * think ATS is on it will not generate ATC flushes and the ATC will + * become incoherent. Since we can't access the actual virtual PCI ATS + * config bit here base this off the EATS value in the STE. If the EATS + * is set then the VM must generate ATC flushes. + */ + state.disable_ats = !nested_domain->enable_ats; + ret = arm_smmu_attach_prepare(&state, domain); + if (ret) { + mutex_unlock(&arm_smmu_asid_lock); + return ret; + } + + arm_smmu_make_nested_domain_ste(&ste, master, nested_domain, + state.ats_enabled); + arm_smmu_install_ste_for_dev(master, &ste); + arm_smmu_attach_commit(&state); + mutex_unlock(&arm_smmu_asid_lock); + return 0; +} + +static void arm_smmu_domain_nested_free(struct iommu_domain *domain) +{ + kfree(to_smmu_nested_domain(domain)); +} + +static const struct iommu_domain_ops arm_smmu_nested_ops = { + .attach_dev = arm_smmu_attach_dev_nested, + .free = arm_smmu_domain_nested_free, +}; + +static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg, + bool *enable_ats) +{ + unsigned int eats; + unsigned int cfg; + + if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) { + memset(arg->ste, 0, sizeof(arg->ste)); + return 0; + } + + /* EIO is reserved for invalid STE data. 
*/ + if ((arg->ste[0] & ~STRTAB_STE_0_NESTING_ALLOWED) || + (arg->ste[1] & ~STRTAB_STE_1_NESTING_ALLOWED)) + return -EIO; + + cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(arg->ste[0])); + if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS && + cfg != STRTAB_STE_0_CFG_S1_TRANS) + return -EIO; + + /* + * Only Full ATS or ATS UR is supported + * The EATS field will be set by arm_smmu_make_nested_domain_ste() + */ + eats = FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(arg->ste[1])); + arg->ste[1] &= ~cpu_to_le64(STRTAB_STE_1_EATS); + if (eats != STRTAB_STE_1_EATS_ABT && eats != STRTAB_STE_1_EATS_TRANS) + return -EIO; + + if (cfg == STRTAB_STE_0_CFG_S1_TRANS) + *enable_ats = (eats == STRTAB_STE_1_EATS_TRANS); + return 0; +} + +static struct iommu_domain * +arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, + const struct iommu_user_data *user_data) +{ + struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core); + const u32 SUPPORTED_FLAGS = IOMMU_HWPT_FAULT_ID_VALID; + struct arm_smmu_nested_domain *nested_domain; + struct iommu_hwpt_arm_smmuv3 arg; + bool enable_ats = false; + int ret; + + /* + * Faults delivered to the nested domain are faults that originated by + * the S1 in the domain. The core code will match all PASIDs when + * delivering the fault due to user_pasid_table + */ + if (flags & ~SUPPORTED_FLAGS) + return ERR_PTR(-EOPNOTSUPP); + + ret = iommu_copy_struct_from_user(&arg, user_data, + IOMMU_HWPT_DATA_ARM_SMMUV3, ste); + if (ret) + return ERR_PTR(ret); + + ret = arm_smmu_validate_vste(&arg, &enable_ats); + if (ret) + return ERR_PTR(ret); + + nested_domain = kzalloc(sizeof(*nested_domain), GFP_KERNEL_ACCOUNT); + if (!nested_domain) + return ERR_PTR(-ENOMEM); + + nested_domain->domain.type = IOMMU_DOMAIN_NESTED; + nested_domain->domain.ops = &arm_smmu_nested_ops; + nested_domain->enable_ats = enable_ats; + nested_domain->vsmmu = vsmmu; + nested_domain->ste[0] = arg.ste[0]; + nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS); + + return &nested_domain->domain; +} + +static int arm_vsmmu_vsid_to_sid(struct arm_vsmmu *vsmmu, u32 vsid, u32 *sid) +{ + struct arm_smmu_master *master; + struct device *dev; + int ret = 0; + + xa_lock(&vsmmu->core.vdevs); + dev = iommufd_viommu_find_dev(&vsmmu->core, (unsigned long)vsid); + if (!dev) { + ret = -EIO; + goto unlock; + } + master = dev_iommu_priv_get(dev); + + /* At this moment, iommufd only supports PCI device that has one SID */ + if (sid) + *sid = master->streams[0].id; +unlock: + xa_unlock(&vsmmu->core.vdevs); + return ret; +} + +/* This is basically iommu_viommu_arm_smmuv3_invalidate in u64 for conversion */ +struct arm_vsmmu_invalidation_cmd { + union { + u64 cmd[2]; + struct iommu_viommu_arm_smmuv3_invalidate ucmd; + }; +}; + +/* + * Convert, in place, the raw invalidation command into an internal format that + * can be passed to arm_smmu_cmdq_issue_cmdlist(). Internally commands are + * stored in CPU endian. + * + * Enforce the VMID or SID on the command. 
+ */ +static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu, + struct arm_vsmmu_invalidation_cmd *cmd) +{ + /* Commands are le64 stored in u64 */ + cmd->cmd[0] = le64_to_cpu(cmd->ucmd.cmd[0]); + cmd->cmd[1] = le64_to_cpu(cmd->ucmd.cmd[1]); + + switch (cmd->cmd[0] & CMDQ_0_OP) { + case CMDQ_OP_TLBI_NSNH_ALL: + /* Convert to NH_ALL */ + cmd->cmd[0] = CMDQ_OP_TLBI_NH_ALL | + FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid); + cmd->cmd[1] = 0; + break; + case CMDQ_OP_TLBI_NH_VA: + case CMDQ_OP_TLBI_NH_VAA: + case CMDQ_OP_TLBI_NH_ALL: + case CMDQ_OP_TLBI_NH_ASID: + cmd->cmd[0] &= ~CMDQ_TLBI_0_VMID; + cmd->cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid); + break; + case CMDQ_OP_ATC_INV: + case CMDQ_OP_CFGI_CD: + case CMDQ_OP_CFGI_CD_ALL: { + u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd[0]); + + if (arm_vsmmu_vsid_to_sid(vsmmu, vsid, &sid)) + return -EIO; + cmd->cmd[0] &= ~CMDQ_CFGI_0_SID; + cmd->cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid); + break; + } + default: + return -EIO; + } + return 0; +} + +static int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu, + struct iommu_user_data_array *array) +{ + struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core); + struct arm_smmu_device *smmu = vsmmu->smmu; + struct arm_vsmmu_invalidation_cmd *last; + struct arm_vsmmu_invalidation_cmd *cmds; + struct arm_vsmmu_invalidation_cmd *cur; + struct arm_vsmmu_invalidation_cmd *end; + int ret; + + cmds = kcalloc(array->entry_num, sizeof(*cmds), GFP_KERNEL); + if (!cmds) + return -ENOMEM; + cur = cmds; + end = cmds + array->entry_num; + + static_assert(sizeof(*cmds) == 2 * sizeof(u64)); + ret = iommu_copy_struct_from_full_user_array( + cmds, sizeof(*cmds), array, + IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3); + if (ret) + goto out; + + last = cmds; + while (cur != end) { + ret = arm_vsmmu_convert_user_cmd(vsmmu, cur); + if (ret) + goto out; + + /* FIXME work in blocks of CMDQ_BATCH_ENTRIES and copy each block? */ + cur++; + if (cur != end && (cur - last) != CMDQ_BATCH_ENTRIES - 1) + continue; + + /* FIXME always uses the main cmdq rather than trying to group by type */ + ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, last->cmd, + cur - last, true); + if (ret) { + cur--; + goto out; + } + last = cur; + } +out: + array->entry_num = cur - cmds; + kfree(cmds); + return ret; +} + +static const struct iommufd_viommu_ops arm_vsmmu_ops = { + .alloc_domain_nested = arm_vsmmu_alloc_domain_nested, + .cache_invalidate = arm_vsmmu_cache_invalidate, +}; + +struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, + struct iommu_domain *parent, + struct iommufd_ctx *ictx, + unsigned int viommu_type) +{ + struct arm_smmu_device *smmu = + iommu_get_iommu_dev(dev, struct arm_smmu_device, iommu); + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_domain *s2_parent = to_smmu_domain(parent); + struct arm_vsmmu *vsmmu; + + if (viommu_type != IOMMU_VIOMMU_TYPE_ARM_SMMUV3) + return ERR_PTR(-EOPNOTSUPP); + + if (!(smmu->features & ARM_SMMU_FEAT_NESTING)) + return ERR_PTR(-EOPNOTSUPP); + + if (s2_parent->smmu != master->smmu) + return ERR_PTR(-EINVAL); + + /* + * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW + * defect is needed to determine if arm_vsmmu_cache_invalidate() needs + * any change to remove this. + */ + if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) + return ERR_PTR(-EOPNOTSUPP); + + /* + * Must support some way to prevent the VM from bypassing the cache + * because VFIO currently does not do any cache maintenance. 
canwbs + * indicates the device is fully coherent and no cache maintenance is + * ever required, even for PCI No-Snoop. S2FWB means the S1 can't make + * things non-coherent using the memattr, but No-Snoop behavior is not + * effected. + */ + if (!arm_smmu_master_canwbs(master) && + !(smmu->features & ARM_SMMU_FEAT_S2FWB)) + return ERR_PTR(-EOPNOTSUPP); + + vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core, + &arm_vsmmu_ops); + if (IS_ERR(vsmmu)) + return ERR_CAST(vsmmu); + + vsmmu->smmu = smmu; + vsmmu->s2_parent = s2_parent; + /* FIXME Move VMID allocation from the S2 domain allocation to here */ + vsmmu->vmid = s2_parent->s2_cfg.vmid; + + return &vsmmu->core; +} + +MODULE_IMPORT_NS(IOMMUFD); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 353fea58cd31..10b4dbc8d027 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -295,6 +295,7 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) case CMDQ_OP_TLBI_NH_ASID: cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); fallthrough; + case CMDQ_OP_TLBI_NH_ALL: case CMDQ_OP_TLBI_S12_VMALL: cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); break; @@ -765,9 +766,9 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds, * insert their own list of commands then all of the commands from one * CPU will appear before any of the commands from the other CPU. */ -static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, - struct arm_smmu_cmdq *cmdq, - u64 *cmds, int n, bool sync) +int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, u64 *cmds, int n, + bool sync) { u64 cmd_sync[CMDQ_ENT_DWORDS]; u32 prod; @@ -1045,7 +1046,8 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) /* S2 translates */ if (cfg & BIT(1)) { used_bits[1] |= - cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG); + cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS | + STRTAB_STE_1_SHCFG); used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI | @@ -1549,7 +1551,6 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, } } -VISIBLE_IF_KUNIT void arm_smmu_make_abort_ste(struct arm_smmu_ste *target) { memset(target, 0, sizeof(*target)); @@ -1632,7 +1633,6 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, } EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste); -VISIBLE_IF_KUNIT void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, @@ -1655,6 +1655,8 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, FIELD_PREP(STRTAB_STE_1_EATS, ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_S2FWB) + target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB); if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); @@ -2105,7 +2107,16 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, if (!master->ats_enabled) continue; - arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, &cmd); + if (master_domain->nested_ats_flush) { + /* + * If a S2 used as a nesting parent is changed we have + * no option but to completely flush the ATC. 
+ */ + arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); + } else { + arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, + &cmd); + } for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; @@ -2232,6 +2243,15 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, } __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); + if (smmu_domain->nest_parent) { + /* + * When the S2 domain changes all the nested S1 ASIDs have to be + * flushed too. + */ + cmd.opcode = CMDQ_OP_TLBI_NH_ALL; + arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd); + } + /* * Unfortunately, this can't be leaf-only since we may have * zapped an entire table. @@ -2293,6 +2313,8 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) case IOMMU_CAP_CACHE_COHERENCY: /* Assume that a coherent TCU implies coherent TBUs */ return master->smmu->features & ARM_SMMU_FEAT_COHERENCY; + case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: + return arm_smmu_master_canwbs(master); case IOMMU_CAP_NOEXEC: case IOMMU_CAP_DEFERRED_FLUSH: return true; @@ -2303,6 +2325,26 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) } } +static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_master_domain *master_domain; + unsigned long flags; + bool ret = true; + + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + if (!arm_smmu_master_canwbs(master_domain->master)) { + ret = false; + break; + } + } + smmu_domain->enforce_cache_coherency = ret; + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + return ret; +} + struct arm_smmu_domain *arm_smmu_domain_alloc(void) { struct arm_smmu_domain *smmu_domain; @@ -2442,6 +2484,9 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, pgtbl_cfg.oas = smmu->oas; fmt = ARM_64_LPAE_S2; finalise_stage_fn = arm_smmu_domain_finalise_s2; + if ((smmu->features & ARM_SMMU_FEAT_S2FWB) && + (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB; break; default: return -EINVAL; @@ -2483,8 +2528,8 @@ arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) } } -static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master, - const struct arm_smmu_ste *target) +void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master, + const struct arm_smmu_ste *target) { int i, j; struct arm_smmu_device *smmu = master->smmu; @@ -2595,7 +2640,7 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) static struct arm_smmu_master_domain * arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, struct arm_smmu_master *master, - ioasid_t ssid) + ioasid_t ssid, bool nested_ats_flush) { struct arm_smmu_master_domain *master_domain; @@ -2604,7 +2649,8 @@ arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) { if (master_domain->master == master && - master_domain->ssid == ssid) + master_domain->ssid == ssid && + master_domain->nested_ats_flush == nested_ats_flush) return master_domain; } return NULL; @@ -2624,6 +2670,8 @@ to_smmu_domain_devices(struct iommu_domain *domain) if ((domain->type & __IOMMU_DOMAIN_PAGING) || domain->type == IOMMU_DOMAIN_SVA) return to_smmu_domain(domain); + if (domain->type == IOMMU_DOMAIN_NESTED) + return 
to_smmu_nested_domain(domain)->vsmmu->s2_parent; return NULL; } @@ -2633,13 +2681,18 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, { struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain); struct arm_smmu_master_domain *master_domain; + bool nested_ats_flush = false; unsigned long flags; if (!smmu_domain) return; + if (domain->type == IOMMU_DOMAIN_NESTED) + nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats; + spin_lock_irqsave(&smmu_domain->devices_lock, flags); - master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid); + master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid, + nested_ats_flush); if (master_domain) { list_del(&master_domain->devices_elm); kfree(master_domain); @@ -2649,16 +2702,6 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); } -struct arm_smmu_attach_state { - /* Inputs */ - struct iommu_domain *old_domain; - struct arm_smmu_master *master; - bool cd_needs_ats; - ioasid_t ssid; - /* Resulting state */ - bool ats_enabled; -}; - /* * Start the sequence to attach a domain to a master. The sequence contains three * steps: @@ -2679,8 +2722,8 @@ struct arm_smmu_attach_state { * new_domain can be a non-paging domain. In this case ATS will not be enabled, * and invalidations won't be tracked. */ -static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, - struct iommu_domain *new_domain) +int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, + struct iommu_domain *new_domain) { struct arm_smmu_master *master = state->master; struct arm_smmu_master_domain *master_domain; @@ -2706,7 +2749,8 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, * enabled if we have arm_smmu_domain, those always have page * tables. */ - state->ats_enabled = arm_smmu_ats_supported(master); + state->ats_enabled = !state->disable_ats && + arm_smmu_ats_supported(master); } if (smmu_domain) { @@ -2715,6 +2759,9 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, return -ENOMEM; master_domain->master = master; master_domain->ssid = state->ssid; + if (new_domain->type == IOMMU_DOMAIN_NESTED) + master_domain->nested_ats_flush = + to_smmu_nested_domain(new_domain)->enable_ats; /* * During prepare we want the current smmu_domain and new @@ -2731,6 +2778,14 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, * one of them. */ spin_lock_irqsave(&smmu_domain->devices_lock, flags); + if (smmu_domain->enforce_cache_coherency && + !arm_smmu_master_canwbs(master)) { + spin_unlock_irqrestore(&smmu_domain->devices_lock, + flags); + kfree(master_domain); + return -EINVAL; + } + if (state->ats_enabled) atomic_inc(&smmu_domain->nr_ats_masters); list_add(&master_domain->devices_elm, &smmu_domain->devices); @@ -2754,7 +2809,7 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, * completes synchronizing the PCI device's ATC and finishes manipulating the * smmu_domain->devices list. 
*/ -static void arm_smmu_attach_commit(struct arm_smmu_attach_state *state) +void arm_smmu_attach_commit(struct arm_smmu_attach_state *state) { struct arm_smmu_master *master = state->master; @@ -3084,7 +3139,8 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags, const struct iommu_user_data *user_data) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); - const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING; + const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING | + IOMMU_HWPT_ALLOC_NEST_PARENT; struct arm_smmu_domain *smmu_domain; int ret; @@ -3097,6 +3153,15 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags, if (IS_ERR(smmu_domain)) return ERR_CAST(smmu_domain); + if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT) { + if (!(master->smmu->features & ARM_SMMU_FEAT_NESTING)) { + ret = -EOPNOTSUPP; + goto err_free; + } + smmu_domain->stage = ARM_SMMU_DOMAIN_S2; + smmu_domain->nest_parent = true; + } + smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED; smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops; ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags); @@ -3378,21 +3443,6 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev) return group; } -static int arm_smmu_enable_nesting(struct iommu_domain *domain) -{ - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - int ret = 0; - - mutex_lock(&smmu_domain->init_mutex); - if (smmu_domain->smmu) - ret = -EPERM; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_S2; - mutex_unlock(&smmu_domain->init_mutex); - - return ret; -} - static int arm_smmu_of_xlate(struct device *dev, const struct of_phandle_args *args) { @@ -3491,6 +3541,7 @@ static struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, + .hw_info = arm_smmu_hw_info, .domain_alloc_paging = arm_smmu_domain_alloc_paging, .domain_alloc_sva = arm_smmu_sva_domain_alloc, .domain_alloc_user = arm_smmu_domain_alloc_user, @@ -3504,17 +3555,19 @@ static struct iommu_ops arm_smmu_ops = { .dev_disable_feat = arm_smmu_dev_disable_feature, .page_response = arm_smmu_page_response, .def_domain_type = arm_smmu_def_domain_type, + .viommu_alloc = arm_vsmmu_alloc, + .user_pasid_table = 1, .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = arm_smmu_attach_dev, + .enforce_cache_coherency = arm_smmu_enforce_cache_coherency, .set_dev_pasid = arm_smmu_s1_set_dev_pasid, .map_pages = arm_smmu_map_pages, .unmap_pages = arm_smmu_unmap_pages, .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, - .enable_nesting = arm_smmu_enable_nesting, .free = arm_smmu_domain_free_paging, } }; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 1e9952ca989f..af25f092303f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -10,6 +10,7 @@ #include <linux/bitfield.h> #include <linux/iommu.h> +#include <linux/iommufd.h> #include <linux/kernel.h> #include <linux/mmzone.h> #include <linux/sizes.h> @@ -57,6 +58,7 @@ struct arm_smmu_device; #define IDR1_SIDSIZE GENMASK(5, 0) #define ARM_SMMU_IDR3 0xc +#define IDR3_FWB (1 << 8) #define IDR3_RIL (1 << 10) #define ARM_SMMU_IDR5 0x14 @@ -81,6 +83,8 @@ struct arm_smmu_device; #define IIDR_REVISION GENMASK(15, 12) #define IIDR_IMPLEMENTER GENMASK(11, 
0) +#define ARM_SMMU_AIDR 0x1C + #define ARM_SMMU_CR0 0x20 #define CR0_ATSCHK (1 << 4) #define CR0_CMDQEN (1 << 3) @@ -241,6 +245,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_0_CFG_BYPASS 4 #define STRTAB_STE_0_CFG_S1_TRANS 5 #define STRTAB_STE_0_CFG_S2_TRANS 6 +#define STRTAB_STE_0_CFG_NESTED 7 #define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4) #define STRTAB_STE_0_S1FMT_LINEAR 0 @@ -261,6 +266,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4) #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6) +#define STRTAB_STE_1_S2FWB (1UL << 25) #define STRTAB_STE_1_S1STALLD (1UL << 27) #define STRTAB_STE_1_EATS GENMASK_ULL(29, 28) @@ -292,6 +298,15 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4) +/* These bits can be controlled by userspace for STRTAB_STE_0_CFG_NESTED */ +#define STRTAB_STE_0_NESTING_ALLOWED \ + cpu_to_le64(STRTAB_STE_0_V | STRTAB_STE_0_CFG | STRTAB_STE_0_S1FMT | \ + STRTAB_STE_0_S1CTXPTR_MASK | STRTAB_STE_0_S1CDMAX) +#define STRTAB_STE_1_NESTING_ALLOWED \ + cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR | \ + STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH | \ + STRTAB_STE_1_S1STALLD | STRTAB_STE_1_EATS) + /* * Context descriptors. * @@ -511,8 +526,10 @@ struct arm_smmu_cmdq_ent { }; } cfgi; + #define CMDQ_OP_TLBI_NH_ALL 0x10 #define CMDQ_OP_TLBI_NH_ASID 0x11 #define CMDQ_OP_TLBI_NH_VA 0x12 + #define CMDQ_OP_TLBI_NH_VAA 0x13 #define CMDQ_OP_TLBI_EL2_ALL 0x20 #define CMDQ_OP_TLBI_EL2_ASID 0x21 #define CMDQ_OP_TLBI_EL2_VA 0x22 @@ -726,6 +743,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20) #define ARM_SMMU_FEAT_HA (1 << 21) #define ARM_SMMU_FEAT_HD (1 << 22) +#define ARM_SMMU_FEAT_S2FWB (1 << 23) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) @@ -811,10 +829,20 @@ struct arm_smmu_domain { /* List of struct arm_smmu_master_domain */ struct list_head devices; spinlock_t devices_lock; + bool enforce_cache_coherency : 1; + bool nest_parent : 1; struct mmu_notifier mmu_notifier; }; +struct arm_smmu_nested_domain { + struct iommu_domain domain; + struct arm_vsmmu *vsmmu; + bool enable_ats : 1; + + __le64 ste[2]; +}; + /* The following are exposed for testing purposes. 
*/ struct arm_smmu_entry_writer_ops; struct arm_smmu_entry_writer { @@ -827,21 +855,22 @@ struct arm_smmu_entry_writer_ops { void (*sync)(struct arm_smmu_entry_writer *writer); }; +void arm_smmu_make_abort_ste(struct arm_smmu_ste *target); +void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, + struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain, + bool ats_enabled); + #if IS_ENABLED(CONFIG_KUNIT) void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits); void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *cur, const __le64 *target); void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits); -void arm_smmu_make_abort_ste(struct arm_smmu_ste *target); void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu, struct arm_smmu_ste *target); void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, struct arm_smmu_master *master, bool ats_enabled, unsigned int s1dss); -void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, - struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain, - bool ats_enabled); void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, struct arm_smmu_master *master, struct mm_struct *mm, u16 asid); @@ -851,6 +880,7 @@ struct arm_smmu_master_domain { struct list_head devices_elm; struct arm_smmu_master *master; ioasid_t ssid; + bool nested_ats_flush : 1; }; static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) @@ -858,6 +888,12 @@ static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) return container_of(dom, struct arm_smmu_domain, domain); } +static inline struct arm_smmu_nested_domain * +to_smmu_nested_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct arm_smmu_nested_domain, domain); +} + extern struct xarray arm_smmu_asid_xa; extern struct mutex arm_smmu_asid_lock; @@ -893,6 +929,33 @@ int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, struct arm_smmu_cmdq *cmdq); +static inline bool arm_smmu_master_canwbs(struct arm_smmu_master *master) +{ + return dev_iommu_fwspec_get(master->dev)->flags & + IOMMU_FWSPEC_PCI_RC_CANWBS; +} + +struct arm_smmu_attach_state { + /* Inputs */ + struct iommu_domain *old_domain; + struct arm_smmu_master *master; + bool cd_needs_ats; + bool disable_ats; + ioasid_t ssid; + /* Resulting state */ + bool ats_enabled; +}; + +int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, + struct iommu_domain *new_domain); +void arm_smmu_attach_commit(struct arm_smmu_attach_state *state); +void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master, + const struct arm_smmu_ste *target); + +int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, u64 *cmds, int n, + bool sync); + #ifdef CONFIG_ARM_SMMU_V3_SVA bool arm_smmu_sva_supported(struct arm_smmu_device *smmu); bool arm_smmu_master_sva_supported(struct arm_smmu_master *master); @@ -949,4 +1012,23 @@ tegra241_cmdqv_probe(struct arm_smmu_device *smmu) return ERR_PTR(-ENODEV); } #endif /* CONFIG_TEGRA241_CMDQV */ + +struct arm_vsmmu { + struct iommufd_viommu core; + struct arm_smmu_device *smmu; + struct arm_smmu_domain *s2_parent; + u16 vmid; +}; + +#if IS_ENABLED(CONFIG_ARM_SMMU_V3_IOMMUFD) +void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type); +struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, + struct iommu_domain *parent, + struct iommufd_ctx *ictx, + unsigned int viommu_type); +#else +#define arm_smmu_hw_info NULL 
+#define arm_vsmmu_alloc NULL +#endif /* CONFIG_ARM_SMMU_V3_IOMMUFD */ + #endif /* _ARM_SMMU_V3_H */ diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 8321962b3714..12b173eec454 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1558,21 +1558,6 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev) return group; } -static int arm_smmu_enable_nesting(struct iommu_domain *domain) -{ - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - int ret = 0; - - mutex_lock(&smmu_domain->init_mutex); - if (smmu_domain->smmu) - ret = -EPERM; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; - mutex_unlock(&smmu_domain->init_mutex); - - return ret; -} - static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks) { @@ -1656,7 +1641,6 @@ static struct iommu_ops arm_smmu_ops = { .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, - .enable_nesting = arm_smmu_enable_nesting, .set_pgtable_quirks = arm_smmu_set_pgtable_quirks, .free = arm_smmu_domain_free, } |