// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
*/

#include <uapi/linux/iommufd.h>

#include "arm-smmu-v3.h"
void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
struct iommu_hw_info_arm_smmuv3 *info;
u32 __iomem *base_idr;
unsigned int i;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return ERR_PTR(-ENOMEM);
base_idr = master->smmu->base + ARM_SMMU_IDR0;
for (i = 0; i <= 5; i++)
info->idr[i] = readl_relaxed(base_idr + i);
info->iidr = readl_relaxed(master->smmu->base + ARM_SMMU_IIDR);
info->aidr = readl_relaxed(master->smmu->base + ARM_SMMU_AIDR);
*length = sizeof(*info);
*type = IOMMU_HW_INFO_TYPE_ARM_SMMUV3;
return info;
}
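
/*
 * Build the physical STE for a vSTE that points at a guest-owned CD table:
 * start from the S2-only STE of the parent domain, then overlay the CD table
 * fields from the guest's STE and switch the config to nested (S1 followed
 * by S2) translation.
 */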
static void arm_smmu_make_nested_cd_table_ste(
struct arm_smmu_ste *target, struct arm_smmu_master *master,
struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
arm_smmu_make_s2_domain_ste(
target, master, nested_domain->vsmmu->s2_parent, ats_enabled);
target->data[0] = cpu_to_le64(STRTAB_STE_0_V |
FIELD_PREP(STRTAB_STE_0_CFG,
STRTAB_STE_0_CFG_NESTED));
target->data[0] |= nested_domain->ste[0] &
~cpu_to_le64(STRTAB_STE_0_CFG);
target->data[1] |= nested_domain->ste[1];
}

/*
* Create a physical STE from the virtual STE that userspace provided when it
* created the nested domain. Using the vSTE userspace can request:
* - Non-valid STE
* - Abort STE
* - Bypass STE (install the S2, no CD table)
* - CD table STE (install the S2 and the userspace CD table)
*/
static void arm_smmu_make_nested_domain_ste(
struct arm_smmu_ste *target, struct arm_smmu_master *master,
struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
unsigned int cfg =
FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0]));
/*
 * Userspace can request a non-valid STE through the nesting interface.
 * We translate that into an abort physical STE so that C_BAD_STE for
 * this SID can be generated to userspace.
 */
if (!(nested_domain->ste[0] & cpu_to_le64(STRTAB_STE_0_V)))
cfg = STRTAB_STE_0_CFG_ABORT;
switch (cfg) {
case STRTAB_STE_0_CFG_S1_TRANS:
arm_smmu_make_nested_cd_table_ste(target, master, nested_domain,
ats_enabled);
break;
case STRTAB_STE_0_CFG_BYPASS:
arm_smmu_make_s2_domain_ste(target, master,
nested_domain->vsmmu->s2_parent,
ats_enabled);
break;
case STRTAB_STE_0_CFG_ABORT:
default:
arm_smmu_make_abort_ste(target);
break;
}
}
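
/*
 * Attach a device to a nested domain. The physical STE is derived from the
 * guest's vSTE layered on the vSMMU's S2 parent domain, and ATS stays
 * enabled only if the guest requested it through the EATS field of the vSTE.
 */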
static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
struct device *dev)
{
struct arm_smmu_nested_domain *nested_domain =
to_smmu_nested_domain(domain);
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
struct arm_smmu_attach_state state = {
.master = master,
.old_domain = iommu_get_domain_for_dev(dev),
.ssid = IOMMU_NO_PASID,
};
struct arm_smmu_ste ste;
int ret;
if (nested_domain->vsmmu->smmu != master->smmu)
return -EINVAL;
if (arm_smmu_ssids_in_use(&master->cd_table))
return -EBUSY;
mutex_lock(&arm_smmu_asid_lock);
/*
 * The VM has to control the actual ATS state at the PCI device because
 * we forward the invalidations directly from the VM. If the VM doesn't
 * think ATS is on, it will not generate ATC flushes and the ATC will
 * become incoherent. Since we can't access the actual virtual PCI ATS
 * config bit here, base this off the EATS value in the STE. If the EATS
 * is set then the VM must generate ATC flushes.
 */
state.disable_ats = !nested_domain->enable_ats;
ret = arm_smmu_attach_prepare(&state, domain);
if (ret) {
mutex_unlock(&arm_smmu_asid_lock);
return ret;
}
arm_smmu_make_nested_domain_ste(&ste, master, nested_domain,
state.ats_enabled);
arm_smmu_install_ste_for_dev(master, &ste);
arm_smmu_attach_commit(&state);
mutex_unlock(&arm_smmu_asid_lock);
return 0;
}

static void arm_smmu_domain_nested_free(struct iommu_domain *domain)
{
kfree(to_smmu_nested_domain(domain));
}

static const struct iommu_domain_ops arm_smmu_nested_ops = {
.attach_dev = arm_smmu_attach_dev_nested,
.free = arm_smmu_domain_nested_free,
};
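
/*
 * Sanity check the vSTE that userspace provided. Only the Abort, Bypass and
 * S1 translate configs are accepted, and only STE fields that a guest is
 * allowed to control may be set; anything else fails with -EIO. A non-valid
 * vSTE is normalized to an all-zero STE.
 */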
static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg,
bool *enable_ats)
{
unsigned int eats;
unsigned int cfg;
if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) {
memset(arg->ste, 0, sizeof(arg->ste));
return 0;
}
/* EIO is reserved for invalid STE data. */
if ((arg->ste[0] & ~STRTAB_STE_0_NESTING_ALLOWED) ||
(arg->ste[1] & ~STRTAB_STE_1_NESTING_ALLOWED))
return -EIO;
cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(arg->ste[0]));
if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS &&
cfg != STRTAB_STE_0_CFG_S1_TRANS)
return -EIO;
/*
 * Only Full ATS or ATS UR is supported. The EATS field will be set by
 * arm_smmu_make_nested_domain_ste().
 */
eats = FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(arg->ste[1]));
arg->ste[1] &= ~cpu_to_le64(STRTAB_STE_1_EATS);
if (eats != STRTAB_STE_1_EATS_ABT && eats != STRTAB_STE_1_EATS_TRANS)
return -EIO;
if (cfg == STRTAB_STE_0_CFG_S1_TRANS)
*enable_ats = (eats == STRTAB_STE_1_EATS_TRANS);
return 0;
}
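
/*
 * Allocate an IOMMU_DOMAIN_NESTED domain from the vSTE that userspace passed
 * in IOMMU_HWPT_DATA_ARM_SMMUV3. The validated vSTE is stored in the domain
 * and only turned into a physical STE at attach time.
 */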
static struct iommu_domain *
arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
const struct iommu_user_data *user_data)
{
struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
const u32 SUPPORTED_FLAGS = IOMMU_HWPT_FAULT_ID_VALID;
struct arm_smmu_nested_domain *nested_domain;
struct iommu_hwpt_arm_smmuv3 arg;
bool enable_ats = false;
int ret;
/*
 * Faults delivered to the nested domain are faults that originate from
 * the S1 in the domain. The core code will match all PASIDs when
 * delivering the fault due to user_pasid_table.
 */
if (flags & ~SUPPORTED_FLAGS)
return ERR_PTR(-EOPNOTSUPP);
ret = iommu_copy_struct_from_user(&arg, user_data,
IOMMU_HWPT_DATA_ARM_SMMUV3, ste);
if (ret)
return ERR_PTR(ret);
ret = arm_smmu_validate_vste(&arg, &enable_ats);
if (ret)
return ERR_PTR(ret);
nested_domain = kzalloc(sizeof(*nested_domain), GFP_KERNEL_ACCOUNT);
if (!nested_domain)
return ERR_PTR(-ENOMEM);
nested_domain->domain.type = IOMMU_DOMAIN_NESTED;
nested_domain->domain.ops = &arm_smmu_nested_ops;
nested_domain->enable_ats = enable_ats;
nested_domain->vsmmu = vsmmu;
nested_domain->ste[0] = arg.ste[0];
nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS);
return &nested_domain->domain;
}
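
/*
 * Translate a virtual StreamID used by the guest into the physical StreamID
 * of the device that the VMM attached to this vSMMU at that vSID.
 */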
static int arm_vsmmu_vsid_to_sid(struct arm_vsmmu *vsmmu, u32 vsid, u32 *sid)
{
struct arm_smmu_master *master;
struct device *dev;
int ret = 0;
xa_lock(&vsmmu->core.vdevs);
dev = iommufd_viommu_find_dev(&vsmmu->core, (unsigned long)vsid);
if (!dev) {
ret = -EIO;
goto unlock;
}
master = dev_iommu_priv_get(dev);
/* At the moment, iommufd only supports PCI devices that have a single SID */
if (sid)
*sid = master->streams[0].id;
unlock:
xa_unlock(&vsmmu->core.vdevs);
return ret;
}

/*
 * The same layout as iommu_viommu_arm_smmuv3_invalidate, viewed as u64s so
 * the command can be converted in place.
 */
struct arm_vsmmu_invalidation_cmd {
union {
u64 cmd[2];
struct iommu_viommu_arm_smmuv3_invalidate ucmd;
};
};

/*
* Convert, in place, the raw invalidation command into an internal format that
* can be passed to arm_smmu_cmdq_issue_cmdlist(). Internally commands are
* stored in CPU endian.
*
* Enforce the VMID or SID on the command.
*/
static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
struct arm_vsmmu_invalidation_cmd *cmd)
{
/* Commands are le64 stored in u64 */
cmd->cmd[0] = le64_to_cpu(cmd->ucmd.cmd[0]);
cmd->cmd[1] = le64_to_cpu(cmd->ucmd.cmd[1]);
switch (cmd->cmd[0] & CMDQ_0_OP) {
case CMDQ_OP_TLBI_NSNH_ALL:
/* Convert to NH_ALL */
cmd->cmd[0] = CMDQ_OP_TLBI_NH_ALL |
FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
cmd->cmd[1] = 0;
break;
case CMDQ_OP_TLBI_NH_VA:
case CMDQ_OP_TLBI_NH_VAA:
case CMDQ_OP_TLBI_NH_ALL:
case CMDQ_OP_TLBI_NH_ASID:
cmd->cmd[0] &= ~CMDQ_TLBI_0_VMID;
cmd->cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
break;
case CMDQ_OP_ATC_INV:
case CMDQ_OP_CFGI_CD:
case CMDQ_OP_CFGI_CD_ALL: {
u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd[0]);
if (arm_vsmmu_vsid_to_sid(vsmmu, vsid, &sid))
return -EIO;
cmd->cmd[0] &= ~CMDQ_CFGI_0_SID;
cmd->cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
break;
}
default:
return -EIO;
}
return 0;
}
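
/*
 * Handle a batch of invalidation commands from the guest: copy the user
 * array, convert each command and constrain it to this vSMMU's VMID/SIDs,
 * then issue the result to the command queue in chunks. On failure
 * array->entry_num is updated to reflect how far the batch got.
 */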
static int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
struct iommu_user_data_array *array)
{
struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
struct arm_smmu_device *smmu = vsmmu->smmu;
struct arm_vsmmu_invalidation_cmd *last;
struct arm_vsmmu_invalidation_cmd *cmds;
struct arm_vsmmu_invalidation_cmd *cur;
struct arm_vsmmu_invalidation_cmd *end;
int ret;
cmds = kcalloc(array->entry_num, sizeof(*cmds), GFP_KERNEL);
if (!cmds)
return -ENOMEM;
cur = cmds;
end = cmds + array->entry_num;
static_assert(sizeof(*cmds) == 2 * sizeof(u64));
ret = iommu_copy_struct_from_full_user_array(
cmds, sizeof(*cmds), array,
IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3);
if (ret)
goto out;
last = cmds;
while (cur != end) {
ret = arm_vsmmu_convert_user_cmd(vsmmu, cur);
if (ret)
goto out;
/* FIXME work in blocks of CMDQ_BATCH_ENTRIES and copy each block? */
cur++;
if (cur != end && (cur - last) != CMDQ_BATCH_ENTRIES - 1)
continue;
/* FIXME always uses the main cmdq rather than trying to group by type */
ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, last->cmd,
cur - last, true);
if (ret) {
cur--;
goto out;
}
last = cur;
}
out:
array->entry_num = cur - cmds;
kfree(cmds);
return ret;
}

static const struct iommufd_viommu_ops arm_vsmmu_ops = {
.alloc_domain_nested = arm_vsmmu_alloc_domain_nested,
.cache_invalidate = arm_vsmmu_cache_invalidate,
};
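
/*
 * Allocate the vSMMU object backing a user-created vIOMMU. The S2 parent
 * domain must belong to the same physical SMMU as the device, and the HW
 * must be able to keep VFIO-mapped memory coherent (canwbs or S2FWB) since
 * VFIO does no cache maintenance on the IOAS pages.
 */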
struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
struct iommu_domain *parent,
struct iommufd_ctx *ictx,
unsigned int viommu_type)
{
struct arm_smmu_device *smmu =
iommu_get_iommu_dev(dev, struct arm_smmu_device, iommu);
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
struct arm_smmu_domain *s2_parent = to_smmu_domain(parent);
struct arm_vsmmu *vsmmu;
if (viommu_type != IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
return ERR_PTR(-EOPNOTSUPP);
if (!(smmu->features & ARM_SMMU_FEAT_NESTING))
return ERR_PTR(-EOPNOTSUPP);
if (s2_parent->smmu != master->smmu)
return ERR_PTR(-EINVAL);
/*
* FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW
* defect is needed to determine if arm_vsmmu_cache_invalidate() needs
* any change to remove this.
*/
if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC))
return ERR_PTR(-EOPNOTSUPP);
/*
* Must support some way to prevent the VM from bypassing the cache
* because VFIO currently does not do any cache maintenance. canwbs
* indicates the device is fully coherent and no cache maintenance is
* ever required, even for PCI No-Snoop. S2FWB means the S1 can't make
* things non-coherent using the memattr, but No-Snoop behavior is not
 * affected.
*/
if (!arm_smmu_master_canwbs(master) &&
!(smmu->features & ARM_SMMU_FEAT_S2FWB))
return ERR_PTR(-EOPNOTSUPP);
vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core,
&arm_vsmmu_ops);
if (IS_ERR(vsmmu))
return ERR_CAST(vsmmu);
vsmmu->smmu = smmu;
vsmmu->s2_parent = s2_parent;
/* FIXME Move VMID allocation from the S2 domain allocation to here */
vsmmu->vmid = s2_parent->s2_cfg.vmid;
return &vsmmu->core;
}

MODULE_IMPORT_NS(IOMMUFD);