// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
30 #include "arm-smmu-v3.h"
31 #include "../../dma-iommu.h"
33 static bool disable_bypass = true;
34 module_param(disable_bypass, bool, 0444);
35 MODULE_PARM_DESC(disable_bypass,
36 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
38 static bool disable_msipolling;
39 module_param(disable_msipolling, bool, 0444);
40 MODULE_PARM_DESC(disable_msipolling,
41 "Disable MSI-based polling for CMD_SYNC completion.");
enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};

static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};
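/*
 * Illustrative sketch (compiled out, not part of the driver): how one row of
 * the table above could be consumed when programming an MSI target, assuming
 * CFG0 holds the doorbell address, CFG1 the payload and CFG2 the memory
 * attributes. The function name and its use of struct msi_msg are hypothetical.
 */
#if 0
static void example_program_msi_target(struct arm_smmu_device *smmu,
				       struct msi_msg *msg,
				       enum arm_smmu_msi_index idx)
{
	phys_addr_t doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;

	writeq_relaxed(doorbell, smmu->base + arm_smmu_msi_cfg[idx][0]);
	writel_relaxed(msg->data, smmu->base + arm_smmu_msi_cfg[idx][1]);
	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE,
		       smmu->base + arm_smmu_msi_cfg[idx][2]);
}
#endif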
68 struct arm_smmu_option_prop {
73 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
74 DEFINE_MUTEX(arm_smmu_asid_lock);
/*
 * Special value used by SVA when a process dies, to quiesce a CD without
 * disabling stall.
 */
struct arm_smmu_ctx_desc quiet_cd = { 0 };
82 static struct arm_smmu_option_prop arm_smmu_options[] = {
83 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
84 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88 static void parse_driver_options(struct arm_smmu_device *smmu)
93 if (of_property_read_bool(smmu->dev->of_node,
94 arm_smmu_options[i].prop)) {
95 smmu->options |= arm_smmu_options[i].opt;
96 dev_notice(smmu->dev, "option %s\n",
97 arm_smmu_options[i].prop);
99 } while (arm_smmu_options[++i].opt);
102 /* Low-level queue manipulation functions */
103 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
105 u32 space, prod, cons;
107 prod = Q_IDX(q, q->prod);
108 cons = Q_IDX(q, q->cons);
110 if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
111 space = (1 << q->max_n_shift) - (prod - cons);
118 static bool queue_full(struct arm_smmu_ll_queue *q)
120 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
121 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
124 static bool queue_empty(struct arm_smmu_ll_queue *q)
126 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
127 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
130 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
132 return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
133 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
134 ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
135 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
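/*
 * Illustrative model (compiled out, not part of the driver) of the wrap-bit
 * scheme used by the helpers above: one extra bit is kept above the index so
 * that "full" (indices equal, wrap bits differ) can be distinguished from
 * "empty" (indices equal, wrap bits equal). The EX_* names and the 8-entry
 * queue size are hypothetical.
 */
#if 0
#define EX_SHIFT	3				/* 2^3 = 8 entries */
#define EX_IDX(p)	((p) & (BIT(EX_SHIFT) - 1))	/* index bits */
#define EX_WRP(p)	((p) & BIT(EX_SHIFT))		/* wrap bit */

static bool example_queue_full(u32 prod, u32 cons)
{
	return EX_IDX(prod) == EX_IDX(cons) && EX_WRP(prod) != EX_WRP(cons);
}

static bool example_queue_empty(u32 prod, u32 cons)
{
	return EX_IDX(prod) == EX_IDX(cons) && EX_WRP(prod) == EX_WRP(cons);
}
#endif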
138 static void queue_sync_cons_out(struct arm_smmu_queue *q)
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
145 writel_relaxed(q->llq.cons, q->cons_reg);
148 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
150 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
151 q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
154 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
156 struct arm_smmu_ll_queue *llq = &q->llq;
158 if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
161 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
162 Q_IDX(llq, llq->cons);
163 queue_sync_cons_out(q);
166 static int queue_sync_prod_in(struct arm_smmu_queue *q)
	/*
	 * We can't use the _relaxed() variant here, as we must prevent
	 * speculative reads of the queue before we have determined that
	 * prod has indeed moved.
	 */
176 prod = readl(q->prod_reg);
178 if (Q_OVF(prod) != Q_OVF(q->llq.prod))
185 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
187 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
188 return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
191 static void queue_poll_init(struct arm_smmu_device *smmu,
192 struct arm_smmu_queue_poll *qp)
196 qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
197 qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
200 static int queue_poll(struct arm_smmu_queue_poll *qp)
202 if (ktime_compare(ktime_get(), qp->timeout) > 0)
207 } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
218 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
222 for (i = 0; i < n_dwords; ++i)
223 *dst++ = cpu_to_le64(*src++);
226 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
230 for (i = 0; i < n_dwords; ++i)
231 *dst++ = le64_to_cpu(*src++);
234 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
236 if (queue_empty(&q->llq))
239 queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
240 queue_inc_cons(&q->llq);
241 queue_sync_cons_out(q);
245 /* High-level queue accessors */
246 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
248 memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
249 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
251 switch (ent->opcode) {
252 case CMDQ_OP_TLBI_EL2_ALL:
253 case CMDQ_OP_TLBI_NSNH_ALL:
255 case CMDQ_OP_PREFETCH_CFG:
256 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
258 case CMDQ_OP_CFGI_CD:
259 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
261 case CMDQ_OP_CFGI_STE:
262 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
263 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
265 case CMDQ_OP_CFGI_CD_ALL:
266 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
268 case CMDQ_OP_CFGI_ALL:
269 /* Cover the entire SID range */
270 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
272 case CMDQ_OP_TLBI_NH_VA:
273 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
275 case CMDQ_OP_TLBI_EL2_VA:
276 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
277 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
278 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
279 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
280 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
281 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
282 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
284 case CMDQ_OP_TLBI_S2_IPA:
285 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
286 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
287 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
288 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
289 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
290 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
291 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
293 case CMDQ_OP_TLBI_NH_ASID:
294 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
296 case CMDQ_OP_TLBI_S12_VMALL:
297 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
299 case CMDQ_OP_TLBI_EL2_ASID:
300 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
302 case CMDQ_OP_ATC_INV:
303 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
304 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
305 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
306 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
307 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
308 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
310 case CMDQ_OP_PRI_RESP:
311 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
312 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
313 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
314 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
315 switch (ent->pri.resp) {
323 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
326 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
327 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
328 cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
330 case CMDQ_OP_CMD_SYNC:
331 if (ent->sync.msiaddr) {
332 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
333 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
335 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
337 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
338 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
347 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
352 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
353 struct arm_smmu_queue *q, u32 prod)
355 struct arm_smmu_cmdq_ent ent = {
356 .opcode = CMDQ_OP_CMD_SYNC,
	/*
	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
	 * payload, so the write will zero the entire command on that platform.
	 */
363 if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
364 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
368 arm_smmu_cmdq_build_cmd(cmd, &ent);
371 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
372 struct arm_smmu_queue *q)
374 static const char * const cerror_str[] = {
375 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
376 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
377 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
378 [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
382 u64 cmd[CMDQ_ENT_DWORDS];
383 u32 cons = readl_relaxed(q->cons_reg);
384 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
385 struct arm_smmu_cmdq_ent cmd_sync = {
386 .opcode = CMDQ_OP_CMD_SYNC,
389 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
390 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
393 case CMDQ_ERR_CERROR_ABT_IDX:
394 dev_err(smmu->dev, "retrying command fetch\n");
396 case CMDQ_ERR_CERROR_NONE_IDX:
398 case CMDQ_ERR_CERROR_ATC_INV_IDX:
		/*
		 * ATC Invalidation Completion timeout. CONS is still pointing
		 * at the CMD_SYNC. Attempt to complete other pending commands
		 * by repeating the CMD_SYNC, though we might well end up back
		 * here since the ATC invalidation may still be pending.
		 */
406 case CMDQ_ERR_CERROR_ILL_IDX:
	/*
	 * We may have concurrent producers, so we need to be careful
	 * not to touch any of the shadow cmdq state.
	 */
415 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
416 dev_err(smmu->dev, "skipping command in error state:\n");
417 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
418 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
420 /* Convert the erroneous command into a CMD_SYNC */
421 arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
423 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
426 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
428 __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
/*
 * Command queue locking.
 * This is a form of bastardised rwlock with the following major changes:
 *
 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 *   Neither have barrier semantics, and instead provide only a control
 *   dependency.
 *
 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 *   fails if the caller appears to be the last lock holder (yes, this is
 *   racy). All successful UNLOCK routines have RELEASE semantics.
 */
443 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
448 * We can try to avoid the cmpxchg() loop by simply incrementing the
449 * lock counter. When held in exclusive state, the lock counter is set
450 * to INT_MIN so these increments won't hurt as the value will remain
453 if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
457 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
458 } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
461 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
463 (void)atomic_dec_return_release(&cmdq->lock);
466 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
468 if (atomic_read(&cmdq->lock) == 1)
471 arm_smmu_cmdq_shared_unlock(cmdq);
475 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \
478 local_irq_save(flags); \
479 __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \
481 local_irq_restore(flags); \
485 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \
487 atomic_set_release(&cmdq->lock, 0); \
488 local_irq_restore(flags); \
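/*
 * Illustrative sketch (compiled out, not part of the driver) showing both
 * halves of the lock in one place: a CMD_SYNC issuer holds it shared across
 * the wait for completion, while a CPU refreshing the shadow cons pointer
 * takes it exclusively. The example_* caller is hypothetical; the real users
 * are spread across the insertion and polling paths below.
 */
#if 0
static void example_cmdq_lock_usage(struct arm_smmu_cmdq *cmdq, u32 cons)
{
	unsigned long flags;

	/* Exclusive side: a safe window in which to overwrite the shadow cons. */
	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
		WRITE_ONCE(cmdq->q.llq.cons, cons);
		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
	}

	/* Shared side: held while waiting for our CMD_SYNC to complete. */
	arm_smmu_cmdq_shared_lock(cmdq);
	/* ... poll for completion ... */
	if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
		/* Last holder: also safe to update the shadow cons here. */
		arm_smmu_cmdq_shared_unlock(cmdq);
	}
}
#endif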
/*
 * Command queue insertion.
 * This is made fiddly by our attempts to achieve some sort of scalability
 * since there is one queue shared amongst all of the CPUs in the system. If
 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
 * then you'll *love* this monstrosity.
 *
 * The basic idea is to split the queue up into ranges of commands that are
 * owned by a given CPU; the owner may not have written all of the commands
 * itself, but is responsible for advancing the hardware prod pointer when
 * the time comes. The algorithm is roughly:
 *
 *	1. Allocate some space in the queue. At this point we also discover
 *	   whether the head of the queue is currently owned by another CPU,
 *	   or whether we are the owner.
 *
 *	2. Write our commands into our allocated slots in the queue.
 *
 *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
 *
 *	4. If we are an owner:
 *		a. Wait for the previous owner to finish.
 *		b. Mark the queue head as unowned, which tells us the range
 *		   that we are responsible for publishing.
 *		c. Wait for all commands in our owned range to become valid.
 *		d. Advance the hardware prod pointer.
 *		e. Tell the next owner we've finished.
 *
 *	5. If we are inserting a CMD_SYNC (we may or may not have been an
 *	   owner), then we need to stick around until it has completed:
 *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
 *		   to clear the first 4 bytes.
 *		b. Otherwise, we spin waiting for the hardware cons pointer to
 *		   advance past our command.
 *
 * The devil is in the details, particularly the use of locking for handling
 * SYNC completion and freeing up space in the queue before we think that it
 * is full.
 */
531 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
532 u32 sprod, u32 eprod, bool set)
534 u32 swidx, sbidx, ewidx, ebidx;
535 struct arm_smmu_ll_queue llq = {
536 .max_n_shift = cmdq->q.llq.max_n_shift,
540 ewidx = BIT_WORD(Q_IDX(&llq, eprod));
541 ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
543 while (llq.prod != eprod) {
546 u32 limit = BITS_PER_LONG;
548 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
549 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
551 ptr = &cmdq->valid_map[swidx];
553 if ((swidx == ewidx) && (sbidx < ebidx))
556 mask = GENMASK(limit - 1, sbidx);
		/*
		 * The valid bit is the inverse of the wrap bit. This means
		 * that a zero-initialised queue is invalid and, after marking
		 * all entries as valid, they become invalid again when we
		 * wrap.
		 */
565 atomic_long_xor(mask, ptr);
569 valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
570 atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
573 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
577 /* Mark all entries in the range [sprod, eprod) as valid */
578 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
579 u32 sprod, u32 eprod)
581 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
584 /* Wait for all entries in the range [sprod, eprod) to become valid */
585 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
586 u32 sprod, u32 eprod)
588 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
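/*
 * Worked example (illustration only) of the valid-bit encoding polled by
 * __arm_smmu_cmdq_poll_set_valid_map() above: because the valid bit is the
 * inverse of the wrap bit, the expected value flips every time the producer
 * wraps. For a hypothetical mask, these are the values that
 * atomic_long_cond_read_relaxed() waits for on each pass.
 */
#if 0
static void example_valid_poll_values(unsigned long mask)
{
	unsigned long even_pass = (ULONG_MAX + 0) & mask; /* wrap bit 0: wait for all-ones (== mask) */
	unsigned long odd_pass  = (ULONG_MAX + 1) & mask; /* wrap bit 1: sum overflows to 0, wait for all-zeroes */

	(void)even_pass;
	(void)odd_pass;
}
#endif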
591 /* Wait for the command queue to become non-full */
592 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
593 struct arm_smmu_ll_queue *llq)
596 struct arm_smmu_queue_poll qp;
597 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
601 * Try to update our copy of cons by grabbing exclusive cmdq access. If
602 * that fails, spin until somebody else updates it for us.
604 if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
605 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
606 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
607 llq->val = READ_ONCE(cmdq->q.llq.val);
611 queue_poll_init(smmu, &qp);
613 llq->val = READ_ONCE(cmdq->q.llq.val);
614 if (!queue_full(llq))
617 ret = queue_poll(&qp);
624 * Wait until the SMMU signals a CMD_SYNC completion MSI.
625 * Must be called with the cmdq lock held in some capacity.
627 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
628 struct arm_smmu_ll_queue *llq)
631 struct arm_smmu_queue_poll qp;
632 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
633 u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
635 queue_poll_init(smmu, &qp);
	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
642 smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
643 llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
648 * Wait until the SMMU cons index passes llq->prod.
649 * Must be called with the cmdq lock held in some capacity.
651 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
652 struct arm_smmu_ll_queue *llq)
654 struct arm_smmu_queue_poll qp;
655 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
656 u32 prod = llq->prod;
659 queue_poll_init(smmu, &qp);
660 llq->val = READ_ONCE(cmdq->q.llq.val);
662 if (queue_consumed(llq, prod))
665 ret = queue_poll(&qp);
668 * This needs to be a readl() so that our subsequent call
669 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
671 * Specifically, we need to ensure that we observe all
672 * shared_lock()s by other CMD_SYNCs that share our owner,
673 * so that a failing call to tryunlock() means that we're
674 * the last one out and therefore we can safely advance
675 * cmdq->q.llq.cons. Roughly speaking:
677 * CPU 0 CPU1 CPU2 (us)
687 * <control dependency>
693 * Requires us to see CPU 0's shared_lock() acquisition.
695 llq->cons = readl(cmdq->q.cons_reg);
701 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
702 struct arm_smmu_ll_queue *llq)
704 if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
705 return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
707 return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
710 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
714 struct arm_smmu_ll_queue llq = {
715 .max_n_shift = cmdq->q.llq.max_n_shift,
719 for (i = 0; i < n; ++i) {
720 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
722 prod = queue_inc_prod_n(&llq, i);
723 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
/*
 * This is the actual insertion function, and provides the following
 * ordering guarantees to callers:
 *
 * - There is a dma_wmb() before publishing any commands to the queue.
 *   This can be relied upon to order prior writes to data structures
 *   in memory (such as a CD or an STE) before the command.
 *
 * - On completion of a CMD_SYNC, there is a control dependency.
 *   This can be relied upon to order subsequent writes to memory (e.g.
 *   freeing an IOVA) after completion of the CMD_SYNC.
 *
 * - Command insertion is totally ordered, so if two CPUs each race to
 *   insert their own list of commands then all of the commands from one
 *   CPU will appear before any of the commands from the other CPU.
 */
743 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
744 u64 *cmds, int n, bool sync)
746 u64 cmd_sync[CMDQ_ENT_DWORDS];
750 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
751 struct arm_smmu_ll_queue llq, head;
754 llq.max_n_shift = cmdq->q.llq.max_n_shift;
756 /* 1. Allocate some space in the queue */
757 local_irq_save(flags);
758 llq.val = READ_ONCE(cmdq->q.llq.val);
762 while (!queue_has_space(&llq, n + sync)) {
763 local_irq_restore(flags);
764 if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
765 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
766 local_irq_save(flags);
769 head.cons = llq.cons;
770 head.prod = queue_inc_prod_n(&llq, n + sync) |
771 CMDQ_PROD_OWNED_FLAG;
773 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
779 owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
780 head.prod &= ~CMDQ_PROD_OWNED_FLAG;
781 llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
784 * 2. Write our commands into the queue
785 * Dependency ordering from the cmpxchg() loop above.
787 arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
789 prod = queue_inc_prod_n(&llq, n);
790 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
791 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
		/*
		 * In order to determine completion of our CMD_SYNC, we must
		 * ensure that the queue can't wrap twice without us noticing.
		 * We achieve that by taking the cmdq lock as shared before
		 * marking our slot as valid.
		 */
799 arm_smmu_cmdq_shared_lock(cmdq);
802 /* 3. Mark our slots as valid, ensuring commands are visible first */
804 arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
806 /* 4. If we are the owner, take control of the SMMU hardware */
808 /* a. Wait for previous owner to finish */
809 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
811 /* b. Stop gathering work by clearing the owned flag */
812 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
813 &cmdq->q.llq.atomic.prod);
814 prod &= ~CMDQ_PROD_OWNED_FLAG;
817 * c. Wait for any gathered work to be written to the queue.
818 * Note that we read our own entries so that we have the control
819 * dependency required by (d).
821 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
824 * d. Advance the hardware prod pointer
825 * Control dependency ordering from the entries becoming valid.
827 writel_relaxed(prod, cmdq->q.prod_reg);
830 * e. Tell the next owner we're done
831 * Make sure we've updated the hardware first, so that we don't
832 * race to update prod and potentially move it backwards.
834 atomic_set_release(&cmdq->owner_prod, prod);
837 /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
839 llq.prod = queue_inc_prod_n(&llq, n);
840 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
842 dev_err_ratelimited(smmu->dev,
843 "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
845 readl_relaxed(cmdq->q.prod_reg),
846 readl_relaxed(cmdq->q.cons_reg));
		/*
		 * Try to unlock the cmdq lock. This will fail if we're the last
		 * reader, in which case we can safely update cmdq->q.llq.cons
		 */
853 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
854 WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
855 arm_smmu_cmdq_shared_unlock(cmdq);
859 local_irq_restore(flags);
863 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
864 struct arm_smmu_cmdq_ent *ent,
867 u64 cmd[CMDQ_ENT_DWORDS];
869 if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
870 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
875 return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
878 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
879 struct arm_smmu_cmdq_ent *ent)
881 return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
884 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
885 struct arm_smmu_cmdq_ent *ent)
887 return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
890 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
891 struct arm_smmu_cmdq_batch *cmds,
892 struct arm_smmu_cmdq_ent *cmd)
896 if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
897 (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
898 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
902 if (cmds->num == CMDQ_BATCH_ENTRIES) {
903 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
907 index = cmds->num * CMDQ_ENT_DWORDS;
908 if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
909 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
917 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
918 struct arm_smmu_cmdq_batch *cmds)
920 return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
923 static int arm_smmu_page_response(struct device *dev,
924 struct iopf_fault *unused,
925 struct iommu_page_response *resp)
927 struct arm_smmu_cmdq_ent cmd = {0};
928 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
929 int sid = master->streams[0].id;
931 if (master->stall_enabled) {
932 cmd.opcode = CMDQ_OP_RESUME;
933 cmd.resume.sid = sid;
934 cmd.resume.stag = resp->grpid;
935 switch (resp->code) {
936 case IOMMU_PAGE_RESP_INVALID:
937 case IOMMU_PAGE_RESP_FAILURE:
938 cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
940 case IOMMU_PAGE_RESP_SUCCESS:
941 cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
950 arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
	/*
	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
	 * RESUME consumption guarantees that the stalled transaction will be
	 * terminated... at some point in the future. PRI_RESP is fire and
	 * forget.
	 */
961 /* Context descriptor manipulation functions */
962 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
964 struct arm_smmu_cmdq_ent cmd = {
965 .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
966 CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
970 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
973 static void arm_smmu_sync_cd(struct arm_smmu_master *master,
977 struct arm_smmu_cmdq_batch cmds;
978 struct arm_smmu_device *smmu = master->smmu;
979 struct arm_smmu_cmdq_ent cmd = {
980 .opcode = CMDQ_OP_CFGI_CD,
988 for (i = 0; i < master->num_streams; i++) {
989 cmd.cfgi.sid = master->streams[i].id;
990 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
993 arm_smmu_cmdq_batch_submit(smmu, &cmds);
996 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
997 struct arm_smmu_l1_ctx_desc *l1_desc)
999 size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1001 l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1002 &l1_desc->l2ptr_dma, GFP_KERNEL);
1003 if (!l1_desc->l2ptr) {
1005 "failed to allocate context descriptor table\n");
1011 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1012 struct arm_smmu_l1_ctx_desc *l1_desc)
1014 u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1017 /* See comment in arm_smmu_write_ctx_desc() */
1018 WRITE_ONCE(*dst, cpu_to_le64(val));
1021 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
1025 struct arm_smmu_l1_ctx_desc *l1_desc;
1026 struct arm_smmu_device *smmu = master->smmu;
1027 struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1029 if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1030 return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS;
1032 idx = ssid >> CTXDESC_SPLIT;
1033 l1_desc = &cd_table->l1_desc[idx];
1034 if (!l1_desc->l2ptr) {
1035 if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1038 l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1039 arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1040 /* An invalid L1CD can be cached */
1041 arm_smmu_sync_cd(master, ssid, false);
1043 idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1044 return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1047 int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
1048 struct arm_smmu_ctx_desc *cd)
	/*
	 * This function handles the following cases:
	 *
	 * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
	 * (2) Install a secondary CD, for SID+SSID traffic.
	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
	 *     CD, then invalidate the old entry and mappings.
	 * (4) Quiesce the context without clearing the valid bit. Disable
	 *     translation, and ignore any translation fault.
	 * (5) Remove a secondary CD.
	 */
1064 struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1065 struct arm_smmu_device *smmu = master->smmu;
1067 if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
1070 cdptr = arm_smmu_get_cd_ptr(master, ssid);
1074 val = le64_to_cpu(cdptr[0]);
1075 cd_live = !!(val & CTXDESC_CD_0_V);
1077 if (!cd) { /* (5) */
1079 } else if (cd == &quiet_cd) { /* (4) */
1080 if (!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1081 val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R);
1082 val |= CTXDESC_CD_0_TCR_EPD0;
1083 } else if (cd_live) { /* (3) */
1084 val &= ~CTXDESC_CD_0_ASID;
1085 val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
		/*
		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
		 * this substream's traffic
		 */
1090 } else { /* (1) and (2) */
1091 cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1093 cdptr[3] = cpu_to_le64(cd->mair);
		/*
		 * STE may be live, and the SMMU might read dwords of this CD in any
		 * order. Ensure that it observes valid values before reading
		 * V=1.
		 */
1100 arm_smmu_sync_cd(master, ssid, true);
1106 CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1107 (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1109 FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1112 if (cd_table->stall_enabled)
1113 val |= CTXDESC_CD_0_S;
	/*
	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
	 * "Configuration structures and configuration invalidation completion"
	 *
	 *   The size of single-copy atomic reads made by the SMMU is
	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
	 *   field within an aligned 64-bit span of a structure can be altered
	 *   without first making the structure invalid.
	 */
1125 WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1126 arm_smmu_sync_cd(master, ssid, true);
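/*
 * Illustrative call forms (compiled out, not part of the driver) for the five
 * cases listed at the top of arm_smmu_write_ctx_desc(). The example_* wrapper
 * and its arguments are hypothetical.
 */
#if 0
static void example_write_ctx_desc_cases(struct arm_smmu_master *master,
					 struct arm_smmu_domain *smmu_domain,
					 struct arm_smmu_ctx_desc *cd, int pasid)
{
	arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd); /* (1) */
	arm_smmu_write_ctx_desc(master, pasid, cd);			    /* (2) or (3) */
	arm_smmu_write_ctx_desc(master, pasid, &quiet_cd);		    /* (4) */
	arm_smmu_write_ctx_desc(master, pasid, NULL);			    /* (5) */
}
#endif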
1130 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
1134 size_t max_contexts;
1135 struct arm_smmu_device *smmu = master->smmu;
1136 struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1138 cd_table->stall_enabled = master->stall_enabled;
1139 cd_table->s1cdmax = master->ssid_bits;
1140 max_contexts = 1 << cd_table->s1cdmax;
1142 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1143 max_contexts <= CTXDESC_L2_ENTRIES) {
1144 cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1145 cd_table->num_l1_ents = max_contexts;
1147 l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1149 cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1150 cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
1151 CTXDESC_L2_ENTRIES);
1153 cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
1154 sizeof(*cd_table->l1_desc),
1156 if (!cd_table->l1_desc)
1159 l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1162 cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
1164 if (!cd_table->cdtab) {
1165 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1173 if (cd_table->l1_desc) {
1174 devm_kfree(smmu->dev, cd_table->l1_desc);
1175 cd_table->l1_desc = NULL;
1180 static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
1183 size_t size, l1size;
1184 struct arm_smmu_device *smmu = master->smmu;
1185 struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1187 if (cd_table->l1_desc) {
1188 size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1190 for (i = 0; i < cd_table->num_l1_ents; i++) {
1191 if (!cd_table->l1_desc[i].l2ptr)
1194 dmam_free_coherent(smmu->dev, size,
1195 cd_table->l1_desc[i].l2ptr,
1196 cd_table->l1_desc[i].l2ptr_dma);
1198 devm_kfree(smmu->dev, cd_table->l1_desc);
1199 cd_table->l1_desc = NULL;
1201 l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1203 l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1206 dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
1207 cd_table->cdtab_dma = 0;
1208 cd_table->cdtab = NULL;
1211 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1214 struct arm_smmu_ctx_desc *old_cd;
1219 free = refcount_dec_and_test(&cd->refs);
1221 old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1222 WARN_ON(old_cd != cd);
1227 /* Stream table manipulation functions */
1229 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1233 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1234 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1236 /* See comment in arm_smmu_write_ctx_desc() */
1237 WRITE_ONCE(*dst, cpu_to_le64(val));
1240 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1242 struct arm_smmu_cmdq_ent cmd = {
1243 .opcode = CMDQ_OP_CFGI_STE,
1250 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1253 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1254 struct arm_smmu_ste *dst)
	/*
	 * This is hideously complicated, but we only really care about
	 * three cases at the moment:
	 *
	 * 1. Invalid (all zero) -> bypass/fault (init)
	 * 2. Bypass/fault -> translation/bypass (attach)
	 * 3. Translation/bypass -> bypass/fault (detach)
	 *
	 * Given that we can't update the STE atomically and the SMMU
	 * doesn't read the thing in a defined order, that leaves us
	 * with the following maintenance requirements:
	 *
	 * 1. Update Config, return (init time STEs aren't live)
	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
	 * 3. Update Config, sync
	 */
1272 u64 val = le64_to_cpu(dst->data[0]);
1273 bool ste_live = false;
1274 struct arm_smmu_device *smmu = master->smmu;
1275 struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
1276 struct arm_smmu_s2_cfg *s2_cfg = NULL;
1277 struct arm_smmu_domain *smmu_domain = master->domain;
1278 struct arm_smmu_cmdq_ent prefetch_cmd = {
1279 .opcode = CMDQ_OP_PREFETCH_CFG,
1286 switch (smmu_domain->stage) {
1287 case ARM_SMMU_DOMAIN_S1:
1288 cd_table = &master->cd_table;
1290 case ARM_SMMU_DOMAIN_S2:
1291 s2_cfg = &smmu_domain->s2_cfg;
1298 if (val & STRTAB_STE_0_V) {
1299 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1300 case STRTAB_STE_0_CFG_BYPASS:
1302 case STRTAB_STE_0_CFG_S1_TRANS:
1303 case STRTAB_STE_0_CFG_S2_TRANS:
1306 case STRTAB_STE_0_CFG_ABORT:
1307 BUG_ON(!disable_bypass);
1310 BUG(); /* STE corruption */
1314 /* Nuke the existing STE_0 value, as we're going to rewrite it */
1315 val = STRTAB_STE_0_V;
1318 if (!smmu_domain || !(cd_table || s2_cfg)) {
1319 if (!smmu_domain && disable_bypass)
1320 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1322 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1324 dst->data[0] = cpu_to_le64(val);
1325 dst->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1326 STRTAB_STE_1_SHCFG_INCOMING));
1327 dst->data[2] = 0; /* Nuke the VMID */
		/*
		 * The SMMU can perform negative caching, so we must sync
		 * the STE regardless of whether the old value was live.
		 */
1333 arm_smmu_sync_ste_for_sid(smmu, sid);
1338 u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1339 STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1342 dst->data[1] = cpu_to_le64(
1343 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1344 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1345 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1346 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1347 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1349 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1350 !master->stall_enabled)
1351 dst->data[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1353 val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1354 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1355 FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
1356 FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
1361 dst->data[2] = cpu_to_le64(
1362 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1363 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1365 STRTAB_STE_2_S2ENDI |
1367 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1370 dst->data[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1372 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1375 if (master->ats_enabled)
1376 dst->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1377 STRTAB_STE_1_EATS_TRANS));
1379 arm_smmu_sync_ste_for_sid(smmu, sid);
1380 /* See comment in arm_smmu_write_ctx_desc() */
1381 WRITE_ONCE(dst->data[0], cpu_to_le64(val));
1382 arm_smmu_sync_ste_for_sid(smmu, sid);
1384 /* It's likely that we'll want to use the new STE soon */
1385 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1386 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1389 static void arm_smmu_init_bypass_stes(struct arm_smmu_ste *strtab,
1390 unsigned int nent, bool force)
1393 u64 val = STRTAB_STE_0_V;
1395 if (disable_bypass && !force)
1396 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1398 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1400 for (i = 0; i < nent; ++i) {
1401 strtab->data[0] = cpu_to_le64(val);
1402 strtab->data[1] = cpu_to_le64(FIELD_PREP(
1403 STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
1404 strtab->data[2] = 0;
1409 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1413 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1414 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1419 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1420 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1422 desc->span = STRTAB_SPLIT + 1;
1423 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1427 "failed to allocate l2 stream table for SID %u\n",
1432 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
1433 arm_smmu_write_strtab_l1_desc(strtab, desc);
1437 static struct arm_smmu_master *
1438 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1440 struct rb_node *node;
1441 struct arm_smmu_stream *stream;
1443 lockdep_assert_held(&smmu->streams_mutex);
1445 node = smmu->streams.rb_node;
1447 stream = rb_entry(node, struct arm_smmu_stream, node);
1448 if (stream->id < sid)
1449 node = node->rb_right;
1450 else if (stream->id > sid)
1451 node = node->rb_left;
1453 return stream->master;
1459 /* IRQ and event handlers */
1460 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1464 struct arm_smmu_master *master;
1465 bool ssid_valid = evt[0] & EVTQ_0_SSV;
1466 u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1467 struct iopf_fault fault_evt = { };
1468 struct iommu_fault *flt = &fault_evt.fault;
1470 switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1471 case EVT_ID_TRANSLATION_FAULT:
1472 case EVT_ID_ADDR_SIZE_FAULT:
1473 case EVT_ID_ACCESS_FAULT:
1474 case EVT_ID_PERMISSION_FAULT:
1480 /* Stage-2 is always pinned at the moment */
1481 if (evt[1] & EVTQ_1_S2)
1484 if (!(evt[1] & EVTQ_1_STALL))
1487 if (evt[1] & EVTQ_1_RnW)
1488 perm |= IOMMU_FAULT_PERM_READ;
1490 perm |= IOMMU_FAULT_PERM_WRITE;
1492 if (evt[1] & EVTQ_1_InD)
1493 perm |= IOMMU_FAULT_PERM_EXEC;
1495 if (evt[1] & EVTQ_1_PnU)
1496 perm |= IOMMU_FAULT_PERM_PRIV;
1498 flt->type = IOMMU_FAULT_PAGE_REQ;
1499 flt->prm = (struct iommu_fault_page_request) {
1500 .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1501 .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1503 .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1507 flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1508 flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1511 mutex_lock(&smmu->streams_mutex);
1512 master = arm_smmu_find_master(smmu, sid);
1518 ret = iommu_report_device_fault(master->dev, &fault_evt);
1519 if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1520 /* Nobody cared, abort the access */
1521 struct iommu_page_response resp = {
1522 .pasid = flt->prm.pasid,
1523 .grpid = flt->prm.grpid,
1524 .code = IOMMU_PAGE_RESP_FAILURE,
1526 arm_smmu_page_response(master->dev, &fault_evt, &resp);
1530 mutex_unlock(&smmu->streams_mutex);
1534 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1537 struct arm_smmu_device *smmu = dev;
1538 struct arm_smmu_queue *q = &smmu->evtq.q;
1539 struct arm_smmu_ll_queue *llq = &q->llq;
1540 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1541 DEFAULT_RATELIMIT_BURST);
1542 u64 evt[EVTQ_ENT_DWORDS];
1545 while (!queue_remove_raw(q, evt)) {
1546 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1548 ret = arm_smmu_handle_evt(smmu, evt);
1549 if (!ret || !__ratelimit(&rs))
1552 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1553 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1554 dev_info(smmu->dev, "\t0x%016llx\n",
1555 (unsigned long long)evt[i]);
		/*
		 * Not much we can do on overflow, so scream and pretend we're
		 * trying harder.
		 */
1564 if (queue_sync_prod_in(q) == -EOVERFLOW)
1565 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1566 } while (!queue_empty(llq));
1568 /* Sync our overflow flag, as we believe we're up to speed */
1569 queue_sync_cons_ovf(q);
1573 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1579 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1580 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1581 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1582 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1583 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1585 dev_info(smmu->dev, "unexpected PRI request received:\n");
1587 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1588 sid, ssid, grpid, last ? "L" : "",
1589 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1590 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1591 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1592 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1593 evt[1] & PRIQ_1_ADDR_MASK);
1596 struct arm_smmu_cmdq_ent cmd = {
1597 .opcode = CMDQ_OP_PRI_RESP,
1598 .substream_valid = ssv,
1603 .resp = PRI_RESP_DENY,
1607 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1611 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1613 struct arm_smmu_device *smmu = dev;
1614 struct arm_smmu_queue *q = &smmu->priq.q;
1615 struct arm_smmu_ll_queue *llq = &q->llq;
1616 u64 evt[PRIQ_ENT_DWORDS];
1619 while (!queue_remove_raw(q, evt))
1620 arm_smmu_handle_ppr(smmu, evt);
1622 if (queue_sync_prod_in(q) == -EOVERFLOW)
1623 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1624 } while (!queue_empty(llq));
1626 /* Sync our overflow flag, as we believe we're up to speed */
1627 queue_sync_cons_ovf(q);
1631 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1633 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1635 u32 gerror, gerrorn, active;
1636 struct arm_smmu_device *smmu = dev;
1638 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1639 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1641 active = gerror ^ gerrorn;
1642 if (!(active & GERROR_ERR_MASK))
1643 return IRQ_NONE; /* No errors pending */
1646 "unexpected global error reported (0x%08x), this could be serious\n",
1649 if (active & GERROR_SFM_ERR) {
1650 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1651 arm_smmu_device_disable(smmu);
1654 if (active & GERROR_MSI_GERROR_ABT_ERR)
1655 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1657 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1658 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1660 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1661 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1663 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1664 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1666 if (active & GERROR_PRIQ_ABT_ERR)
1667 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1669 if (active & GERROR_EVTQ_ABT_ERR)
1670 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1672 if (active & GERROR_CMDQ_ERR)
1673 arm_smmu_cmdq_skip_err(smmu);
1675 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1679 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1681 struct arm_smmu_device *smmu = dev;
1683 arm_smmu_evtq_thread(irq, dev);
1684 if (smmu->features & ARM_SMMU_FEAT_PRI)
1685 arm_smmu_priq_thread(irq, dev);
1690 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1692 arm_smmu_gerror_handler(irq, dev);
1693 return IRQ_WAKE_THREAD;
1697 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1698 struct arm_smmu_cmdq_ent *cmd)
1702 /* ATC invalidates are always on 4096-bytes pages */
1703 size_t inval_grain_shift = 12;
1704 unsigned long page_start, page_end;
	/*
	 * ATS and PASID:
	 *
	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
	 * prefix. In that case all ATC entries within the address range are
	 * invalidated, including those that were requested with a PASID! There
	 * is no way to invalidate only entries without PASID.
	 *
	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
	 * traffic), translation requests without PASID create ATC entries
	 * without PASID, which must be invalidated with substream_valid clear.
	 * This has the unpleasant side-effect of invalidating all PASID-tagged
	 * ATC entries within the address range.
	 */
1720 *cmd = (struct arm_smmu_cmdq_ent) {
1721 .opcode = CMDQ_OP_ATC_INV,
1722 .substream_valid = (ssid != IOMMU_NO_PASID),
1727 cmd->atc.size = ATC_INV_SIZE_ALL;
1731 page_start = iova >> inval_grain_shift;
1732 page_end = (iova + size - 1) >> inval_grain_shift;
	/*
	 * In an ATS Invalidate Request, the address must be aligned on the
	 * range size, which must be a power of two number of page sizes. We
	 * thus have to choose between grossly over-invalidating the region, or
	 * splitting the invalidation into multiple commands. For simplicity
	 * we'll go with the first solution, but should refine it in the future
	 * if multiple commands are shown to be more efficient.
	 *
	 * Find the smallest power of two that covers the range. The most
	 * significant differing bit between the start and end addresses,
	 * fls(start ^ end), indicates the required span. For example:
	 *
	 * We want to invalidate pages [8; 11]. This is already the ideal range:
	 *		x = 0b1000 ^ 0b1011 = 0b11
	 *		span = 1 << fls(x) = 4
	 *
	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
	 *		x = 0b0111 ^ 0b1010 = 0b1101
	 *		span = 1 << fls(x) = 16
	 */
1754 log2_span = fls_long(page_start ^ page_end);
1755 span_mask = (1ULL << log2_span) - 1;
1757 page_start &= ~span_mask;
1759 cmd->atc.addr = page_start << inval_grain_shift;
1760 cmd->atc.size = log2_span;
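/*
 * Illustrative check (compiled out, not part of the driver) of the span
 * arithmetic above, using the [7; 10] example from the comment: the most
 * significant differing bit gives fls_long(7 ^ 10) == 4, so the resulting
 * command covers the aligned 16-page range [0; 15].
 */
#if 0
static void example_atc_span(void)
{
	unsigned long page_start = 7, page_end = 10;
	size_t log2_span = fls_long(page_start ^ page_end);	/* fls_long(0b1101) == 4 */
	size_t span_mask = (1ULL << log2_span) - 1;		/* 0xf */

	page_start &= ~span_mask;				/* 0 */
	(void)page_start;
}
#endif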
1763 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1766 struct arm_smmu_cmdq_ent cmd;
1767 struct arm_smmu_cmdq_batch cmds;
1769 arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
1772 for (i = 0; i < master->num_streams; i++) {
1773 cmd.atc.sid = master->streams[i].id;
1774 arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1777 return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1780 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1781 unsigned long iova, size_t size)
1784 unsigned long flags;
1785 struct arm_smmu_cmdq_ent cmd;
1786 struct arm_smmu_master *master;
1787 struct arm_smmu_cmdq_batch cmds;
1789 if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
	/*
	 * Ensure that we've completed prior invalidation of the main TLBs
	 * before we read 'nr_ats_masters' in case of a concurrent call to
	 * arm_smmu_enable_ats():
	 *
	 *	// unmap()			// arm_smmu_enable_ats()
	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
	 *	smp_mb();			[...]
	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
	 *
	 * Ensures that we always see the incremented 'nr_ats_masters' count if
	 * ATS was enabled at the PCI device before completion of the TLBI.
	 */
1806 if (!atomic_read(&smmu_domain->nr_ats_masters))
1809 arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1813 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1814 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1815 if (!master->ats_enabled)
1818 for (i = 0; i < master->num_streams; i++) {
1819 cmd.atc.sid = master->streams[i].id;
1820 arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1823 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1825 return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1828 /* IO_PGTABLE API */
1829 static void arm_smmu_tlb_inv_context(void *cookie)
1831 struct arm_smmu_domain *smmu_domain = cookie;
1832 struct arm_smmu_device *smmu = smmu_domain->smmu;
1833 struct arm_smmu_cmdq_ent cmd;
	/*
	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
	 * PTEs previously cleared by unmaps on the current CPU not yet visible
	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
	 * insertion to guarantee those are observed before the TLBI. Do be
	 * careful, 007.
	 */
1842 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1843 arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
1845 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1846 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1847 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1849 arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
1852 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1853 unsigned long iova, size_t size,
1855 struct arm_smmu_domain *smmu_domain)
1857 struct arm_smmu_device *smmu = smmu_domain->smmu;
1858 unsigned long end = iova + size, num_pages = 0, tg = 0;
1859 size_t inv_range = granule;
1860 struct arm_smmu_cmdq_batch cmds;
1865 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1866 /* Get the leaf page size */
1867 tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1869 num_pages = size >> tg;
1871 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1872 cmd->tlbi.tg = (tg - 10) / 2;
		/*
		 * Determine what level the granule is at. For non-leaf, both
		 * io-pgtable and SVA pass a nominal last-level granule because
		 * they don't know what level(s) actually apply, so ignore that
		 * and leave TTL=0. However for various errata reasons we still
		 * want to use a range command, so avoid the SVA corner case
		 * where both scale and num could be 0 as well.
		 */
		if (cmd->tlbi.leaf)
			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
			num_pages++;
	}
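	/*
	 * Worked example (illustration only): with 4K leaf pages (tg = 12)
	 * and a 2MB leaf granule (ilog2(granule) = 21), the TTL formula above
	 * gives ttl = 4 - (21 - 3) / (12 - 3) = 2, i.e. a level-2 block entry.
	 */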
1890 while (iova < end) {
1891 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1893 * On each iteration of the loop, the range is 5 bits
1894 * worth of the aligned size remaining.
1895 * The range in pages is:
1897 * range = (num_pages & (0x1f << __ffs(num_pages)))
1899 unsigned long scale, num;
1901 /* Determine the power of 2 multiple number of pages */
1902 scale = __ffs(num_pages);
1903 cmd->tlbi.scale = scale;
1905 /* Determine how many chunks of 2^scale size we have */
1906 num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1907 cmd->tlbi.num = num - 1;
1909 /* range is num * 2^scale * pgsize */
1910 inv_range = num << (scale + tg);
1912 /* Clear out the lower order bits for the next iteration */
1913 num_pages -= num << scale;
1916 cmd->tlbi.addr = iova;
1917 arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1920 arm_smmu_cmdq_batch_submit(smmu, &cmds);
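/*
 * Illustrative decomposition (compiled out, not part of the driver): with
 * range invalidation, the loop above consumes num_pages five bits at a time,
 * e.g. 291 pages are covered by two commands:
 *   291 = 0b100100011 -> scale 0, num 3  (3 pages)
 *   288 = 0b100100000 -> scale 5, num 9  (9 * 32 = 288 pages)
 */
#if 0
static void example_range_decompose(unsigned long num_pages)
{
	while (num_pages) {
		unsigned long scale = __ffs(num_pages);
		unsigned long num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;

		/* One TLBI range command covers num << scale pages. */
		num_pages -= num << scale;
	}
}
#endif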
1923 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1924 size_t granule, bool leaf,
1925 struct arm_smmu_domain *smmu_domain)
1927 struct arm_smmu_cmdq_ent cmd = {
1933 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1934 cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1935 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1936 cmd.tlbi.asid = smmu_domain->cd.asid;
1938 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1939 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1941 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
	/*
	 * Unfortunately, this can't be leaf-only since we may have
	 * zapped an entire table.
	 */
1947 arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size);
1950 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1951 size_t granule, bool leaf,
1952 struct arm_smmu_domain *smmu_domain)
1954 struct arm_smmu_cmdq_ent cmd = {
1955 .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1956 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1963 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1966 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1967 unsigned long iova, size_t granule,
1970 struct arm_smmu_domain *smmu_domain = cookie;
1971 struct iommu_domain *domain = &smmu_domain->domain;
1973 iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1976 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1977 size_t granule, void *cookie)
1979 arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1982 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1983 .tlb_flush_all = arm_smmu_tlb_inv_context,
1984 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
1985 .tlb_add_page = arm_smmu_tlb_inv_page_nosync,
1989 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
1991 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
1994 case IOMMU_CAP_CACHE_COHERENCY:
1995 /* Assume that a coherent TCU implies coherent TBUs */
1996 return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
1997 case IOMMU_CAP_NOEXEC:
1998 case IOMMU_CAP_DEFERRED_FLUSH:
2005 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2007 struct arm_smmu_domain *smmu_domain;
2009 if (type == IOMMU_DOMAIN_SVA)
2010 return arm_smmu_sva_domain_alloc();
2012 if (type != IOMMU_DOMAIN_UNMANAGED &&
2013 type != IOMMU_DOMAIN_DMA &&
2014 type != IOMMU_DOMAIN_IDENTITY)
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
2022 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2026 mutex_init(&smmu_domain->init_mutex);
2027 INIT_LIST_HEAD(&smmu_domain->devices);
2028 spin_lock_init(&smmu_domain->devices_lock);
2029 INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2031 return &smmu_domain->domain;
2034 static void arm_smmu_domain_free(struct iommu_domain *domain)
2036 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2037 struct arm_smmu_device *smmu = smmu_domain->smmu;
2039 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2041 /* Free the ASID or VMID */
2042 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2043 /* Prevent SVA from touching the CD while we're freeing it */
2044 mutex_lock(&arm_smmu_asid_lock);
2045 arm_smmu_free_asid(&smmu_domain->cd);
2046 mutex_unlock(&arm_smmu_asid_lock);
2048 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2050 ida_free(&smmu->vmid_map, cfg->vmid);
2056 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2057 struct io_pgtable_cfg *pgtbl_cfg)
2061 struct arm_smmu_device *smmu = smmu_domain->smmu;
2062 struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
2063 typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2065 refcount_set(&cd->refs, 1);
2067 /* Prevent SVA from modifying the ASID until it is written to the CD */
2068 mutex_lock(&arm_smmu_asid_lock);
2069 ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd,
2070 XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2074 cd->asid = (u16)asid;
2075 cd->ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2076 cd->tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2077 FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2078 FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2079 FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2080 FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2081 FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2082 CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2083 cd->mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
2085 mutex_unlock(&arm_smmu_asid_lock);
2089 mutex_unlock(&arm_smmu_asid_lock);
2093 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2094 struct io_pgtable_cfg *pgtbl_cfg)
2097 struct arm_smmu_device *smmu = smmu_domain->smmu;
2098 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2099 typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2101 /* Reserve VMID 0 for stage-2 bypass STEs */
2102 vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2107 vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2108 cfg->vmid = (u16)vmid;
2109 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2110 cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2111 FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2112 FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2113 FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2114 FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2115 FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2116 FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2120 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
2123 unsigned long ias, oas;
2124 enum io_pgtable_fmt fmt;
2125 struct io_pgtable_cfg pgtbl_cfg;
2126 struct io_pgtable_ops *pgtbl_ops;
2127 int (*finalise_stage_fn)(struct arm_smmu_domain *,
2128 struct io_pgtable_cfg *);
2129 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2130 struct arm_smmu_device *smmu = smmu_domain->smmu;
2132 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2133 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2137 /* Restrict the stage to what we can actually support */
2138 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2139 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2140 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2141 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2143 switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
		ias = min_t(unsigned long, ias, VA_BITS);
		oas = smmu->ias;
		fmt = ARM_64_LPAE_S1;
		finalise_stage_fn = arm_smmu_domain_finalise_s1;
		break;
	case ARM_SMMU_DOMAIN_S2:
		ias = smmu->ias;
		oas = smmu->oas;
		fmt = ARM_64_LPAE_S2;
		finalise_stage_fn = arm_smmu_domain_finalise_s2;
		break;
	default:
		return -EINVAL;
	}
	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
		.tlb		= &arm_smmu_flush_ops,
		.iommu_dev	= smmu->dev,
	};
	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops)
		return -ENOMEM;

	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
	domain->geometry.force_aperture = true;

	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
	if (ret < 0) {
		free_io_pgtable_ops(pgtbl_ops);
		return ret;
	}

	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;
}
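/*
 * Illustrative example (values assumed, not computed by this function): with
 * ias = 48 the geometry above becomes aperture_end = (1UL << 48) - 1, and
 * force_aperture tells the DMA layer never to hand out IOVAs beyond it.
 */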
2188 static struct arm_smmu_ste *
2189 arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2191 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2193 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2194 unsigned int idx1, idx2;
2196 /* Two-level walk */
2197 idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2198 idx2 = sid & ((1 << STRTAB_SPLIT) - 1);
2199 return &cfg->l1_desc[idx1].l2ptr[idx2];
2201 /* Simple linear lookup */
2202 return (struct arm_smmu_ste *)&cfg
2203 ->strtab[sid * STRTAB_STE_DWORDS];
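/*
 * Worked example with a hypothetical SID, assuming STRTAB_SPLIT == 8: for
 * sid = 0x1234 the L1 descriptor is picked from sid >> 8 = 0x12 and the STE
 * slot within its L2 table from sid & 0xff = 0x34.
 */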
2207 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2210 struct arm_smmu_device *smmu = master->smmu;
2212 for (i = 0; i < master->num_streams; ++i) {
2213 u32 sid = master->streams[i].id;
2214 struct arm_smmu_ste *step =
2215 arm_smmu_get_step_for_sid(smmu, sid);
2217 /* Bridged PCI devices may end up with duplicated IDs */
		for (j = 0; j < i; j++)
			if (master->streams[j].id == sid)
				break;
		if (j < i)
			continue;

		arm_smmu_write_strtab_ent(master, sid, step);
	}
}
2228 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2230 struct device *dev = master->dev;
2231 struct arm_smmu_device *smmu = master->smmu;
2232 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
		return false;

	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
		return false;

	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
}
2243 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2246 struct pci_dev *pdev;
2247 struct arm_smmu_device *smmu = master->smmu;
2248 struct arm_smmu_domain *smmu_domain = master->domain;
2250 /* Don't enable ATS at the endpoint if it's not enabled in the STE */
	if (!master->ats_enabled)
		return;

	/* Smallest Translation Unit: log2 of the smallest supported granule */
	stu = __ffs(smmu->pgsize_bitmap);
2256 pdev = to_pci_dev(master->dev);
2258 atomic_inc(&smmu_domain->nr_ats_masters);
2259 arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2260 if (pci_enable_ats(pdev, stu))
2261 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2264 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2266 struct arm_smmu_domain *smmu_domain = master->domain;
	if (!master->ats_enabled)
		return;

	pci_disable_ats(to_pci_dev(master->dev));
	/*
	 * Ensure ATS is disabled at the endpoint before we issue the
	 * ATC invalidation via the SMMU.
	 */
	wmb();
	arm_smmu_atc_inv_master(master);
2278 atomic_dec(&smmu_domain->nr_ats_masters);
static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
{
	int ret;
	int features;
	int num_pasids;
	struct pci_dev *pdev;

	if (!dev_is_pci(master->dev))
		return -ENODEV;

	pdev = to_pci_dev(master->dev);

	features = pci_pasid_features(pdev);
	if (features < 0)
		return features;

	num_pasids = pci_max_pasids(pdev);
	if (num_pasids <= 0)
		return num_pasids;

	ret = pci_enable_pasid(pdev, features);
	if (ret) {
		dev_err(&pdev->dev, "Failed to enable PASID\n");
		return ret;
	}

	master->ssid_bits = min_t(u8, ilog2(num_pasids),
				  master->smmu->ssid_bits);
	return 0;
}
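/*
 * Illustrative example (hypothetical endpoint): a device advertising 65536
 * PASIDs gives ilog2(65536) = 16, so ssid_bits becomes min(16, smmu->ssid_bits);
 * the SMMU's own SSID width still bounds what the CD table can index.
 */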
2312 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2314 struct pci_dev *pdev;
	if (!dev_is_pci(master->dev))
		return;

	pdev = to_pci_dev(master->dev);

	if (!pdev->pasid_enabled)
		return;

	master->ssid_bits = 0;
	pci_disable_pasid(pdev);
}
2328 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2330 unsigned long flags;
	struct arm_smmu_domain *smmu_domain = master->domain;

	if (!smmu_domain)
		return;

	arm_smmu_disable_ats(master);
2338 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2339 list_del(&master->domain_head);
2340 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2342 master->domain = NULL;
2343 master->ats_enabled = false;
2344 arm_smmu_install_ste_for_dev(master);
	/*
	 * Clearing the CD entry isn't strictly required to detach the domain
2347 * since the table is uninstalled anyway, but it helps avoid confusion
2348 * in the call to arm_smmu_write_ctx_desc on the next attach (which
2349 * expects the entry to be empty).
2351 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->cd_table.cdtab)
2352 arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL);
2355 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
	int ret = 0;
	unsigned long flags;
2359 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2360 struct arm_smmu_device *smmu;
2361 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2362 struct arm_smmu_master *master;
2367 master = dev_iommu_priv_get(dev);
2368 smmu = master->smmu;
2371 * Checking that SVA is disabled ensures that this device isn't bound to
2372 * any mm, and can be safely detached from its old domain. Bonds cannot
2373 * be removed concurrently since we're holding the group mutex.
	if (arm_smmu_master_sva_enabled(master)) {
		dev_err(dev, "cannot attach - SVA enabled\n");
		return -EBUSY;
	}

	arm_smmu_detach_dev(master);
2382 mutex_lock(&smmu_domain->init_mutex);
2384 if (!smmu_domain->smmu) {
2385 smmu_domain->smmu = smmu;
		ret = arm_smmu_domain_finalise(domain);
		if (ret)
			smmu_domain->smmu = NULL;
	} else if (smmu_domain->smmu != smmu)
		ret = -EINVAL;

	mutex_unlock(&smmu_domain->init_mutex);
	if (ret)
		return ret;
2396 master->domain = smmu_domain;
2399 * The SMMU does not support enabling ATS with bypass. When the STE is
2400 * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and
2401 * Translated transactions are denied as though ATS is disabled for the
2402 * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
2403 * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
2405 if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2406 master->ats_enabled = arm_smmu_ats_supported(master);
2408 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2409 list_add(&master->domain_head, &smmu_domain->devices);
2410 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		if (!master->cd_table.cdtab) {
			ret = arm_smmu_alloc_cd_tables(master);
			if (ret) {
				master->domain = NULL;
				goto out_list_del;
			}
		}

		/*
		 * Prevent SVA from concurrently modifying the CD or writing to
		 * the CD entry
		 */
		mutex_lock(&arm_smmu_asid_lock);
		ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
		mutex_unlock(&arm_smmu_asid_lock);
		if (ret) {
			master->domain = NULL;
			goto out_list_del;
		}
	}

	arm_smmu_install_ste_for_dev(master);

	arm_smmu_enable_ats(master);
	return 0;

out_list_del:
	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_del(&master->domain_head);
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	return ret;
}
2447 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2448 phys_addr_t paddr, size_t pgsize, size_t pgcount,
2449 int prot, gfp_t gfp, size_t *mapped)
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;

	if (!ops)
		return -ENODEV;

	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
}
2459 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2460 size_t pgsize, size_t pgcount,
2461 struct iommu_iotlb_gather *gather)
2463 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (!ops)
		return 0;

	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
}
2472 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2474 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2476 if (smmu_domain->smmu)
2477 arm_smmu_tlb_inv_context(smmu_domain);
2480 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2481 struct iommu_iotlb_gather *gather)
2483 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	if (!gather->pgsize)
		return;

2488 arm_smmu_tlb_inv_range_domain(gather->start,
2489 gather->end - gather->start + 1,
2490 gather->pgsize, true, smmu_domain);
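/*
 * Worked example (assumed values): unmapping two 4KB pages starting at IOVA
 * 0x1000 leaves gather->start = 0x1000 and gather->end = 0x2fff, so the size
 * handed to the range invalidation is 0x2fff - 0x1000 + 1 = 0x2000.
 */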
static phys_addr_t
arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;

	if (!ops)
		return 0;

	return ops->iova_to_phys(ops, iova);
}
2504 static struct platform_driver arm_smmu_driver;
2507 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
							  fwnode);
	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}
2515 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2517 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
		limit *= 1UL << STRTAB_SPLIT;

	return sid < limit;
}
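/*
 * Worked example (assumed values): with num_l1_ents = 256 and
 * STRTAB_SPLIT = 8, a two-level table covers 256 << 8 = 65536 stream IDs,
 * so any sid below 0x10000 is in range.
 */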
2525 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2527 /* Check the SIDs are in range of the SMMU and our stream table */
	if (!arm_smmu_sid_in_range(smmu, sid))
		return -ERANGE;

	/* Ensure l2 strtab is initialised */
	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
		return arm_smmu_init_l2_strtab(smmu, sid);

	return 0;
}
2538 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2539 struct arm_smmu_master *master)
2543 struct arm_smmu_stream *new_stream, *cur_stream;
2544 struct rb_node **new_node, *parent_node = NULL;
2545 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
				  GFP_KERNEL);
	if (!master->streams)
		return -ENOMEM;

	master->num_streams = fwspec->num_ids;
2553 mutex_lock(&smmu->streams_mutex);
2554 for (i = 0; i < fwspec->num_ids; i++) {
2555 u32 sid = fwspec->ids[i];
2557 new_stream = &master->streams[i];
2558 new_stream->id = sid;
2559 new_stream->master = master;
		ret = arm_smmu_init_sid_strtab(smmu, sid);
		if (ret)
			break;

		/* Insert into SID tree */
		new_node = &(smmu->streams.rb_node);
		while (*new_node) {
			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
					      node);
			parent_node = *new_node;
			if (cur_stream->id > new_stream->id) {
				new_node = &((*new_node)->rb_left);
			} else if (cur_stream->id < new_stream->id) {
				new_node = &((*new_node)->rb_right);
			} else {
				dev_warn(master->dev,
					 "stream %u already in tree\n",
					 cur_stream->id);
				ret = -EINVAL;
				break;
			}
		}
		if (ret)
			break;

		rb_link_node(&new_stream->node, parent_node, new_node);
		rb_insert_color(&new_stream->node, &smmu->streams);
	}
	if (ret) {
		for (i--; i >= 0; i--)
			rb_erase(&master->streams[i].node, &smmu->streams);
		kfree(master->streams);
	}
	mutex_unlock(&smmu->streams_mutex);

	return ret;
}
2600 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2603 struct arm_smmu_device *smmu = master->smmu;
2604 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
	if (!smmu || !master->streams)
		return;

	mutex_lock(&smmu->streams_mutex);
2610 for (i = 0; i < fwspec->num_ids; i++)
2611 rb_erase(&master->streams[i].node, &smmu->streams);
2612 mutex_unlock(&smmu->streams_mutex);
2614 kfree(master->streams);
2617 static struct iommu_ops arm_smmu_ops;
2619 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2622 struct arm_smmu_device *smmu;
2623 struct arm_smmu_master *master;
2624 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2626 if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2627 return ERR_PTR(-EBUSY);
	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	if (!smmu)
		return ERR_PTR(-ENODEV);

	master = kzalloc(sizeof(*master), GFP_KERNEL);
	if (!master)
		return ERR_PTR(-ENOMEM);

	master->dev = dev;
	master->smmu = smmu;
2639 INIT_LIST_HEAD(&master->bonds);
2640 dev_iommu_priv_set(dev, master);
	ret = arm_smmu_insert_master(smmu, master);
	if (ret)
		goto err_free_master;
2646 device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2647 master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
	/*
	 * Note that PASID must be enabled before, and disabled after ATS:
	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
	 *
	 *   Behavior is undefined if this bit is Set and the value of the PASID
	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
	 *   are Clear.
	 */
	arm_smmu_enable_pasid(master);
2659 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2660 master->ssid_bits = min_t(u8, master->ssid_bits,
2661 CTXDESC_LINEAR_CDMAX);
2663 if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2664 device_property_read_bool(dev, "dma-can-stall")) ||
2665 smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2666 master->stall_enabled = true;
	return &smmu->iommu;

err_free_master:
	kfree(master);
	dev_iommu_priv_set(dev, NULL);
	return ERR_PTR(ret);
}
2675 static void arm_smmu_release_device(struct device *dev)
2677 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2679 if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2680 iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2681 arm_smmu_detach_dev(master);
2682 arm_smmu_disable_pasid(master);
2683 arm_smmu_remove_master(master);
2684 if (master->cd_table.cdtab)
2685 arm_smmu_free_cd_tables(master);
2689 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2691 struct iommu_group *group;
2694 * We don't support devices sharing stream IDs other than PCI RID
2695 * aliases, since the necessary ID-to-device lookup becomes rather
2696 * impractical given a potential sparse 32-bit stream ID space.
	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else
		group = generic_device_group(dev);

	return group;
}
2706 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	int ret = 0;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		ret = -EPERM;
	else
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	mutex_unlock(&smmu_domain->init_mutex);

	return ret;
}
2721 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2723 return iommu_fwspec_add_ids(dev, args->args, 1);
2726 static void arm_smmu_get_resv_regions(struct device *dev,
2727 struct list_head *head)
2729 struct iommu_resv_region *region;
2730 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
	if (!region)
		return;

	list_add_tail(&region->list, head);
2739 iommu_dma_get_resv_regions(dev, head);
2742 static int arm_smmu_dev_enable_feature(struct device *dev,
2743 enum iommu_dev_features feat)
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);

	if (!master)
		return -ENODEV;

	switch (feat) {
	case IOMMU_DEV_FEAT_IOPF:
		if (!arm_smmu_master_iopf_supported(master))
			return -EINVAL;
		if (master->iopf_enabled)
			return -EBUSY;
		master->iopf_enabled = true;
		return 0;
	case IOMMU_DEV_FEAT_SVA:
		if (!arm_smmu_master_sva_supported(master))
			return -EINVAL;
		if (arm_smmu_master_sva_enabled(master))
			return -EBUSY;
		return arm_smmu_master_enable_sva(master);
	default:
		return -EINVAL;
	}
}
2769 static int arm_smmu_dev_disable_feature(struct device *dev,
2770 enum iommu_dev_features feat)
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);

	if (!master)
		return -EINVAL;

	switch (feat) {
	case IOMMU_DEV_FEAT_IOPF:
		if (!master->iopf_enabled)
			return -EINVAL;
		if (master->sva_enabled)
			return -EBUSY;
		master->iopf_enabled = false;
		return 0;
	case IOMMU_DEV_FEAT_SVA:
		if (!arm_smmu_master_sva_enabled(master))
			return -EINVAL;
		return arm_smmu_master_disable_sva(master);
	default:
		return -EINVAL;
	}
}
/*
 * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
2796 * PCIe link and save the data to memory by DMA. The hardware is restricted to
2797 * use identity mapping only.
2799 #define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
2800 (pdev)->device == 0xa12e)
2802 static int arm_smmu_def_domain_type(struct device *dev)
2804 if (dev_is_pci(dev)) {
2805 struct pci_dev *pdev = to_pci_dev(dev);
		if (IS_HISI_PTT_DEVICE(pdev))
			return IOMMU_DOMAIN_IDENTITY;
	}

	return 0;
}
2814 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
2816 struct iommu_domain *domain;
2818 domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
	if (WARN_ON(IS_ERR(domain)) || !domain)
		return;

	arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
}
2825 static struct iommu_ops arm_smmu_ops = {
2826 .capable = arm_smmu_capable,
2827 .domain_alloc = arm_smmu_domain_alloc,
2828 .probe_device = arm_smmu_probe_device,
2829 .release_device = arm_smmu_release_device,
2830 .device_group = arm_smmu_device_group,
2831 .of_xlate = arm_smmu_of_xlate,
2832 .get_resv_regions = arm_smmu_get_resv_regions,
2833 .remove_dev_pasid = arm_smmu_remove_dev_pasid,
2834 .dev_enable_feat = arm_smmu_dev_enable_feature,
2835 .dev_disable_feat = arm_smmu_dev_disable_feature,
2836 .page_response = arm_smmu_page_response,
2837 .def_domain_type = arm_smmu_def_domain_type,
2838 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2839 .owner = THIS_MODULE,
2840 .default_domain_ops = &(const struct iommu_domain_ops) {
2841 .attach_dev = arm_smmu_attach_dev,
2842 .map_pages = arm_smmu_map_pages,
2843 .unmap_pages = arm_smmu_unmap_pages,
2844 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
2845 .iotlb_sync = arm_smmu_iotlb_sync,
2846 .iova_to_phys = arm_smmu_iova_to_phys,
2847 .enable_nesting = arm_smmu_enable_nesting,
2848 .free = arm_smmu_domain_free,
2852 /* Probing and initialisation functions */
static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
				   struct arm_smmu_queue *q,
				   void __iomem *page,
				   unsigned long prod_off,
				   unsigned long cons_off,
				   size_t dwords, const char *name)
{
	size_t qsz;

	do {
		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
					      GFP_KERNEL);
		if (q->base || qsz < PAGE_SIZE)
			break;

		q->llq.max_n_shift--;
	} while (1);

	if (!q->base) {
		dev_err(smmu->dev,
			"failed to allocate queue (0x%zx bytes) for %s\n",
			qsz, name);
		return -ENOMEM;
	}

	if (!WARN_ON(q->base_dma & (qsz - 1))) {
		dev_info(smmu->dev, "allocated %u entries for %s\n",
			 1 << q->llq.max_n_shift, name);
	}
2884 q->prod_reg = page + prod_off;
2885 q->cons_reg = page + cons_off;
2886 q->ent_dwords = dwords;
2888 q->q_base = Q_BASE_RWA;
2889 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2890 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2892 q->llq.prod = q->llq.cons = 0;
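/*
 * Illustrative sizing example (assumed values): a queue with
 * max_n_shift = 8 and two 64-bit words per entry occupies
 * ((1 << 8) * 2) << 3 = 4096 bytes, and Q_BASE_LOG2SIZE records the 8 so
 * the hardware knows where the ring wraps.
 */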
2896 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2898 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2899 unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2901 atomic_set(&cmdq->owner_prod, 0);
2902 atomic_set(&cmdq->lock, 0);
	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
							       GFP_KERNEL);
	if (!cmdq->valid_map)
		return -ENOMEM;

	return 0;
}
2912 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2917 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2918 ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
				      CMDQ_ENT_DWORDS, "cmdq");
	if (ret)
		return ret;

	ret = arm_smmu_cmdq_init(smmu);
	if (ret)
		return ret;

2928 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2929 ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
				      EVTQ_ENT_DWORDS, "evtq");
	if (ret)
		return ret;

	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
		if (!smmu->evtq.iopf)
			return -ENOMEM;
	}

	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
		return 0;

2945 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2946 ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2947 PRIQ_ENT_DWORDS, "priq");
2950 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2953 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2954 void *strtab = smmu->strtab_cfg.strtab;
2956 cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
				    sizeof(*cfg->l1_desc), GFP_KERNEL);
	if (!cfg->l1_desc)
		return -ENOMEM;

	for (i = 0; i < cfg->num_l1_ents; ++i) {
		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
		strtab += STRTAB_L1_DESC_DWORDS << 3;
	}

	return 0;
}
2969 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2974 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2976 /* Calculate the L1 size, capped to the SIDSIZE. */
2977 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2978 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2979 cfg->num_l1_ents = 1 << size;
2981 size += STRTAB_SPLIT;
	if (size < smmu->sid_bits)
		dev_warn(smmu->dev,
			 "2-level strtab only covers %u/%u bits of SID\n",
			 size, smmu->sid_bits);
2987 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
				     GFP_KERNEL);
	if (!strtab) {
		dev_err(smmu->dev,
			"failed to allocate l1 stream table (%u bytes)\n",
			l1size);
		return -ENOMEM;
	}
	cfg->strtab = strtab;
2998 /* Configure strtab_base_cfg for 2 levels */
2999 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3000 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3001 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3002 cfg->strtab_base_cfg = reg;
3004 return arm_smmu_init_l1_strtab(smmu);
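/*
 * Worked example (assuming STRTAB_L1_SZ_SHIFT == 20, one dword per L1
 * descriptor, sid_bits = 16 and STRTAB_SPLIT = 8): the uncapped L1 size is
 * 20 - 3 = 17, capped to 16 - 8 = 8, so num_l1_ents = 256 and the L1 table
 * itself needs only 256 * 8 = 2KB of DMA memory.
 */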
3007 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3012 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3014 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
				     GFP_KERNEL);
	if (!strtab) {
		dev_err(smmu->dev,
			"failed to allocate linear stream table (%u bytes)\n",
			size);
		return -ENOMEM;
	}
	cfg->strtab = strtab;
3024 cfg->num_l1_ents = 1 << smmu->sid_bits;
3026 /* Configure strtab_base_cfg for a linear table covering all SIDs */
3027 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3028 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3029 cfg->strtab_base_cfg = reg;
3031 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
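/*
 * Illustrative example (assumed values): with sid_bits = 16 and 8 dwords
 * (64 bytes) per STE, the linear table needs (1 << 16) * 64 = 4MB of
 * contiguous DMA memory, which is why large SID spaces prefer the
 * two-level format.
 */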
3035 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3040 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
		ret = arm_smmu_init_strtab_2lvl(smmu);
	else
		ret = arm_smmu_init_strtab_linear(smmu);
	if (ret)
		return ret;

	/* Set the strtab base address */
3049 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3050 reg |= STRTAB_BASE_RA;
3051 smmu->strtab_cfg.strtab_base = reg;
3053 ida_init(&smmu->vmid_map);
3058 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3062 mutex_init(&smmu->streams_mutex);
3063 smmu->streams = RB_ROOT;
	ret = arm_smmu_init_queues(smmu);
	if (ret)
		return ret;

	return arm_smmu_init_strtab(smmu);
}
3072 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3073 unsigned int reg_off, unsigned int ack_off)
3077 writel_relaxed(val, smmu->base + reg_off);
3078 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3079 1, ARM_SMMU_POLL_TIMEOUT_US);
3082 /* GBPA is "special" */
3083 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
	int ret;
	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;

	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
					 1, ARM_SMMU_POLL_TIMEOUT_US);
	if (ret)
		return ret;

	reg &= ~clr;
	reg |= set;
	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3096 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3097 1, ARM_SMMU_POLL_TIMEOUT_US);
	if (ret)
		dev_err(smmu->dev, "GBPA not responding to update\n");
	return ret;
}
3104 static void arm_smmu_free_msis(void *data)
3106 struct device *dev = data;
3107 platform_msi_domain_free_irqs(dev);
3110 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3112 phys_addr_t doorbell;
3113 struct device *dev = msi_desc_to_dev(desc);
3114 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3115 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3117 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3118 doorbell &= MSI_CFG0_ADDR_MASK;
3120 writeq_relaxed(doorbell, smmu->base + cfg[0]);
3121 writel_relaxed(msg->data, smmu->base + cfg[1]);
3122 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
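/*
 * Illustrative example (hypothetical MSI message): address_hi = 0x8 and
 * address_lo = 0xf9010040 combine into doorbell = 0x8f9010040, which is
 * masked to the doorbell address field before landing in the CFG0 register.
 */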
3125 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3127 int ret, nvec = ARM_SMMU_MAX_MSIS;
3128 struct device *dev = smmu->dev;
3130 /* Clear the MSI address regs */
3131 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3132 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3134 if (smmu->features & ARM_SMMU_FEAT_PRI)
3135 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
		return;

	if (!dev->msi.domain) {
		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
		return;
	}
3147 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
	if (ret) {
		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
		return;
	}
3154 smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3155 smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3156 smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3158 /* Add callback to free MSIs on teardown */
3159 devm_add_action(dev, arm_smmu_free_msis, dev);
3162 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3166 arm_smmu_setup_msis(smmu);
3168 /* Request interrupt lines */
	irq = smmu->evtq.q.irq;
	if (irq) {
		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
						arm_smmu_evtq_thread,
						IRQF_ONESHOT,
						"arm-smmu-v3-evtq", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable evtq irq\n");
	} else {
		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
	}
	irq = smmu->gerr_irq;
	if (irq) {
		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
				       0, "arm-smmu-v3-gerror", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable gerror irq\n");
	} else {
		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
	}
	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		irq = smmu->priq.q.irq;
		if (irq) {
			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
							arm_smmu_priq_thread,
							IRQF_ONESHOT,
							"arm-smmu-v3-priq",
							smmu);
			if (ret < 0)
				dev_warn(smmu->dev,
					 "failed to enable priq irq\n");
		} else {
			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
		}
	}
}
3208 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3211 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3213 /* Disable IRQs first */
	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
				      ARM_SMMU_IRQ_CTRLACK);
	if (ret) {
		dev_err(smmu->dev, "failed to disable irqs\n");
		return ret;
	}

	irq = smmu->combined_irq;
	if (irq) {
		/*
		 * Cavium ThunderX2 implementation doesn't support unique irq
		 * lines. Use a single irq line for all the SMMUv3 interrupts.
		 */
		ret = devm_request_threaded_irq(smmu->dev, irq,
						arm_smmu_combined_irq_handler,
						arm_smmu_combined_irq_thread,
						IRQF_ONESHOT,
						"arm-smmu-v3-combined-irq", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable combined irq\n");
	} else {
		arm_smmu_setup_unique_irqs(smmu);
	}
3237 if (smmu->features & ARM_SMMU_FEAT_PRI)
3238 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3240 /* Enable interrupt generation on the SMMU */
	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
	if (ret)
		dev_warn(smmu->dev, "failed to enable irqs\n");

	return 0;
}
3249 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
	if (ret)
		dev_err(smmu->dev, "failed to clear cr0\n");

	return ret;
}
3260 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
	int ret;
	u32 reg, enables;
	struct arm_smmu_cmdq_ent cmd;
3266 /* Clear CR0 and sync (disables SMMU and queue processing) */
3267 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3268 if (reg & CR0_SMMUEN) {
3269 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3270 WARN_ON(is_kdump_kernel() && !disable_bypass);
		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
	}

	ret = arm_smmu_device_disable(smmu);
	if (ret) {
		dev_err(smmu->dev, "failed to clear cr0\n");
		return ret;
	}

	/* CR1 (table and queue memory attributes) */
3279 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3280 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3281 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3282 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3283 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3284 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3285 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3287 /* CR2 (random crap) */
3288 reg = CR2_PTM | CR2_RECINVSID;
	if (smmu->features & ARM_SMMU_FEAT_E2H)
		reg |= CR2_E2H;

	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3296 writeq_relaxed(smmu->strtab_cfg.strtab_base,
3297 smmu->base + ARM_SMMU_STRTAB_BASE);
3298 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3299 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3302 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3303 writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3304 writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3306 enables = CR0_CMDQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable command queue\n");
		return ret;
	}
3314 /* Invalidate any cached configuration */
3315 cmd.opcode = CMDQ_OP_CFGI_ALL;
3316 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3318 /* Invalidate any stale TLB entries */
3319 if (smmu->features & ARM_SMMU_FEAT_HYP) {
3320 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
	}

	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3325 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3328 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3329 writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3330 writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3332 enables |= CR0_EVTQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable event queue\n");
		return ret;
	}
3341 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3342 writeq_relaxed(smmu->priq.q.q_base,
3343 smmu->base + ARM_SMMU_PRIQ_BASE);
3344 writel_relaxed(smmu->priq.q.llq.prod,
3345 smmu->page1 + ARM_SMMU_PRIQ_PROD);
3346 writel_relaxed(smmu->priq.q.llq.cons,
3347 smmu->page1 + ARM_SMMU_PRIQ_CONS);
3349 enables |= CR0_PRIQEN;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable PRI queue\n");
			return ret;
		}
	}
3358 if (smmu->features & ARM_SMMU_FEAT_ATS) {
3359 enables |= CR0_ATSCHK;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable ATS check\n");
			return ret;
		}
	}
	ret = arm_smmu_setup_irqs(smmu);
	if (ret) {
		dev_err(smmu->dev, "failed to setup irqs\n");
		return ret;
	}
3374 if (is_kdump_kernel())
3375 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3377 /* Enable the SMMU interface, or ensure bypass */
	if (!bypass || disable_bypass) {
		enables |= CR0_SMMUEN;
	} else {
		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
		if (ret)
			return ret;
	}
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable SMMU interface\n");
		return ret;
	}

	return 0;
}
3395 #define IIDR_IMPLEMENTER_ARM 0x43b
3396 #define IIDR_PRODUCTID_ARM_MMU_600 0x483
3397 #define IIDR_PRODUCTID_ARM_MMU_700 0x487
3399 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
3402 unsigned int implementer, productid, variant, revision;
3404 reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
3405 implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
3406 productid = FIELD_GET(IIDR_PRODUCTID, reg);
3407 variant = FIELD_GET(IIDR_VARIANT, reg);
3408 revision = FIELD_GET(IIDR_REVISION, reg);
3410 switch (implementer) {
3411 case IIDR_IMPLEMENTER_ARM:
3412 switch (productid) {
3413 case IIDR_PRODUCTID_ARM_MMU_600:
3414 /* Arm erratum 1076982 */
3415 if (variant == 0 && revision <= 2)
3416 smmu->features &= ~ARM_SMMU_FEAT_SEV;
			/* Arm erratum 1209401 */
			if (variant < 2)
				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
			break;
3421 case IIDR_PRODUCTID_ARM_MMU_700:
3422 /* Arm erratum 2812531 */
3423 smmu->features &= ~ARM_SMMU_FEAT_BTM;
3424 smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
3425 /* Arm errata 2268618, 2812531 */
3426 smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3433 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3436 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3439 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3441 /* 2-level structures */
3442 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3443 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3445 if (reg & IDR0_CD2L)
3446 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3449 * Translation table endianness.
3450 * We currently require the same endianness as the CPU, but this
3451 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3453 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
	case IDR0_TTENDIAN_MIXED:
		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
		break;
#ifdef __BIG_ENDIAN
	case IDR0_TTENDIAN_BE:
		smmu->features |= ARM_SMMU_FEAT_TT_BE;
		break;
#else
	case IDR0_TTENDIAN_LE:
		smmu->features |= ARM_SMMU_FEAT_TT_LE;
		break;
#endif
	default:
		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
		return -ENXIO;
	}
3471 /* Boolean feature flags */
3472 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3473 smmu->features |= ARM_SMMU_FEAT_PRI;
3475 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3476 smmu->features |= ARM_SMMU_FEAT_ATS;
	if (reg & IDR0_SEV)
		smmu->features |= ARM_SMMU_FEAT_SEV;
3481 if (reg & IDR0_MSI) {
3482 smmu->features |= ARM_SMMU_FEAT_MSI;
3483 if (coherent && !disable_msipolling)
3484 smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3487 if (reg & IDR0_HYP) {
3488 smmu->features |= ARM_SMMU_FEAT_HYP;
3489 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3490 smmu->features |= ARM_SMMU_FEAT_E2H;
3494 * The coherency feature as set by FW is used in preference to the ID
3495 * register, but warn on mismatch.
3497 if (!!(reg & IDR0_COHACC) != coherent)
3498 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3499 coherent ? "true" : "false");
3501 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3502 case IDR0_STALL_MODEL_FORCE:
		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
		fallthrough;
	case IDR0_STALL_MODEL_STALL:
		smmu->features |= ARM_SMMU_FEAT_STALLS;
	}

	if (reg & IDR0_S1P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;

	if (reg & IDR0_S2P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3515 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
		dev_err(smmu->dev, "no translation support!\n");
		return -ENXIO;
	}
3520 /* We only support the AArch64 table format at present */
3521 switch (FIELD_GET(IDR0_TTF, reg)) {
	case IDR0_TTF_AARCH32_64:
		smmu->ias = 40;
		fallthrough;
	case IDR0_TTF_AARCH64:
		break;
	default:
		dev_err(smmu->dev, "AArch64 table format not supported!\n");
		return -ENXIO;
	}
3532 /* ASID/VMID sizes */
3533 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3534 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3537 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3538 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
		dev_err(smmu->dev, "embedded implementation not supported\n");
		return -ENXIO;
	}
3543 /* Queue sizes, capped to ensure natural alignment */
3544 smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3545 FIELD_GET(IDR1_CMDQS, reg));
3546 if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3548 * We don't support splitting up batches, so one batch of
3549 * commands plus an extra sync needs to fit inside the command
3550 * queue. There's also no way we can handle the weird alignment
3551 * restrictions on the base pointer for a unit-length queue.
		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
			CMDQ_BATCH_ENTRIES);
		return -ENXIO;
	}
3558 smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3559 FIELD_GET(IDR1_EVTQS, reg));
3560 smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3561 FIELD_GET(IDR1_PRIQS, reg));
3563 /* SID/SSID sizes */
3564 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3565 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3566 smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
3569 * If the SMMU supports fewer bits than would fill a single L2 stream
3570 * table, use a linear table instead.
3572 if (smmu->sid_bits <= STRTAB_SPLIT)
3573 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3576 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3577 if (FIELD_GET(IDR3_RIL, reg))
3578 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3581 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3583 /* Maximum number of outstanding stalls */
3584 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3587 if (reg & IDR5_GRAN64K)
3588 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3589 if (reg & IDR5_GRAN16K)
3590 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3591 if (reg & IDR5_GRAN4K)
3592 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3594 /* Input address size */
3595 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3596 smmu->features |= ARM_SMMU_FEAT_VAX;
3598 /* Output address size */
	switch (FIELD_GET(IDR5_OAS, reg)) {
	case IDR5_OAS_32_BIT:
		smmu->oas = 32;
		break;
	case IDR5_OAS_36_BIT:
		smmu->oas = 36;
		break;
	case IDR5_OAS_40_BIT:
		smmu->oas = 40;
		break;
	case IDR5_OAS_42_BIT:
		smmu->oas = 42;
		break;
	case IDR5_OAS_44_BIT:
		smmu->oas = 44;
		break;
	case IDR5_OAS_52_BIT:
		smmu->oas = 52;
		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
		break;
	default:
		dev_info(smmu->dev,
			"unknown output address size. Truncating to 48-bit\n");
		fallthrough;
	case IDR5_OAS_48_BIT:
		smmu->oas = 48;
	}
3627 if (arm_smmu_ops.pgsize_bitmap == -1UL)
3628 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3630 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3632 /* Set the DMA mask for our table walker */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");
3637 smmu->ias = max(smmu->ias, smmu->oas);
3639 if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
3640 (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
3641 smmu->features |= ARM_SMMU_FEAT_NESTING;
3643 arm_smmu_device_iidr_probe(smmu);
3645 if (arm_smmu_sva_supported(smmu))
3646 smmu->features |= ARM_SMMU_FEAT_SVA;
3648 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3649 smmu->ias, smmu->oas, smmu->features);
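/*
 * Illustrative example: an SMMU reporting GRAN4K and GRAN64K in IDR5 ends
 * up with pgsize_bitmap = SZ_4K | SZ_2M | SZ_1G | SZ_64K | SZ_512M, i.e.
 * every leaf and block size the LPAE walker can use with those granules.
 */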
3654 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
	switch (model) {
	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
		break;
	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
		break;
	}

3665 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
#ifdef CONFIG_ACPI
static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3669 struct arm_smmu_device *smmu)
3671 struct acpi_iort_smmu_v3 *iort_smmu;
3672 struct device *dev = smmu->dev;
3673 struct acpi_iort_node *node;
3675 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3677 /* Retrieve SMMUv3 specific data */
3678 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3680 acpi_smmu_get_options(iort_smmu->model, smmu);
3682 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	return 0;
}
#else
3688 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif
3695 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3696 struct arm_smmu_device *smmu)
	struct device *dev = &pdev->dev;
	u32 cells;
	int ret = -EINVAL;

	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
		dev_err(dev, "missing #iommu-cells property\n");
	else if (cells != 1)
		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
	else
		ret = 0;

	parse_driver_options(smmu);

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	return ret;
}
3717 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
		return SZ_64K;
	else
		return SZ_128K;
}
3725 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3726 resource_size_t size)
3728 struct resource res = DEFINE_RES_MEM(start, size);
3730 return devm_ioremap_resource(dev, &res);
3733 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3735 struct list_head rmr_list;
3736 struct iommu_resv_region *e;
3738 INIT_LIST_HEAD(&rmr_list);
3739 iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3741 list_for_each_entry(e, &rmr_list, list) {
3742 struct arm_smmu_ste *step;
3743 struct iommu_iort_rmr_data *rmr;
3746 rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3747 for (i = 0; i < rmr->num_sids; i++) {
			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
			if (ret) {
				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
					rmr->sids[i]);
				continue;
			}
3755 step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
3756 arm_smmu_init_bypass_stes(step, 1, true);
3760 iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3763 static int arm_smmu_device_probe(struct platform_device *pdev)
	int irq, ret;
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	bool bypass;
	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu)
		return -ENOMEM;
	smmu->dev = dev;

	if (dev->of_node) {
		ret = arm_smmu_device_dt_probe(pdev, smmu);
	} else {
		ret = arm_smmu_device_acpi_probe(pdev, smmu);
		if (ret == -ENODEV)
			return ret;
	}

	/* Set bypass mode according to firmware probing result */
	bypass = !!ret;

	/* Base address */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -EINVAL;
	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
		dev_err(dev, "MMIO region too small (%pr)\n", res);
		return -EINVAL;
	}
	ioaddr = res->start;
3799 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3800 * the PMCG registers which are reserved by the PMU driver.
3802 smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3803 if (IS_ERR(smmu->base))
3804 return PTR_ERR(smmu->base);
3806 if (arm_smmu_resource_size(smmu) > SZ_64K) {
3807 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3809 if (IS_ERR(smmu->page1))
3810 return PTR_ERR(smmu->page1);
3812 smmu->page1 = smmu->base;
3815 /* Interrupt lines */
	irq = platform_get_irq_byname_optional(pdev, "combined");
	if (irq > 0)
		smmu->combined_irq = irq;
	else {
		irq = platform_get_irq_byname_optional(pdev, "eventq");
		if (irq > 0)
			smmu->evtq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "priq");
		if (irq > 0)
			smmu->priq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "gerror");
		if (irq > 0)
			smmu->gerr_irq = irq;
	}
	ret = arm_smmu_device_hw_probe(smmu);
	if (ret)
		return ret;

	/* Initialise in-memory data structures */
	ret = arm_smmu_init_structures(smmu);
	if (ret)
		return ret;
3843 /* Record our private device structure */
3844 platform_set_drvdata(pdev, smmu);
3846 /* Check for RMRs and install bypass STEs if any */
3847 arm_smmu_rmr_install_bypass_ste(smmu);
3849 /* Reset the device */
	ret = arm_smmu_device_reset(smmu, bypass);
	if (ret)
		return ret;

	/* And we're up. Go go go! */
	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
				     "smmu3.%pa", &ioaddr);
	if (ret)
		return ret;
	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
	if (ret) {
		dev_err(dev, "Failed to register iommu\n");
		iommu_device_sysfs_remove(&smmu->iommu);
		return ret;
	}

	return 0;
}
3870 static void arm_smmu_device_remove(struct platform_device *pdev)
3872 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3874 iommu_device_unregister(&smmu->iommu);
3875 iommu_device_sysfs_remove(&smmu->iommu);
3876 arm_smmu_device_disable(smmu);
3877 iopf_queue_free(smmu->evtq.iopf);
3878 ida_destroy(&smmu->vmid_map);
3881 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3883 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3885 arm_smmu_device_disable(smmu);
3888 static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v3", },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3894 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3896 arm_smmu_sva_notifier_synchronize();
3897 platform_driver_unregister(drv);
static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu-v3",
		.of_match_table		= arm_smmu_of_match,
		.suppress_bind_attrs	= true,
	},
3906 .probe = arm_smmu_device_probe,
3907 .remove_new = arm_smmu_device_remove,
3908 .shutdown = arm_smmu_device_shutdown,
3910 module_driver(arm_smmu_driver, platform_driver_register,
3911 arm_smmu_driver_unregister);
3913 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3914 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3915 MODULE_ALIAS("platform:arm-smmu-v3");
3916 MODULE_LICENSE("GPL v2");