// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitops.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io-pgtable.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>

#include <linux/amba/bus.h>

#include "arm-smmu-v3.h"
#include "../../iommu-sva-lib.h"

static bool disable_bypass = true;
module_param(disable_bypass, bool, 0444);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

static bool disable_msipolling;
module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
	"Disable MSI-based polling for CMD_SYNC completion.");

enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};

static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);

/*
 * Special value used by SVA when a process dies, to quiesce a CD without
 * disabling it.
 */
struct arm_smmu_ctx_desc quiet_cd = { 0 };

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
	{ 0, NULL},
};

static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
					  arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				   arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}
/* Low-level queue manipulation functions */
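/*
 * A worked example of the prod/cons encoding used by the helpers below:
 * each pointer packs an index in its low max_n_shift bits (Q_IDX), a
 * wrap bit directly above it (Q_WRP) and an overflow flag (Q_OVF).
 * Assuming max_n_shift == 4 (a 16-entry queue), prod == 0x13 decodes to
 * index 3 with the wrap bit set. queue_full() is then "same index,
 * different wrap bits" and queue_empty() is "same index, same wrap
 * bits".
 */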
static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
{
	u32 space, prod, cons;

	prod = Q_IDX(q, q->prod);
	cons = Q_IDX(q, q->cons);

	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
		space = (1 << q->max_n_shift) - (prod - cons);
	else
		space = cons - prod;

	return space >= n;
}

static bool queue_full(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
}

static bool queue_empty(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
}

static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
{
	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
}

static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	__iomb();
	writel_relaxed(q->llq.cons, q->cons_reg);
}

static void queue_inc_cons(struct arm_smmu_ll_queue *q)
{
	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
}

static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
	u32 prod;
	int ret = 0;

	/*
	 * We can't use the _relaxed() variant here, as we must prevent
	 * speculative reads of the queue before we have determined that
	 * prod has indeed moved.
	 */
	prod = readl(q->prod_reg);

	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
		ret = -EOVERFLOW;

	q->llq.prod = prod;
	return ret;
}

static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
{
	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
}

static void queue_poll_init(struct arm_smmu_device *smmu,
			    struct arm_smmu_queue_poll *qp)
{
	qp->delay = 1;
	qp->spin_cnt = 0;
	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
}

static int queue_poll(struct arm_smmu_queue_poll *qp)
{
	if (ktime_compare(ktime_get(), qp->timeout) > 0)
		return -ETIMEDOUT;

	if (qp->wfe) {
		wfe();
	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
		cpu_relax();
	} else {
		udelay(qp->delay);
		qp->delay *= 2;
		qp->spin_cnt = 0;
	}

	return 0;
}

static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = cpu_to_le64(*src++);
}

static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = le64_to_cpu(*src++);
}

static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
	if (queue_empty(&q->llq))
		return -EAGAIN;

	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
	queue_inc_cons(&q->llq);
	queue_sync_cons_out(q);
	return 0;
}
/* High-level queue accessors */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);

	switch (ent->opcode) {
	case CMDQ_OP_TLBI_EL2_ALL:
	case CMDQ_OP_TLBI_NSNH_ALL:
		break;
	case CMDQ_OP_PREFETCH_CFG:
		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
		break;
	case CMDQ_OP_CFGI_CD:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
		fallthrough;
	case CMDQ_OP_CFGI_STE:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
		break;
	case CMDQ_OP_CFGI_CD_ALL:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		break;
	case CMDQ_OP_CFGI_ALL:
		/* Cover the entire SID range */
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
		break;
	case CMDQ_OP_TLBI_NH_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		fallthrough;
	case CMDQ_OP_TLBI_EL2_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
		break;
	case CMDQ_OP_TLBI_S2_IPA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
		break;
	case CMDQ_OP_TLBI_NH_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		fallthrough;
	case CMDQ_OP_TLBI_S12_VMALL:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		break;
	case CMDQ_OP_TLBI_EL2_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		break;
	case CMDQ_OP_ATC_INV:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
		break;
	case CMDQ_OP_PRI_RESP:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
		switch (ent->pri.resp) {
		case PRI_RESP_DENY:
		case PRI_RESP_FAIL:
		case PRI_RESP_SUCC:
			break;
		default:
			return -EINVAL;
		}
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
		break;
	case CMDQ_OP_RESUME:
		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
		break;
	case CMDQ_OP_CMD_SYNC:
		if (ent->sync.msiaddr) {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
		} else {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
		}
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
					 u32 prod)
{
	struct arm_smmu_queue *q = &smmu->cmdq.q;
	struct arm_smmu_cmdq_ent ent = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	/*
	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
	 * payload, so the write will zero the entire command on that platform.
	 */
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
				   q->ent_dwords * 8;
	}

	arm_smmu_cmdq_build_cmd(cmd, &ent);
}
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
	static const char * const cerror_str[] = {
		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
	};

	int i;
	u64 cmd[CMDQ_ENT_DWORDS];
	struct arm_smmu_queue *q = &smmu->cmdq.q;
	u32 cons = readl_relaxed(q->cons_reg);
	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
	struct arm_smmu_cmdq_ent cmd_sync = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
		idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");

	switch (idx) {
	case CMDQ_ERR_CERROR_ABT_IDX:
		dev_err(smmu->dev, "retrying command fetch\n");
	case CMDQ_ERR_CERROR_NONE_IDX:
		return;
	case CMDQ_ERR_CERROR_ATC_INV_IDX:
		/*
		 * ATC Invalidation Completion timeout. CONS is still pointing
		 * at the CMD_SYNC. Attempt to complete other pending commands
		 * by repeating the CMD_SYNC, though we might well end up back
		 * here since the ATC invalidation may still be pending.
		 */
		return;
	case CMDQ_ERR_CERROR_ILL_IDX:
	default:
		break;
	}

	/*
	 * We may have concurrent producers, so we need to be careful
	 * not to touch any of the shadow cmdq state.
	 */
	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
	dev_err(smmu->dev, "skipping command in error state:\n");
	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);

	/* Convert the erroneous command into a CMD_SYNC */
	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
		return;
	}

	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}
/*
 * Command queue locking.
 * This is a form of bastardised rwlock with the following major changes:
 *
 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 *   Neither have barrier semantics, and instead provide only a control
 *   dependency.
 *
 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 *   fails if the caller appears to be the last lock holder (yes, this is
 *   racy). All successful UNLOCK routines have RELEASE semantics.
 */
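/*
 * Roughly, the resulting usage pattern (a simplified sketch of
 * arm_smmu_cmdq_issue_cmdlist() and arm_smmu_cmdq_poll_until_not_full()
 * below) is:
 *
 *	CMD_SYNC inserter:		cons updater:
 *	shared_lock(cmdq);		if (exclusive_trylock(cmdq)) {
 *	<wait for CMD_SYNC>			<read the cons register>
 *	if (!shared_tryunlock(cmdq)) {		exclusive_unlock(cmdq);
 *		<update llq.cons>	}
 *		shared_unlock(cmdq);
 *	}
 */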
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	int val;

	/*
	 * We can try to avoid the cmpxchg() loop by simply incrementing the
	 * lock counter. When held in exclusive state, the lock counter is set
	 * to INT_MIN so these increments won't hurt as the value will remain
	 * negative.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	do {
		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}

static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
{
	(void)atomic_dec_return_release(&cmdq->lock);
}

static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
{
	if (atomic_read(&cmdq->lock) == 1)
		return false;

	arm_smmu_cmdq_shared_unlock(cmdq);
	return true;
}

#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})

#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&cmdq->lock, 0);				\
	local_irq_restore(flags);					\
})
/*
 * Command queue insertion.
 * This is made fiddly by our attempts to achieve some sort of scalability
 * since there is one queue shared amongst all of the CPUs in the system. If
 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
 * then you'll *love* this monstrosity.
 *
 * The basic idea is to split the queue up into ranges of commands that are
 * owned by a given CPU; the owner may not have written all of the commands
 * itself, but is responsible for advancing the hardware prod pointer when
 * the time comes. The algorithm is roughly:
 *
 *	1. Allocate some space in the queue. At this point we also discover
 *	   whether the head of the queue is currently owned by another CPU,
 *	   or whether we are the owner.
 *
 *	2. Write our commands into our allocated slots in the queue.
 *
 *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
 *
 *	4. If we are an owner:
 *		a. Wait for the previous owner to finish.
 *		b. Mark the queue head as unowned, which tells us the range
 *		   that we are responsible for publishing.
 *		c. Wait for all commands in our owned range to become valid.
 *		d. Advance the hardware prod pointer.
 *		e. Tell the next owner we've finished.
 *
 *	5. If we are inserting a CMD_SYNC (we may or may not have been an
 *	   owner), then we need to stick around until it has completed:
 *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
 *		   to clear the first 4 bytes.
 *		b. Otherwise, we spin waiting for the hardware cons pointer to
 *		   advance past our command.
 *
 * The devil is in the details, particularly the use of locking for handling
 * SYNC completion and freeing up space in the queue before we think that it is
 * full.
 */
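/*
 * A concrete (simplified) example of the algorithm above: CPU0 and CPU1
 * each insert two commands. CPU0 wins the race for the shared prod
 * pointer, sees no OWNED flag and so becomes owner of slots [0, 2);
 * CPU1 allocates [2, 4) behind it, observing the OWNED flag set by
 * CPU0. Both write their commands and mark their own slots valid
 * independently. CPU0 then clears the OWNED flag (taking responsibility
 * for publishing [0, 4)), polls the valid bitmap until CPU1's slots are
 * also marked valid, and writes 4 to the hardware prod register on
 * behalf of both CPUs.
 */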
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
					       u32 sprod, u32 eprod, bool set)
{
	u32 swidx, sbidx, ewidx, ebidx;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= sprod,
	};

	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;

	while (llq.prod != eprod) {
		unsigned long mask;
		atomic_long_t *ptr;
		u32 limit = BITS_PER_LONG;

		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;

		ptr = &cmdq->valid_map[swidx];

		if ((swidx == ewidx) && (sbidx < ebidx))
			limit = ebidx;

		mask = GENMASK(limit - 1, sbidx);

		/*
		 * The valid bit is the inverse of the wrap bit. This means
		 * that a zero-initialised queue is invalid and, after marking
		 * all entries as valid, they become invalid again when we
		 * wrap.
		 */
		if (set) {
			atomic_long_xor(mask, ptr);
		} else { /* Poll */
			unsigned long valid;

			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
		}

		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
	}
}
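/*
 * A worked example of the xor trick above, for a single valid_map word
 * and mask == 0x3: on the first lap of the queue (wrap bit clear) the
 * bits start at 0 and atomic_long_xor() flips them to 1, so pollers
 * wait for (VAL & 0x3) == 0x3. On the next lap (wrap bit set) the same
 * xor flips them back to 0 and pollers, computing 'valid' as 0 from
 * Q_WRP(), wait for (VAL & 0x3) == 0. The map therefore never needs
 * re-initialising.
 */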
/* Mark all entries in the range [sprod, eprod) as valid */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
					u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}

/* Wait for all entries in the range [sprod, eprod) to become valid */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
					 u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}

/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
					     struct arm_smmu_ll_queue *llq)
{
	unsigned long flags;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	int ret = 0;

	/*
	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
	 * that fails, spin until somebody else updates it for us.
	 */
	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
		llq->val = READ_ONCE(cmdq->q.llq.val);
		return 0;
	}

	queue_poll_init(smmu, &qp);
	do {
		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
		if (!queue_full(llq))
			break;

		ret = queue_poll(&qp);
	} while (!ret);

	return ret;
}

/*
 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
					  struct arm_smmu_ll_queue *llq)
{
	int ret = 0;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));

	queue_poll_init(smmu, &qp);

	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
	qp.wfe = false;
	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
	return ret;
}
/*
 * Wait until the SMMU cons index passes llq->prod.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
					       struct arm_smmu_ll_queue *llq)
{
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	u32 prod = llq->prod;
	int ret = 0;

	queue_poll_init(smmu, &qp);
	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
	do {
		if (queue_consumed(llq, prod))
			break;

		ret = queue_poll(&qp);

		/*
		 * This needs to be a readl() so that our subsequent call
		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
		 *
		 * Specifically, we need to ensure that we observe all
		 * shared_lock()s by other CMD_SYNCs that share our owner,
		 * so that a failing call to tryunlock() means that we're
		 * the last one out and therefore we can safely advance
		 * cmdq->q.llq.cons. Roughly speaking:
		 *
		 * CPU 0		CPU1			CPU2 (us)
		 *
		 * if (sync)
		 *	shared_lock();
		 *
		 * dma_wmb();
		 * set_valid_map();
		 *
		 *			if (owner) {
		 *				poll_valid_map();
		 *				<control dependency>
		 *				writel(prod_reg);
		 *
		 *						readl(cons_reg);
		 *						tryunlock();
		 *
		 * Requires us to see CPU 0's shared_lock() acquisition.
		 */
		llq->cons = readl(cmdq->q.cons_reg);
	} while (!ret);

	return ret;
}
static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
					 struct arm_smmu_ll_queue *llq)
{
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);

	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
}

static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
					u32 prod, int n)
{
	int i;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= prod,
	};

	for (i = 0; i < n; ++i) {
		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];

		prod = queue_inc_prod_n(&llq, i);
		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
	}
}

/*
 * This is the actual insertion function, and provides the following
 * ordering guarantees to callers:
 *
 * - There is a dma_wmb() before publishing any commands to the queue.
 *   This can be relied upon to order prior writes to data structures
 *   in memory (such as a CD or an STE) before the command.
 *
 * - On completion of a CMD_SYNC, there is a control dependency.
 *   This can be relied upon to order subsequent writes to memory (e.g.
 *   freeing an IOVA) after completion of the CMD_SYNC.
 *
 * - Command insertion is totally ordered, so if two CPUs each race to
 *   insert their own list of commands then all of the commands from one
 *   CPU will appear before any of the commands from the other CPU.
 */
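/*
 * For example, an unmap() path relies on these guarantees as follows:
 * it clears the page-table entries, then inserts TLBI commands plus a
 * CMD_SYNC here. The dma_wmb() orders the PTE writes before the
 * commands become visible to the SMMU, and the control dependency on
 * CMD_SYNC completion orders the subsequent IOVA free after the
 * invalidation has been acknowledged.
 */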
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
				       u64 *cmds, int n, bool sync)
{
	u64 cmd_sync[CMDQ_ENT_DWORDS];
	u32 prod;
	unsigned long flags;
	bool owner;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift = cmdq->q.llq.max_n_shift,
	}, head = llq;
	int ret = 0;

	/* 1. Allocate some space in the queue */
	local_irq_save(flags);
	llq.val = READ_ONCE(cmdq->q.llq.val);
	do {
		u64 old;

		while (!queue_has_space(&llq, n + sync)) {
			local_irq_restore(flags);
			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
			local_irq_save(flags);
		}

		head.cons = llq.cons;
		head.prod = queue_inc_prod_n(&llq, n + sync) |
					     CMDQ_PROD_OWNED_FLAG;

		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
		if (old == llq.val)
			break;

		llq.val = old;
	} while (1);
	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;

	/*
	 * 2. Write our commands into the queue
	 * Dependency ordering from the cmpxchg() loop above.
	 */
	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
	if (sync) {
		prod = queue_inc_prod_n(&llq, n);
		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);

		/*
		 * In order to determine completion of our CMD_SYNC, we must
		 * ensure that the queue can't wrap twice without us noticing.
		 * We achieve that by taking the cmdq lock as shared before
		 * marking our slot as valid.
		 */
		arm_smmu_cmdq_shared_lock(cmdq);
	}

	/* 3. Mark our slots as valid, ensuring commands are visible first */
	dma_wmb();
	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);

	/* 4. If we are the owner, take control of the SMMU hardware */
	if (owner) {
		/* a. Wait for previous owner to finish */
		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);

		/* b. Stop gathering work by clearing the owned flag */
		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
						   &cmdq->q.llq.atomic.prod);
		prod &= ~CMDQ_PROD_OWNED_FLAG;

		/*
		 * c. Wait for any gathered work to be written to the queue.
		 * Note that we read our own entries so that we have the control
		 * dependency required by (d).
		 */
		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);

		/*
		 * d. Advance the hardware prod pointer
		 * Control dependency ordering from the entries becoming valid.
		 */
		writel_relaxed(prod, cmdq->q.prod_reg);

		/*
		 * e. Tell the next owner we're done
		 * Make sure we've updated the hardware first, so that we don't
		 * race to update prod and potentially move it backwards.
		 */
		atomic_set_release(&cmdq->owner_prod, prod);
	}

	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
	if (sync) {
		llq.prod = queue_inc_prod_n(&llq, n);
		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
		if (ret) {
			dev_err_ratelimited(smmu->dev,
					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
					    llq.prod,
					    readl_relaxed(cmdq->q.prod_reg),
					    readl_relaxed(cmdq->q.cons_reg));
		}

		/*
		 * Try to unlock the cmdq lock. This will fail if we're the last
		 * reader, in which case we can safely update cmdq->q.llq.cons
		 */
		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
			arm_smmu_cmdq_shared_unlock(cmdq);
		}
	}

	local_irq_restore(flags);
	return ret;
}
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
				   struct arm_smmu_cmdq_ent *ent)
{
	u64 cmd[CMDQ_ENT_DWORDS];

	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
			 ent->opcode);
		return -EINVAL;
	}

	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
}

static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
}

static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
				    struct arm_smmu_cmdq_batch *cmds,
				    struct arm_smmu_cmdq_ent *cmd)
{
	if (cmds->num == CMDQ_BATCH_ENTRIES) {
		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
		cmds->num = 0;
	}
	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
	cmds->num++;
}

static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
				      struct arm_smmu_cmdq_batch *cmds)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
}
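/*
 * Typical usage of the batch helpers above (a sketch; see
 * arm_smmu_sync_cd() and arm_smmu_atc_inv_domain() for real callers):
 *
 *	struct arm_smmu_cmdq_batch cmds = {};
 *
 *	for each required SID:
 *		cmd.cfgi.sid = sid;
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 *
 * batch_add() flushes automatically once CMDQ_BATCH_ENTRIES commands
 * have accumulated, and batch_submit() issues the remainder followed by
 * a single CMD_SYNC.
 */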
static int arm_smmu_page_response(struct device *dev,
				  struct iommu_fault_event *unused,
				  struct iommu_page_response *resp)
{
	struct arm_smmu_cmdq_ent cmd = {0};
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	int sid = master->streams[0].id;

	if (master->stall_enabled) {
		cmd.opcode		= CMDQ_OP_RESUME;
		cmd.resume.sid		= sid;
		cmd.resume.stag		= resp->grpid;
		switch (resp->code) {
		case IOMMU_PAGE_RESP_INVALID:
		case IOMMU_PAGE_RESP_FAILURE:
			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
			break;
		case IOMMU_PAGE_RESP_SUCCESS:
			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
			break;
		default:
			return -EINVAL;
		}
	} else {
		return -ENODEV;
	}

	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
	/*
	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
	 * RESUME consumption guarantees that the stalled transaction will be
	 * terminated... at some point in the future. PRI_RESP is fire and
	 * forget.
	 */

	return 0;
}

/* Context descriptor manipulation functions */
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
{
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
		.tlbi.asid = asid,
	};

	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);
}
static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
			     int ssid, bool leaf)
{
	size_t i;
	unsigned long flags;
	struct arm_smmu_master *master;
	struct arm_smmu_cmdq_batch cmds = {};
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_CD,
		.cfgi	= {
			.ssid	= ssid,
			.leaf	= leaf,
		},
	};

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		for (i = 0; i < master->num_streams; i++) {
			cmd.cfgi.sid = master->streams[i].id;
			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	arm_smmu_cmdq_batch_submit(smmu, &cmds);
}

static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
					struct arm_smmu_l1_ctx_desc *l1_desc)
{
	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);

	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
					     &l1_desc->l2ptr_dma, GFP_KERNEL);
	if (!l1_desc->l2ptr) {
		dev_warn(smmu->dev,
			 "failed to allocate context descriptor table\n");
		return -ENOMEM;
	}
	return 0;
}

static void arm_smmu_write_cd_l1_desc(__le64 *dst,
				      struct arm_smmu_l1_ctx_desc *l1_desc)
{
	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
		  CTXDESC_L1_DESC_V;

	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(*dst, cpu_to_le64(val));
}
static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
				   u32 ssid)
{
	__le64 *l1ptr;
	unsigned int idx;
	struct arm_smmu_l1_ctx_desc *l1_desc;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;

	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;

	idx = ssid >> CTXDESC_SPLIT;
	l1_desc = &cdcfg->l1_desc[idx];
	if (!l1_desc->l2ptr) {
		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
			return NULL;

		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
		/* An invalid L1CD can be cached */
		arm_smmu_sync_cd(smmu_domain, ssid, false);
	}
	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
}
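/*
 * An indexing example for the two-level walk above, assuming
 * CTXDESC_SPLIT == 10 (so CTXDESC_L2_ENTRIES == 1024): ssid 0x1803
 * selects L1 descriptor 6 (0x1803 >> 10) and CD 3 (0x1803 & 0x3ff)
 * within that leaf table. Leaf tables are only allocated on first use
 * of their L1 range.
 */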
int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
			    struct arm_smmu_ctx_desc *cd)
{
	/*
	 * This function handles the following cases:
	 *
	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
	 * (2) Install a secondary CD, for SID+SSID traffic.
	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
	 *     CD, then invalidate the old entry and mappings.
	 * (4) Quiesce the context without clearing the valid bit. Disable
	 *     translation, and ignore any translation fault.
	 * (5) Remove a secondary CD.
	 */
	u64 val;
	bool cd_live;
	__le64 *cdptr;

	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
		return -E2BIG;

	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
	if (!cdptr)
		return -ENOMEM;

	val = le64_to_cpu(cdptr[0]);
	cd_live = !!(val & CTXDESC_CD_0_V);

	if (!cd) { /* (5) */
		val = 0;
	} else if (cd == &quiet_cd) { /* (4) */
		val |= CTXDESC_CD_0_TCR_EPD0;
	} else if (cd_live) { /* (3) */
		val &= ~CTXDESC_CD_0_ASID;
		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
		/*
		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
		 * this substream's traffic
		 */
	} else { /* (1) and (2) */
		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
		cdptr[2] = 0;
		cdptr[3] = cpu_to_le64(cd->mair);

		/*
		 * STE is live, and the SMMU might read dwords of this CD in any
		 * order. Ensure that it observes valid values before reading
		 * V=1.
		 */
		arm_smmu_sync_cd(smmu_domain, ssid, true);

		val = cd->tcr |
#ifdef __BIG_ENDIAN
			CTXDESC_CD_0_ENDI |
#endif
			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
			CTXDESC_CD_0_AA64 |
			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
			CTXDESC_CD_0_V;

		if (smmu_domain->stall_enabled)
			val |= CTXDESC_CD_0_S;
	}

	/*
	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
	 * "Configuration structures and configuration invalidation completion"
	 *
	 *   The size of single-copy atomic reads made by the SMMU is
	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
	 *   field within an aligned 64-bit span of a structure can be altered
	 *   without first making the structure invalid.
	 */
	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
	arm_smmu_sync_cd(smmu_domain, ssid, true);
	return 0;
}
static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
{
	int ret;
	size_t l1size;
	size_t max_contexts;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;

	max_contexts = 1 << cfg->s1cdmax;

	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
	    max_contexts <= CTXDESC_L2_ENTRIES) {
		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
		cdcfg->num_l1_ents = max_contexts;

		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
	} else {
		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
						  CTXDESC_L2_ENTRIES);

		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
					      sizeof(*cdcfg->l1_desc),
					      GFP_KERNEL);
		if (!cdcfg->l1_desc)
			return -ENOMEM;

		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
	}

	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
					   GFP_KERNEL);
	if (!cdcfg->cdtab) {
		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
		ret = -ENOMEM;
		goto err_free_l1;
	}

	return 0;

err_free_l1:
	if (cdcfg->l1_desc) {
		devm_kfree(smmu->dev, cdcfg->l1_desc);
		cdcfg->l1_desc = NULL;
	}
	return ret;
}
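/*
 * Sizing sketch for the above, assuming CTXDESC_L2_ENTRIES == 1024 and
 * 64-byte CDs (CTXDESC_CD_DWORDS == 8): s1cdmax == 10 (1024 contexts)
 * fits the linear format in a single 64KB table, whereas s1cdmax == 16
 * (65536 contexts) uses the 2-level format with 64 L1 descriptors and
 * 64KB leaf tables allocated lazily by arm_smmu_alloc_cd_leaf_table().
 */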
static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
{
	int i;
	size_t size, l1size;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;

	if (cdcfg->l1_desc) {
		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);

		for (i = 0; i < cdcfg->num_l1_ents; i++) {
			if (!cdcfg->l1_desc[i].l2ptr)
				continue;

			dmam_free_coherent(smmu->dev, size,
					   cdcfg->l1_desc[i].l2ptr,
					   cdcfg->l1_desc[i].l2ptr_dma);
		}
		devm_kfree(smmu->dev, cdcfg->l1_desc);
		cdcfg->l1_desc = NULL;

		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
	} else {
		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
	}

	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
	cdcfg->cdtab_dma = 0;
	cdcfg->cdtab = NULL;
}

bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
{
	bool free;
	struct arm_smmu_ctx_desc *old_cd;

	if (!cd->asid)
		return false;

	free = refcount_dec_and_test(&cd->refs);
	if (free) {
		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
		WARN_ON(old_cd != cd);
	}
	return free;
}
/* Stream table manipulation functions */
static void
arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
{
	u64 val = 0;

	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;

	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(*dst, cpu_to_le64(val));
}

static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
{
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_STE,
		.cfgi	= {
			.sid	= sid,
			.leaf	= true,
		},
	};

	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);
}

static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
				      __le64 *dst)
{
	/*
	 * This is hideously complicated, but we only really care about
	 * three cases at the moment:
	 *
	 * 1. Invalid (all zero) -> bypass/fault (init)
	 * 2. Bypass/fault -> translation/bypass (attach)
	 * 3. Translation/bypass -> bypass/fault (detach)
	 *
	 * Given that we can't update the STE atomically and the SMMU
	 * doesn't read the thing in a defined order, that leaves us
	 * with the following maintenance requirements:
	 *
	 * 1. Update Config, return (init time STEs aren't live)
	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
	 * 3. Update Config, sync
	 */
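	/*
	 * Requirement 2 works because the V bit and the Config field both
	 * live in dword 0: by writing dwords 1-3 first and syncing, the
	 * SMMU can never observe a valid STE whose other fields are only
	 * partially up to date.
	 */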
	u64 val = le64_to_cpu(dst[0]);
	bool ste_live = false;
	struct arm_smmu_device *smmu = NULL;
	struct arm_smmu_s1_cfg *s1_cfg = NULL;
	struct arm_smmu_s2_cfg *s2_cfg = NULL;
	struct arm_smmu_domain *smmu_domain = NULL;
	struct arm_smmu_cmdq_ent prefetch_cmd = {
		.opcode		= CMDQ_OP_PREFETCH_CFG,
		.prefetch	= {
			.sid	= sid,
		},
	};

	if (master) {
		smmu_domain = master->domain;
		smmu = master->smmu;
	}

	if (smmu_domain) {
		switch (smmu_domain->stage) {
		case ARM_SMMU_DOMAIN_S1:
			s1_cfg = &smmu_domain->s1_cfg;
			break;
		case ARM_SMMU_DOMAIN_S2:
		case ARM_SMMU_DOMAIN_NESTED:
			s2_cfg = &smmu_domain->s2_cfg;
			break;
		default:
			break;
		}
	}

	if (val & STRTAB_STE_0_V) {
		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
		case STRTAB_STE_0_CFG_BYPASS:
			break;
		case STRTAB_STE_0_CFG_S1_TRANS:
		case STRTAB_STE_0_CFG_S2_TRANS:
			ste_live = true;
			break;
		case STRTAB_STE_0_CFG_ABORT:
			BUG_ON(!disable_bypass);
			break;
		default:
			BUG(); /* STE corruption */
		}
	}

	/* Nuke the existing STE_0 value, as we're going to rewrite it */
	val = STRTAB_STE_0_V;

	/* Bypass/fault */
	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
		if (!smmu_domain && disable_bypass)
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
		else
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);

		dst[0] = cpu_to_le64(val);
		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
						STRTAB_STE_1_SHCFG_INCOMING));
		dst[2] = 0; /* Nuke the VMID */
		/*
		 * The SMMU can perform negative caching, so we must sync
		 * the STE regardless of whether the old value was live.
		 */
		if (smmu)
			arm_smmu_sync_ste_for_sid(smmu, sid);
		return;
	}

	if (s1_cfg) {
		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;

		BUG_ON(ste_live);
		dst[1] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
			 FIELD_PREP(STRTAB_STE_1_STRW, strw));

		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
		    !master->stall_enabled)
			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);

		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
	}

	if (s2_cfg) {
		BUG_ON(ste_live);
		dst[2] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
#ifdef __BIG_ENDIAN
			 STRTAB_STE_2_S2ENDI |
#endif
			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
			 STRTAB_STE_2_S2R);

		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);

		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
	}

	if (master->ats_enabled)
		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
						 STRTAB_STE_1_EATS_TRANS));

	arm_smmu_sync_ste_for_sid(smmu, sid);
	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(dst[0], cpu_to_le64(val));
	arm_smmu_sync_ste_for_sid(smmu, sid);

	/* It's likely that we'll want to use the new STE soon */
	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
}
static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
{
	unsigned int i;

	for (i = 0; i < nent; ++i) {
		arm_smmu_write_strtab_ent(NULL, -1, strtab);
		strtab += STRTAB_STE_DWORDS;
	}
}

static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
{
	size_t size;
	void *strtab;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];

	if (desc->l2ptr)
		return 0;

	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];

	desc->span = STRTAB_SPLIT + 1;
	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
					  GFP_KERNEL);
	if (!desc->l2ptr) {
		dev_err(smmu->dev,
			"failed to allocate l2 stream table for SID %u\n",
			sid);
		return -ENOMEM;
	}

	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
	arm_smmu_write_strtab_l1_desc(strtab, desc);
	return 0;
}

static struct arm_smmu_master *
arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
{
	struct rb_node *node;
	struct arm_smmu_stream *stream;

	lockdep_assert_held(&smmu->streams_mutex);

	node = smmu->streams.rb_node;
	while (node) {
		stream = rb_entry(node, struct arm_smmu_stream, node);
		if (stream->id < sid)
			node = node->rb_right;
		else if (stream->id > sid)
			node = node->rb_left;
		else
			return stream->master;
	}

	return NULL;
}
/* IRQ and event handlers */
static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
{
	int ret;
	u32 reason;
	u32 perm = 0;
	struct arm_smmu_master *master;
	bool ssid_valid = evt[0] & EVTQ_0_SSV;
	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
	struct iommu_fault_event fault_evt = { };
	struct iommu_fault *flt = &fault_evt.fault;

	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
	case EVT_ID_TRANSLATION_FAULT:
		reason = IOMMU_FAULT_REASON_PTE_FETCH;
		break;
	case EVT_ID_ADDR_SIZE_FAULT:
		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
		break;
	case EVT_ID_ACCESS_FAULT:
		reason = IOMMU_FAULT_REASON_ACCESS;
		break;
	case EVT_ID_PERMISSION_FAULT:
		reason = IOMMU_FAULT_REASON_PERMISSION;
		break;
	default:
		return -EOPNOTSUPP;
	}

	/* Stage-2 is always pinned at the moment */
	if (evt[1] & EVTQ_1_S2)
		return -EFAULT;

	if (evt[1] & EVTQ_1_RnW)
		perm |= IOMMU_FAULT_PERM_READ;
	else
		perm |= IOMMU_FAULT_PERM_WRITE;

	if (evt[1] & EVTQ_1_InD)
		perm |= IOMMU_FAULT_PERM_EXEC;

	if (evt[1] & EVTQ_1_PnU)
		perm |= IOMMU_FAULT_PERM_PRIV;

	if (evt[1] & EVTQ_1_STALL) {
		flt->type = IOMMU_FAULT_PAGE_REQ;
		flt->prm = (struct iommu_fault_page_request) {
			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
			.perm = perm,
			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
		};

		if (ssid_valid) {
			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
		}
	} else {
		flt->type = IOMMU_FAULT_DMA_UNRECOV;
		flt->event = (struct iommu_fault_unrecoverable) {
			.reason = reason,
			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
			.perm = perm,
			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
		};

		if (ssid_valid) {
			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
		}
	}

	mutex_lock(&smmu->streams_mutex);
	master = arm_smmu_find_master(smmu, sid);
	if (!master) {
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = iommu_report_device_fault(master->dev, &fault_evt);
	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
		/* Nobody cared, abort the access */
		struct iommu_page_response resp = {
			.pasid		= flt->prm.pasid,
			.grpid		= flt->prm.grpid,
			.code		= IOMMU_PAGE_RESP_FAILURE,
		};
		arm_smmu_page_response(master->dev, &fault_evt, &resp);
	}

out_unlock:
	mutex_unlock(&smmu->streams_mutex);
	return ret;
}
static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
{
	int i, ret;
	struct arm_smmu_device *smmu = dev;
	struct arm_smmu_queue *q = &smmu->evtq.q;
	struct arm_smmu_ll_queue *llq = &q->llq;
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	u64 evt[EVTQ_ENT_DWORDS];

	do {
		while (!queue_remove_raw(q, evt)) {
			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);

			ret = arm_smmu_handle_evt(smmu, evt);
			if (!ret || !__ratelimit(&rs))
				continue;

			dev_info(smmu->dev, "event 0x%02x received:\n", id);
			for (i = 0; i < ARRAY_SIZE(evt); ++i)
				dev_info(smmu->dev, "\t0x%016llx\n",
					 (unsigned long long)evt[i]);
			cond_resched();
		}

		/*
		 * Not much we can do on overflow, so scream and pretend we're
		 * trying harder.
		 */
		if (queue_sync_prod_in(q) == -EOVERFLOW)
			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
	} while (!queue_empty(llq));

	/* Sync our overflow flag, as we believe we're up to speed */
	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
		    Q_IDX(llq, llq->cons);
	return IRQ_HANDLED;
}
static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
{
	u32 sid, ssid;
	u16 grpid;
	bool ssv, last;

	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);

	dev_info(smmu->dev, "unexpected PRI request received:\n");
	dev_info(smmu->dev,
		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
		 sid, ssid, grpid, last ? "L" : "",
		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
		 evt[1] & PRIQ_1_ADDR_MASK);

	if (last) {
		struct arm_smmu_cmdq_ent cmd = {
			.opcode			= CMDQ_OP_PRI_RESP,
			.substream_valid	= ssv,
			.pri			= {
				.sid	= sid,
				.ssid	= ssid,
				.grpid	= grpid,
				.resp	= PRI_RESP_DENY,
			},
		};

		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	}
}

static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
{
	struct arm_smmu_device *smmu = dev;
	struct arm_smmu_queue *q = &smmu->priq.q;
	struct arm_smmu_ll_queue *llq = &q->llq;
	u64 evt[PRIQ_ENT_DWORDS];

	do {
		while (!queue_remove_raw(q, evt))
			arm_smmu_handle_ppr(smmu, evt);

		if (queue_sync_prod_in(q) == -EOVERFLOW)
			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
	} while (!queue_empty(llq));

	/* Sync our overflow flag, as we believe we're up to speed */
	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
		    Q_IDX(llq, llq->cons);
	queue_sync_cons_out(q);
	return IRQ_HANDLED;
}
static int arm_smmu_device_disable(struct arm_smmu_device *smmu);

static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
{
	u32 gerror, gerrorn, active;
	struct arm_smmu_device *smmu = dev;

	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);

	active = gerror ^ gerrorn;
	if (!(active & GERROR_ERR_MASK))
		return IRQ_NONE; /* No errors pending */

	dev_warn(smmu->dev,
		 "unexpected global error reported (0x%08x), this could be serious\n",
		 active);

	if (active & GERROR_SFM_ERR) {
		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
		arm_smmu_device_disable(smmu);
	}

	if (active & GERROR_MSI_GERROR_ABT_ERR)
		dev_warn(smmu->dev, "GERROR MSI write aborted\n");

	if (active & GERROR_MSI_PRIQ_ABT_ERR)
		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");

	if (active & GERROR_MSI_EVTQ_ABT_ERR)
		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");

	if (active & GERROR_MSI_CMDQ_ABT_ERR)
		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");

	if (active & GERROR_PRIQ_ABT_ERR)
		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");

	if (active & GERROR_EVTQ_ABT_ERR)
		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");

	if (active & GERROR_CMDQ_ERR)
		arm_smmu_cmdq_skip_err(smmu);

	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
	return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
{
	struct arm_smmu_device *smmu = dev;

	arm_smmu_evtq_thread(irq, dev);
	if (smmu->features & ARM_SMMU_FEAT_PRI)
		arm_smmu_priq_thread(irq, dev);

	return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
{
	arm_smmu_gerror_handler(irq, dev);
	return IRQ_WAKE_THREAD;
}
static void
arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
			struct arm_smmu_cmdq_ent *cmd)
{
	size_t log2_span;
	size_t span_mask;
	/* ATC invalidates are always on 4096-bytes pages */
	size_t inval_grain_shift = 12;
	unsigned long page_start, page_end;

	/*
	 * ATS and PASID:
	 *
	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
	 * prefix. In that case all ATC entries within the address range are
	 * invalidated, including those that were requested with a PASID! There
	 * is no way to invalidate only entries without PASID.
	 *
	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
	 * traffic), translation requests without PASID create ATC entries
	 * without PASID, which must be invalidated with substream_valid clear.
	 * This has the unpleasant side-effect of invalidating all PASID-tagged
	 * ATC entries within the address range.
	 */
	*cmd = (struct arm_smmu_cmdq_ent) {
		.opcode			= CMDQ_OP_ATC_INV,
		.substream_valid	= !!ssid,
		.atc.ssid		= ssid,
	};

	if (!size) {
		cmd->atc.size = ATC_INV_SIZE_ALL;
		return;
	}

	page_start	= iova >> inval_grain_shift;
	page_end	= (iova + size - 1) >> inval_grain_shift;

	/*
	 * In an ATS Invalidate Request, the address must be aligned on the
	 * range size, which must be a power of two number of page sizes. We
	 * thus have to choose between grossly over-invalidating the region, or
	 * splitting the invalidation into multiple commands. For simplicity
	 * we'll go with the first solution, but should refine it in the future
	 * if multiple commands are shown to be more efficient.
	 *
	 * Find the smallest power of two that covers the range. The most
	 * significant differing bit between the start and end addresses,
	 * fls(start ^ end), indicates the required span. For example:
	 *
	 * We want to invalidate pages [8; 11]. This is already the ideal range:
	 *		x = 0b1000 ^ 0b1011 = 0b11
	 *		span = 1 << fls(x) = 4
	 *
	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
	 *		x = 0b0111 ^ 0b1010 = 0b1101
	 *		span = 1 << fls(x) = 16
	 */
	log2_span	= fls_long(page_start ^ page_end);
	span_mask	= (1ULL << log2_span) - 1;

	page_start	&= ~span_mask;

	cmd->atc.addr	= page_start << inval_grain_shift;
	cmd->atc.size	= log2_span;
}
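/*
 * Continuing the second example above: for iova 0x7000 and size 0x4000
 * (pages [7; 10]), log2_span == 4, so page_start is rounded down to 0
 * and the command is built with atc.addr == 0 and atc.size == 4,
 * invalidating pages [0; 15].
 */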
static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
{
	int i;
	struct arm_smmu_cmdq_ent cmd;

	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);

	for (i = 0; i < master->num_streams; i++) {
		cmd.atc.sid = master->streams[i].id;
		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
	}

	return arm_smmu_cmdq_issue_sync(master->smmu);
}

int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
			    unsigned long iova, size_t size)
{
	int i;
	unsigned long flags;
	struct arm_smmu_cmdq_ent cmd;
	struct arm_smmu_master *master;
	struct arm_smmu_cmdq_batch cmds = {};

	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
		return 0;

	/*
	 * Ensure that we've completed prior invalidation of the main TLBs
	 * before we read 'nr_ats_masters' in case of a concurrent call to
	 * arm_smmu_enable_ats():
	 *
	 *	// unmap()			// arm_smmu_enable_ats()
	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
	 *	smp_mb();			[...]
	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
	 *
	 * Ensures that we always see the incremented 'nr_ats_masters' count if
	 * ATS was enabled at the PCI device before completion of the TLBI.
	 */
	smp_mb();
	if (!atomic_read(&smmu_domain->nr_ats_masters))
		return 0;

	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		if (!master->ats_enabled)
			continue;

		for (i = 0; i < master->num_streams; i++) {
			cmd.atc.sid = master->streams[i].id;
			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
}
/* IO_PGTABLE API */
static void arm_smmu_tlb_inv_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cmdq_ent cmd;

	/*
	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
	 * PTEs previously cleared by unmaps on the current CPU not yet visible
	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
	 * insertion to guarantee those are observed before the TLBI. Do be
	 * careful, 007.
	 */
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
	} else {
		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
		arm_smmu_cmdq_issue_sync(smmu);
	}
	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
}

static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
				     unsigned long iova, size_t size,
				     size_t granule,
				     struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	unsigned long end = iova + size, num_pages = 0, tg = 0;
	size_t inv_range = granule;
	struct arm_smmu_cmdq_batch cmds = {};

	if (!size)
		return;

	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
		/* Get the leaf page size */
		tg = __ffs(smmu_domain->domain.pgsize_bitmap);

		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
		cmd->tlbi.tg = (tg - 10) / 2;

		/* Determine what level the granule is at */
		cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));

		num_pages = size >> tg;
	}

	while (iova < end) {
		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
			/*
			 * On each iteration of the loop, the range is 5 bits
			 * worth of the aligned size remaining.
			 * The range in pages is:
			 *
			 * range = (num_pages & (0x1f << __ffs(num_pages)))
			 */
			unsigned long scale, num;

			/* Determine the power of 2 multiple number of pages */
			scale = __ffs(num_pages);
			cmd->tlbi.scale = scale;

			/* Determine how many chunks of 2^scale size we have */
			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
			cmd->tlbi.num = num - 1;

			/* range is num * 2^scale * pgsize */
			inv_range = num << (scale + tg);

			/* Clear out the lower order bits for the next iteration */
			num_pages -= num << scale;
		}

		cmd->tlbi.addr = iova;
		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
		iova += inv_range;
	}
	arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
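/*
 * Worked examples for the encodings above, assuming a 4K leaf page size
 * (tg == 12): the TG field is (12 - 10) / 2 == 1, and the TTL hint is
 * 4 - (12 - 3) / 9 == 3 for a 4K granule (a level-3 leaf),
 * 4 - (21 - 3) / 9 == 2 for a 2M granule and 4 - (30 - 3) / 9 == 1 for
 * a 1G granule, matching the table level that maps each block size.
 */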
static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
					  size_t granule, bool leaf,
					  struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_cmdq_ent cmd = {
		.tlbi = {
			.leaf	= leaf,
		},
	};

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
	} else {
		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
	}
	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);

	/*
	 * Unfortunately, this can't be leaf-only since we may have
	 * zapped an entire table.
	 */
	arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
}

void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
				 size_t granule, bool leaf,
				 struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
		.tlbi = {
			.asid	= asid,
			.leaf	= leaf,
		},
	};

	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
}

static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
					 unsigned long iova, size_t granule,
					 void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct iommu_domain *domain = &smmu_domain->domain;

	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
}

static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
				  size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
}

static const struct iommu_flush_ops arm_smmu_flush_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context,
	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
};
/* IOMMU API */
static bool arm_smmu_capable(enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		return true;
	case IOMMU_CAP_NOEXEC:
		return true;
	default:
		return false;
	}
}
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
	struct arm_smmu_domain *smmu_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED &&
	    type != IOMMU_DOMAIN_DMA &&
	    type != IOMMU_DOMAIN_IDENTITY)
		return NULL;

	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
		return NULL;

	if (type == IOMMU_DOMAIN_DMA &&
	    iommu_get_dma_cookie(&smmu_domain->domain)) {
		kfree(smmu_domain);
		return NULL;
	}

	mutex_init(&smmu_domain->init_mutex);
	INIT_LIST_HEAD(&smmu_domain->devices);
	spin_lock_init(&smmu_domain->devices_lock);
	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);

	return &smmu_domain->domain;
}

static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
{
	int idx, size = 1 << span;

	do {
		idx = find_first_zero_bit(map, size);
		if (idx == size)
			return -ENOSPC;
	} while (test_and_set_bit(idx, map));

	return idx;
}

static void arm_smmu_bitmap_free(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}

static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	iommu_put_dma_cookie(domain);
	free_io_pgtable_ops(smmu_domain->pgtbl_ops);

	/* Free the CD and ASID, if we allocated them */
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;

		/* Prevent SVA from touching the CD while we're freeing it */
		mutex_lock(&arm_smmu_asid_lock);
		if (cfg->cdcfg.cdtab)
			arm_smmu_free_cd_tables(smmu_domain);
		arm_smmu_free_asid(&cfg->cd);
		mutex_unlock(&arm_smmu_asid_lock);
	} else {
		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
		if (cfg->vmid)
			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
	}

	kfree(smmu_domain);
}
static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
				       struct arm_smmu_master *master,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	int ret;
	u32 asid;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;

	refcount_set(&cfg->cd.refs, 1);

	/* Prevent SVA from modifying the ASID until it is written to the CD */
	mutex_lock(&arm_smmu_asid_lock);
	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
	if (ret)
		goto out_unlock;

	cfg->s1cdmax = master->ssid_bits;

	smmu_domain->stall_enabled = master->stall_enabled;

	ret = arm_smmu_alloc_cd_tables(smmu_domain);
	if (ret)
		goto out_free_asid;

	cfg->cd.asid	= (u16)asid;
	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;

	/*
	 * Note that this will end up calling arm_smmu_sync_cd() before
	 * the master has been added to the devices list for this domain.
	 * This isn't an issue because the STE hasn't been installed yet.
	 */
	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
	if (ret)
		goto out_free_cd_tables;

	mutex_unlock(&arm_smmu_asid_lock);
	return 0;

out_free_cd_tables:
	arm_smmu_free_cd_tables(smmu_domain);
out_free_asid:
	arm_smmu_free_asid(&cfg->cd);
out_unlock:
	mutex_unlock(&arm_smmu_asid_lock);
	return ret;
}

static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
				       struct arm_smmu_master *master,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	int vmid;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;

	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
	if (vmid < 0)
		return vmid;

	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	cfg->vmid	= (u16)vmid;
	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
	return 0;
}
2131 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2132 struct arm_smmu_master *master)
2135 unsigned long ias, oas;
2136 enum io_pgtable_fmt fmt;
2137 struct io_pgtable_cfg pgtbl_cfg;
2138 struct io_pgtable_ops *pgtbl_ops;
2139 int (*finalise_stage_fn)(struct arm_smmu_domain *,
2140 struct arm_smmu_master *,
2141 struct io_pgtable_cfg *);
2142 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2143 struct arm_smmu_device *smmu = smmu_domain->smmu;
2145 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2146 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2150 /* Restrict the stage to what we can actually support */
2151 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2152 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2153 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2154 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2156 switch (smmu_domain->stage) {
2157 case ARM_SMMU_DOMAIN_S1:
2158 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2159 ias = min_t(unsigned long, ias, VA_BITS);
2160 oas = smmu->ias;
2161 fmt = ARM_64_LPAE_S1;
2162 finalise_stage_fn = arm_smmu_domain_finalise_s1;
2163 break;
2164 case ARM_SMMU_DOMAIN_NESTED:
2165 case ARM_SMMU_DOMAIN_S2:
2166 ias = smmu->ias;
2167 oas = smmu->oas;
2168 fmt = ARM_64_LPAE_S2;
2169 finalise_stage_fn = arm_smmu_domain_finalise_s2;
2170 break;
2171 default:
2172 return -EINVAL;
2173 }
2175 pgtbl_cfg = (struct io_pgtable_cfg) {
2176 .pgsize_bitmap = smmu->pgsize_bitmap,
2177 .ias = ias,
2178 .oas = oas,
2179 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2180 .tlb = &arm_smmu_flush_ops,
2181 .iommu_dev = smmu->dev,
2184 if (!iommu_get_dma_strict(domain))
2185 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
2187 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2191 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2192 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2193 domain->geometry.force_aperture = true;
2195 ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2196 if (ret < 0) {
2197 free_io_pgtable_ops(pgtbl_ops);
2198 return ret;
2199 }
2201 smmu_domain->pgtbl_ops = pgtbl_ops;
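/*
 * Sizing sketch for the finalised domain (assuming VA_BITS == 48 and
 * no ARM_SMMU_FEAT_VAX): stage 1 gets ias = min(48, 48) = 48, so the
 * geometry programmed above is aperture_end = (1UL << 48) - 1 =
 * 0xffff_ffff_ffff, with force_aperture enforcing the limit.
 */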
2205 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2208 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2210 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2211 struct arm_smmu_strtab_l1_desc *l1_desc;
2214 /* Two-level walk */
2215 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2216 l1_desc = &cfg->l1_desc[idx];
2217 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2218 step = &l1_desc->l2ptr[idx];
2219 } else {
2220 /* Simple linear lookup */
2221 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2222 }
2224 return step;
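/*
 * Worked example of the two-level walk, assuming STRTAB_SPLIT == 8,
 * STRTAB_L1_DESC_DWORDS == 1 and STRTAB_STE_DWORDS == 8 (values from
 * arm-smmu-v3.h): sid == 0x1234 selects L1 descriptor 0x1234 >> 8 ==
 * 0x12, and a dword offset of (0x1234 & 0xff) * 8 into that
 * descriptor's l2ptr, i.e. STE 0x34 of its L2 table.
 */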
2227 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2230 struct arm_smmu_device *smmu = master->smmu;
2232 for (i = 0; i < master->num_streams; ++i) {
2233 u32 sid = master->streams[i].id;
2234 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2236 /* Bridged PCI devices may end up with duplicated IDs */
2237 for (j = 0; j < i; j++)
2238 if (master->streams[j].id == sid)
2239 break;
2241 if (j < i)
2242 continue;
2243 arm_smmu_write_strtab_ent(master, sid, step);
2247 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2249 struct device *dev = master->dev;
2250 struct arm_smmu_device *smmu = master->smmu;
2251 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2253 if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2254 return false;
2256 if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2257 return false;
2259 return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2262 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2265 struct pci_dev *pdev;
2266 struct arm_smmu_device *smmu = master->smmu;
2267 struct arm_smmu_domain *smmu_domain = master->domain;
2269 /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2270 if (!master->ats_enabled)
2271 return;
2273 /* Smallest Translation Unit: log2 of the smallest supported granule */
2274 stu = __ffs(smmu->pgsize_bitmap);
2275 pdev = to_pci_dev(master->dev);
2277 atomic_inc(&smmu_domain->nr_ats_masters);
2278 arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2279 if (pci_enable_ats(pdev, stu))
2280 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
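/*
 * STU example: with pgsize_bitmap == (SZ_4K | SZ_2M | SZ_1G), __ffs()
 * returns 12, so the endpoint is told its smallest translation unit is
 * 2^12 == 4KiB - the smallest granule the SMMU supports.
 */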
2283 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2285 struct arm_smmu_domain *smmu_domain = master->domain;
2287 if (!master->ats_enabled)
2288 return;
2290 pci_disable_ats(to_pci_dev(master->dev));
2291 /*
2292 * Ensure ATS is disabled at the endpoint before we issue the
2293 * ATC invalidation via the SMMU.
2294 */
2296 arm_smmu_atc_inv_master(master);
2297 atomic_dec(&smmu_domain->nr_ats_masters);
2300 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2305 struct pci_dev *pdev;
2307 if (!dev_is_pci(master->dev))
2308 return -ENODEV;
2310 pdev = to_pci_dev(master->dev);
2312 features = pci_pasid_features(pdev);
2316 num_pasids = pci_max_pasids(pdev);
2317 if (num_pasids <= 0)
2318 return num_pasids;
2320 ret = pci_enable_pasid(pdev, features);
2321 if (ret) {
2322 dev_err(&pdev->dev, "Failed to enable PASID\n");
2323 return ret;
2324 }
2326 master->ssid_bits = min_t(u8, ilog2(num_pasids),
2327 master->smmu->ssid_bits);
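/*
 * Example: a PCI function advertising 256 PASIDs yields
 * ilog2(256) == 8, so ssid_bits becomes min(8, smmu->ssid_bits);
 * the SMMU-side limit comes from IDR1.SSIDSIZE read at probe time.
 */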
2331 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2333 struct pci_dev *pdev;
2335 if (!dev_is_pci(master->dev))
2336 return;
2338 pdev = to_pci_dev(master->dev);
2340 if (!pdev->pasid_enabled)
2341 return;
2343 master->ssid_bits = 0;
2344 pci_disable_pasid(pdev);
2347 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2349 unsigned long flags;
2350 struct arm_smmu_domain *smmu_domain = master->domain;
2355 arm_smmu_disable_ats(master);
2357 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2358 list_del(&master->domain_head);
2359 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2361 master->domain = NULL;
2362 master->ats_enabled = false;
2363 arm_smmu_install_ste_for_dev(master);
2366 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2369 unsigned long flags;
2370 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2371 struct arm_smmu_device *smmu;
2372 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2373 struct arm_smmu_master *master;
2378 master = dev_iommu_priv_get(dev);
2379 smmu = master->smmu;
2381 /*
2382 * Checking that SVA is disabled ensures that this device isn't bound to
2383 * any mm, and can be safely detached from its old domain. Bonds cannot
2384 * be removed concurrently since we're holding the group mutex.
2385 */
2386 if (arm_smmu_master_sva_enabled(master)) {
2387 dev_err(dev, "cannot attach - SVA enabled\n");
2388 return -EBUSY;
2389 }
2391 arm_smmu_detach_dev(master);
2393 mutex_lock(&smmu_domain->init_mutex);
2395 if (!smmu_domain->smmu) {
2396 smmu_domain->smmu = smmu;
2397 ret = arm_smmu_domain_finalise(domain, master);
2398 if (ret) {
2399 smmu_domain->smmu = NULL;
2400 goto out_unlock;
2401 }
2402 } else if (smmu_domain->smmu != smmu) {
2403 dev_err(dev,
2404 "cannot attach to SMMU %s (upstream of %s)\n",
2405 dev_name(smmu_domain->smmu->dev),
2406 dev_name(smmu->dev));
2409 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2410 master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2411 dev_err(dev,
2412 "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2413 smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2416 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2417 smmu_domain->stall_enabled != master->stall_enabled) {
2418 dev_err(dev, "cannot attach to stall-%s domain\n",
2419 smmu_domain->stall_enabled ? "enabled" : "disabled");
2424 master->domain = smmu_domain;
2426 if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2427 master->ats_enabled = arm_smmu_ats_supported(master);
2429 arm_smmu_install_ste_for_dev(master);
2431 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2432 list_add(&master->domain_head, &smmu_domain->devices);
2433 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2435 arm_smmu_enable_ats(master);
2437 out_unlock:
2438 mutex_unlock(&smmu_domain->init_mutex);
2439 return ret;
2442 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2443 phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2445 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2450 return ops->map(ops, iova, paddr, size, prot, gfp);
2453 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2454 size_t size, struct iommu_iotlb_gather *gather)
2456 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2457 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2462 return ops->unmap(ops, iova, size, gather);
2465 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2467 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2469 if (smmu_domain->smmu)
2470 arm_smmu_tlb_inv_context(smmu_domain);
2473 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2474 struct iommu_iotlb_gather *gather)
2476 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2478 if (!gather->pgsize)
2479 return;
2481 arm_smmu_tlb_inv_range_domain(gather->start,
2482 gather->end - gather->start + 1,
2483 gather->pgsize, true, smmu_domain);
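/*
 * The gather range is inclusive, hence the "+ 1" above: a gather
 * covering IOVAs 0x1000-0x2fff with pgsize 0x1000 invalidates
 * 0x2fff - 0x1000 + 1 == 0x2000 bytes, i.e. two 4KiB leaf entries.
 */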
2487 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2489 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2491 if (domain->type == IOMMU_DOMAIN_IDENTITY)
2492 return iova;
2494 if (!ops)
2495 return 0;
2497 return ops->iova_to_phys(ops, iova);
2500 static struct platform_driver arm_smmu_driver;
2503 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2505 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2508 return dev ? dev_get_drvdata(dev) : NULL;
2511 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2513 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2515 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2516 limit *= 1UL << STRTAB_SPLIT;
2518 return sid < limit;
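/*
 * Example, assuming STRTAB_SPLIT == 8: a two-level configuration with
 * num_l1_ents == 256 covers 256 << 8 == 65536 stream IDs, so any
 * sid < 0x10000 passes the range check.
 */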
2521 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2522 struct arm_smmu_master *master)
2524 int i;
2525 int ret = 0;
2526 struct arm_smmu_stream *new_stream, *cur_stream;
2527 struct rb_node **new_node, *parent_node = NULL;
2528 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2530 master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2531 GFP_KERNEL);
2532 if (!master->streams)
2533 return -ENOMEM;
2534 master->num_streams = fwspec->num_ids;
2536 mutex_lock(&smmu->streams_mutex);
2537 for (i = 0; i < fwspec->num_ids; i++) {
2538 u32 sid = fwspec->ids[i];
2540 new_stream = &master->streams[i];
2541 new_stream->id = sid;
2542 new_stream->master = master;
2544 /*
2545 * Check the SIDs are in range of the SMMU and our stream table
2546 */
2547 if (!arm_smmu_sid_in_range(smmu, sid)) {
2548 ret = -ERANGE;
2549 break;
2550 }
2552 /* Ensure l2 strtab is initialised */
2553 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2554 ret = arm_smmu_init_l2_strtab(smmu, sid);
2555 if (ret)
2556 break;
2557 }
2559 /* Insert into SID tree */
2560 new_node = &(smmu->streams.rb_node);
2561 while (*new_node) {
2562 cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2563 node);
2564 parent_node = *new_node;
2565 if (cur_stream->id > new_stream->id) {
2566 new_node = &((*new_node)->rb_left);
2567 } else if (cur_stream->id < new_stream->id) {
2568 new_node = &((*new_node)->rb_right);
2569 } else {
2570 dev_warn(master->dev,
2571 "stream %u already in tree\n",
2572 sid);
2573 ret = -EINVAL;
2574 break;
2575 }
2576 }
2578 if (ret)
2579 break;
2580 rb_link_node(&new_stream->node, parent_node, new_node);
2581 rb_insert_color(&new_stream->node, &smmu->streams);
2584 if (ret) {
2585 for (i--; i >= 0; i--)
2586 rb_erase(&master->streams[i].node, &smmu->streams);
2587 kfree(master->streams);
2588 }
2589 mutex_unlock(&smmu->streams_mutex);
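/*
 * The rb-tree built here keys streams by ID so that event and PRI
 * handling can translate an incoming StreamID back to its master with
 * an O(log n) lookup rather than walking every attached device.
 */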
2594 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2597 struct arm_smmu_device *smmu = master->smmu;
2598 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2600 if (!smmu || !master->streams)
2601 return;
2603 mutex_lock(&smmu->streams_mutex);
2604 for (i = 0; i < fwspec->num_ids; i++)
2605 rb_erase(&master->streams[i].node, &smmu->streams);
2606 mutex_unlock(&smmu->streams_mutex);
2608 kfree(master->streams);
2611 static struct iommu_ops arm_smmu_ops;
2613 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2616 struct arm_smmu_device *smmu;
2617 struct arm_smmu_master *master;
2618 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2620 if (!fwspec || fwspec->ops != &arm_smmu_ops)
2621 return ERR_PTR(-ENODEV);
2623 if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2624 return ERR_PTR(-EBUSY);
2626 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2627 if (!smmu)
2628 return ERR_PTR(-ENODEV);
2630 master = kzalloc(sizeof(*master), GFP_KERNEL);
2631 if (!master)
2632 return ERR_PTR(-ENOMEM);
2635 master->smmu = smmu;
2636 INIT_LIST_HEAD(&master->bonds);
2637 dev_iommu_priv_set(dev, master);
2639 ret = arm_smmu_insert_master(smmu, master);
2640 if (ret)
2641 goto err_free_master;
2643 device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2644 master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2646 /*
2647 * Note that PASID must be enabled before, and disabled after ATS:
2648 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2649 *
2650 * Behavior is undefined if this bit is Set and the value of the PASID
2651 * Enable, Execute Requested Enable, or Privileged Mode Requested bits
2652 * are Set.
2653 */
2654 arm_smmu_enable_pasid(master);
2656 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2657 master->ssid_bits = min_t(u8, master->ssid_bits,
2658 CTXDESC_LINEAR_CDMAX);
2660 if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2661 device_property_read_bool(dev, "dma-can-stall")) ||
2662 smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2663 master->stall_enabled = true;
2665 return &smmu->iommu;
2667 err_free_master:
2668 kfree(master);
2669 dev_iommu_priv_set(dev, NULL);
2670 return ERR_PTR(ret);
2673 static void arm_smmu_release_device(struct device *dev)
2675 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2676 struct arm_smmu_master *master;
2678 if (!fwspec || fwspec->ops != &arm_smmu_ops)
2679 return;
2681 master = dev_iommu_priv_get(dev);
2682 if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2683 iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2684 arm_smmu_detach_dev(master);
2685 arm_smmu_disable_pasid(master);
2686 arm_smmu_remove_master(master);
2688 iommu_fwspec_free(dev);
2691 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2693 struct iommu_group *group;
2695 /*
2696 * We don't support devices sharing stream IDs other than PCI RID
2697 * aliases, since the necessary ID-to-device lookup becomes rather
2698 * impractical given a potential sparse 32-bit stream ID space.
2699 */
2700 if (dev_is_pci(dev))
2701 group = pci_device_group(dev);
2702 else
2703 group = generic_device_group(dev);
2705 return group;
2708 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2710 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2711 int ret = 0;
2713 mutex_lock(&smmu_domain->init_mutex);
2714 if (smmu_domain->smmu)
2715 ret = -EPERM;
2716 else
2717 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2718 mutex_unlock(&smmu_domain->init_mutex);
2723 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2725 return iommu_fwspec_add_ids(dev, args->args, 1);
2728 static void arm_smmu_get_resv_regions(struct device *dev,
2729 struct list_head *head)
2731 struct iommu_resv_region *region;
2732 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2734 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2735 prot, IOMMU_RESV_SW_MSI);
2739 list_add_tail(&region->list, head);
2741 iommu_dma_get_resv_regions(dev, head);
2744 static bool arm_smmu_dev_has_feature(struct device *dev,
2745 enum iommu_dev_features feat)
2747 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2749 if (!master)
2750 return false;
2752 switch (feat) {
2753 case IOMMU_DEV_FEAT_IOPF:
2754 return arm_smmu_master_iopf_supported(master);
2755 case IOMMU_DEV_FEAT_SVA:
2756 return arm_smmu_master_sva_supported(master);
2762 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2763 enum iommu_dev_features feat)
2765 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2767 if (!master)
2768 return false;
2770 switch (feat) {
2771 case IOMMU_DEV_FEAT_IOPF:
2772 return master->iopf_enabled;
2773 case IOMMU_DEV_FEAT_SVA:
2774 return arm_smmu_master_sva_enabled(master);
2780 static int arm_smmu_dev_enable_feature(struct device *dev,
2781 enum iommu_dev_features feat)
2783 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2785 if (!arm_smmu_dev_has_feature(dev, feat))
2786 return -ENODEV;
2788 if (arm_smmu_dev_feature_enabled(dev, feat))
2789 return -EBUSY;
2791 switch (feat) {
2792 case IOMMU_DEV_FEAT_IOPF:
2793 master->iopf_enabled = true;
2794 return 0;
2795 case IOMMU_DEV_FEAT_SVA:
2796 return arm_smmu_master_enable_sva(master);
2802 static int arm_smmu_dev_disable_feature(struct device *dev,
2803 enum iommu_dev_features feat)
2805 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2807 if (!arm_smmu_dev_feature_enabled(dev, feat))
2808 return -EINVAL;
2810 switch (feat) {
2811 case IOMMU_DEV_FEAT_IOPF:
2812 if (master->sva_enabled)
2813 return -EBUSY;
2814 master->iopf_enabled = false;
2815 return 0;
2816 case IOMMU_DEV_FEAT_SVA:
2817 return arm_smmu_master_disable_sva(master);
2823 static struct iommu_ops arm_smmu_ops = {
2824 .capable = arm_smmu_capable,
2825 .domain_alloc = arm_smmu_domain_alloc,
2826 .domain_free = arm_smmu_domain_free,
2827 .attach_dev = arm_smmu_attach_dev,
2828 .map = arm_smmu_map,
2829 .unmap = arm_smmu_unmap,
2830 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
2831 .iotlb_sync = arm_smmu_iotlb_sync,
2832 .iova_to_phys = arm_smmu_iova_to_phys,
2833 .probe_device = arm_smmu_probe_device,
2834 .release_device = arm_smmu_release_device,
2835 .device_group = arm_smmu_device_group,
2836 .enable_nesting = arm_smmu_enable_nesting,
2837 .of_xlate = arm_smmu_of_xlate,
2838 .get_resv_regions = arm_smmu_get_resv_regions,
2839 .put_resv_regions = generic_iommu_put_resv_regions,
2840 .dev_has_feat = arm_smmu_dev_has_feature,
2841 .dev_feat_enabled = arm_smmu_dev_feature_enabled,
2842 .dev_enable_feat = arm_smmu_dev_enable_feature,
2843 .dev_disable_feat = arm_smmu_dev_disable_feature,
2844 .sva_bind = arm_smmu_sva_bind,
2845 .sva_unbind = arm_smmu_sva_unbind,
2846 .sva_get_pasid = arm_smmu_sva_get_pasid,
2847 .page_response = arm_smmu_page_response,
2848 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2849 .owner = THIS_MODULE,
2852 /* Probing and initialisation functions */
2853 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2854 struct arm_smmu_queue *q,
2855 void __iomem *page,
2856 unsigned long prod_off,
2857 unsigned long cons_off,
2858 size_t dwords, const char *name)
2862 do {
2863 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2864 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2865 GFP_KERNEL);
2866 if (q->base || qsz < PAGE_SIZE)
2867 break;
2869 q->llq.max_n_shift--;
2870 } while (1);
2872 if (!q->base) {
2873 dev_err(smmu->dev,
2874 "failed to allocate queue (0x%zx bytes) for %s\n",
2875 qsz, name);
2876 return -ENOMEM;
2877 }
2879 if (!WARN_ON(q->base_dma & (qsz - 1))) {
2880 dev_info(smmu->dev, "allocated %u entries for %s\n",
2881 1 << q->llq.max_n_shift, name);
2884 q->prod_reg = page + prod_off;
2885 q->cons_reg = page + cons_off;
2886 q->ent_dwords = dwords;
2888 q->q_base = Q_BASE_RWA;
2889 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2890 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2892 q->llq.prod = q->llq.cons = 0;
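/*
 * Queue sizing example (CMDQ_ENT_DWORDS == 2 per arm-smmu-v3.h): with
 * llq.max_n_shift == 8, qsz = ((1 << 8) * 2) << 3 == 4KiB, and
 * Q_BASE_LOG2SIZE is programmed with 8 so the hardware wraps the
 * prod/cons indices at 256 entries.
 */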
2896 static void arm_smmu_cmdq_free_bitmap(void *data)
2898 unsigned long *bitmap = data;
2899 bitmap_free(bitmap);
2902 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2904 int ret = 0;
2905 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2906 unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2907 atomic_long_t *bitmap;
2909 atomic_set(&cmdq->owner_prod, 0);
2910 atomic_set(&cmdq->lock, 0);
2912 bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2913 if (!bitmap) {
2914 dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2915 ret = -ENOMEM;
2916 } else {
2917 cmdq->valid_map = bitmap;
2918 devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2924 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2929 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2930 ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2931 CMDQ_ENT_DWORDS, "cmdq");
2935 ret = arm_smmu_cmdq_init(smmu);
2940 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2941 ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2942 EVTQ_ENT_DWORDS, "evtq");
2946 if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2947 (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2948 smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2949 if (!smmu->evtq.iopf)
2950 return -ENOMEM;
2951 }
2954 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2955 return 0;
2957 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2958 ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2959 PRIQ_ENT_DWORDS, "priq");
2962 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2965 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2966 size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2967 void *strtab = smmu->strtab_cfg.strtab;
2969 cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2973 for (i = 0; i < cfg->num_l1_ents; ++i) {
2974 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2975 strtab += STRTAB_L1_DESC_DWORDS << 3;
2981 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2986 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2988 /* Calculate the L1 size, capped to the SIDSIZE. */
2989 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2990 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2991 cfg->num_l1_ents = 1 << size;
2993 size += STRTAB_SPLIT;
2994 if (size < smmu->sid_bits)
2996 "2-level strtab only covers %u/%u bits of SID\n",
2997 size, smmu->sid_bits);
2999 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3000 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3001 GFP_KERNEL);
3002 if (!strtab) {
3003 dev_err(smmu->dev,
3004 "failed to allocate l1 stream table (%u bytes)\n",
3005 l1size);
3006 return -ENOMEM;
3007 }
3008 cfg->strtab = strtab;
3010 /* Configure strtab_base_cfg for 2 levels */
3011 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3012 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3013 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3014 cfg->strtab_base_cfg = reg;
3016 return arm_smmu_init_l1_strtab(smmu);
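/*
 * Sizing example, assuming STRTAB_SPLIT == 8 and STRTAB_L1_DESC_DWORDS
 * == 1: sid_bits == 16 gives num_l1_ents = 1 << (16 - 8) == 256, so
 * l1size == 256 * 8 bytes == 2KiB, with each L1 descriptor fanning out
 * to an L2 table of 1 << 8 == 256 STEs allocated on demand.
 */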
3019 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3024 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3026 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3027 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3028 GFP_KERNEL);
3029 if (!strtab) {
3030 dev_err(smmu->dev,
3031 "failed to allocate linear stream table (%u bytes)\n",
3032 size);
3033 return -ENOMEM;
3034 }
3035 cfg->strtab = strtab;
3036 cfg->num_l1_ents = 1 << smmu->sid_bits;
3038 /* Configure strtab_base_cfg for a linear table covering all SIDs */
3039 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3040 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3041 cfg->strtab_base_cfg = reg;
3043 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
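/*
 * Linear table example, assuming STRTAB_STE_DWORDS == 8 (64 bytes per
 * STE): sid_bits == 8 gives 256 STEs, a 16KiB table, every entry
 * initialised to a bypass or abort STE (depending on disable_bypass)
 * until a master is attached.
 */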
3047 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3052 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3053 ret = arm_smmu_init_strtab_2lvl(smmu);
3054 else
3055 ret = arm_smmu_init_strtab_linear(smmu);
3056 if (ret)
3057 return ret;
3060 /* Set the strtab base address */
3061 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3062 reg |= STRTAB_BASE_RA;
3063 smmu->strtab_cfg.strtab_base = reg;
3065 /* Allocate the first VMID for stage-2 bypass STEs */
3066 set_bit(0, smmu->vmid_map);
3070 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3074 mutex_init(&smmu->streams_mutex);
3075 smmu->streams = RB_ROOT;
3077 ret = arm_smmu_init_queues(smmu);
3081 return arm_smmu_init_strtab(smmu);
3084 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3085 unsigned int reg_off, unsigned int ack_off)
3089 writel_relaxed(val, smmu->base + reg_off);
3090 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3091 1, ARM_SMMU_POLL_TIMEOUT_US);
3094 /* GBPA is "special" */
3095 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3098 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3100 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3101 1, ARM_SMMU_POLL_TIMEOUT_US);
3102 if (ret)
3103 return ret;
3105 reg &= ~clr;
3106 reg |= set;
3107 writel_relaxed(reg | GBPA_UPDATE, gbpa);
3108 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3109 1, ARM_SMMU_POLL_TIMEOUT_US);
3111 if (ret)
3112 dev_err(smmu->dev, "GBPA not responding to update\n");
3114 return ret;
3116 static void arm_smmu_free_msis(void *data)
3118 struct device *dev = data;
3119 platform_msi_domain_free_irqs(dev);
3122 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3124 phys_addr_t doorbell;
3125 struct device *dev = msi_desc_to_dev(desc);
3126 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3127 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3129 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3130 doorbell &= MSI_CFG0_ADDR_MASK;
3132 writeq_relaxed(doorbell, smmu->base + cfg[0]);
3133 writel_relaxed(msg->data, smmu->base + cfg[1]);
3134 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
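/*
 * Doorbell composition example: address_hi == 0x8 and address_lo ==
 * 0x10000040 combine to 0x8_1000_0040 above; MSI_CFG0_ADDR_MASK then
 * keeps only the architected address bits before the write to the
 * relevant *_IRQ_CFG0 register, with data and memory attributes going
 * to the matching CFG1/CFG2 registers.
 */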
3137 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3139 struct msi_desc *desc;
3140 int ret, nvec = ARM_SMMU_MAX_MSIS;
3141 struct device *dev = smmu->dev;
3143 /* Clear the MSI address regs */
3144 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3145 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3147 if (smmu->features & ARM_SMMU_FEAT_PRI)
3148 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3152 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3153 return;
3155 if (!dev->msi_domain) {
3156 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3157 return;
3158 }
3160 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3161 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3162 if (ret) {
3163 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3164 return;
3165 }
3167 for_each_msi_entry(desc, dev) {
3168 switch (desc->platform.msi_index) {
3169 case EVTQ_MSI_INDEX:
3170 smmu->evtq.q.irq = desc->irq;
3171 break;
3172 case GERROR_MSI_INDEX:
3173 smmu->gerr_irq = desc->irq;
3174 break;
3175 case PRIQ_MSI_INDEX:
3176 smmu->priq.q.irq = desc->irq;
3177 break;
3178 default: /* Unknown */
3179 break;
3180 }
3181 }
3183 /* Add callback to free MSIs on teardown */
3184 devm_add_action(dev, arm_smmu_free_msis, dev);
3187 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3191 arm_smmu_setup_msis(smmu);
3193 /* Request interrupt lines */
3194 irq = smmu->evtq.q.irq;
3195 if (irq) {
3196 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3197 arm_smmu_evtq_thread,
3198 IRQF_ONESHOT,
3199 "arm-smmu-v3-evtq", smmu);
3200 if (ret < 0)
3201 dev_warn(smmu->dev, "failed to enable evtq irq\n");
3202 } else {
3203 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3204 }
3206 irq = smmu->gerr_irq;
3207 if (irq) {
3208 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3209 0, "arm-smmu-v3-gerror", smmu);
3210 if (ret < 0)
3211 dev_warn(smmu->dev, "failed to enable gerror irq\n");
3212 } else {
3213 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3214 }
3216 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3217 irq = smmu->priq.q.irq;
3218 if (irq) {
3219 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3220 arm_smmu_priq_thread,
3221 IRQF_ONESHOT,
3222 "arm-smmu-v3-priq",
3223 smmu);
3224 if (ret < 0)
3225 dev_warn(smmu->dev,
3226 "failed to enable priq irq\n");
3227 } else {
3228 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3229 }
3233 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3236 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3238 /* Disable IRQs first */
3239 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3240 ARM_SMMU_IRQ_CTRLACK);
3241 if (ret) {
3242 dev_err(smmu->dev, "failed to disable irqs\n");
3243 return ret;
3244 }
3246 irq = smmu->combined_irq;
3247 if (irq) {
3248 /*
3249 * Cavium ThunderX2 implementation doesn't support unique irq
3250 * lines. Use a single irq line for all the SMMUv3 interrupts.
3251 */
3252 ret = devm_request_threaded_irq(smmu->dev, irq,
3253 arm_smmu_combined_irq_handler,
3254 arm_smmu_combined_irq_thread,
3255 IRQF_ONESHOT,
3256 "arm-smmu-v3-combined-irq", smmu);
3257 if (ret < 0)
3258 dev_warn(smmu->dev, "failed to enable combined irq\n");
3259 } else {
3260 arm_smmu_setup_unique_irqs(smmu);
3261 }
3262 if (smmu->features & ARM_SMMU_FEAT_PRI)
3263 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3265 /* Enable interrupt generation on the SMMU */
3266 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3267 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3268 if (ret)
3269 dev_warn(smmu->dev, "failed to enable irqs\n");
3271 return 0;
3274 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3278 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3279 if (ret)
3280 dev_err(smmu->dev, "failed to clear cr0\n");
3282 return ret;
3285 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3289 struct arm_smmu_cmdq_ent cmd;
3291 /* Clear CR0 and sync (disables SMMU and queue processing) */
3292 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3293 if (reg & CR0_SMMUEN) {
3294 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3295 WARN_ON(is_kdump_kernel() && !disable_bypass);
3296 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3299 ret = arm_smmu_device_disable(smmu);
3303 /* CR1 (table and queue memory attributes) */
3304 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3305 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3306 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3307 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3308 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3309 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3310 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3312 /* CR2 (random crap) */
3313 reg = CR2_PTM | CR2_RECINVSID;
3315 if (smmu->features & ARM_SMMU_FEAT_E2H)
3316 reg |= CR2_E2H;
3318 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3321 writeq_relaxed(smmu->strtab_cfg.strtab_base,
3322 smmu->base + ARM_SMMU_STRTAB_BASE);
3323 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3324 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3327 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3328 writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3329 writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3331 enables = CR0_CMDQEN;
3332 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3333 ARM_SMMU_CR0ACK);
3334 if (ret) {
3335 dev_err(smmu->dev, "failed to enable command queue\n");
3336 return ret;
3337 }
3339 /* Invalidate any cached configuration */
3340 cmd.opcode = CMDQ_OP_CFGI_ALL;
3341 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3342 arm_smmu_cmdq_issue_sync(smmu);
3344 /* Invalidate any stale TLB entries */
3345 if (smmu->features & ARM_SMMU_FEAT_HYP) {
3346 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3347 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3350 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3351 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3352 arm_smmu_cmdq_issue_sync(smmu);
3355 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3356 writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3357 writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3359 enables |= CR0_EVTQEN;
3360 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3361 ARM_SMMU_CR0ACK);
3362 if (ret) {
3363 dev_err(smmu->dev, "failed to enable event queue\n");
3364 return ret;
3365 }
3368 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3369 writeq_relaxed(smmu->priq.q.q_base,
3370 smmu->base + ARM_SMMU_PRIQ_BASE);
3371 writel_relaxed(smmu->priq.q.llq.prod,
3372 smmu->page1 + ARM_SMMU_PRIQ_PROD);
3373 writel_relaxed(smmu->priq.q.llq.cons,
3374 smmu->page1 + ARM_SMMU_PRIQ_CONS);
3376 enables |= CR0_PRIQEN;
3377 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3378 ARM_SMMU_CR0ACK);
3379 if (ret) {
3380 dev_err(smmu->dev, "failed to enable PRI queue\n");
3381 return ret;
3382 }
3383 }
3385 if (smmu->features & ARM_SMMU_FEAT_ATS) {
3386 enables |= CR0_ATSCHK;
3387 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3388 ARM_SMMU_CR0ACK);
3389 if (ret) {
3390 dev_err(smmu->dev, "failed to enable ATS check\n");
3391 return ret;
3392 }
3393 }
3395 ret = arm_smmu_setup_irqs(smmu);
3396 if (ret) {
3397 dev_err(smmu->dev, "failed to setup irqs\n");
3398 return ret;
3399 }
3401 if (is_kdump_kernel())
3402 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3404 /* Enable the SMMU interface, or ensure bypass */
3405 if (!bypass || disable_bypass) {
3406 enables |= CR0_SMMUEN;
3407 } else {
3408 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3409 if (ret)
3410 return ret;
3411 }
3412 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3413 ARM_SMMU_CR0ACK);
3414 if (ret) {
3415 dev_err(smmu->dev, "failed to enable SMMU interface\n");
3416 return ret;
3417 }
3419 return 0;
3422 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3425 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3428 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3430 /* 2-level structures */
3431 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3432 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3434 if (reg & IDR0_CD2L)
3435 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3437 /*
3438 * Translation table endianness.
3439 * We currently require the same endianness as the CPU, but this
3440 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3441 */
3442 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3443 case IDR0_TTENDIAN_MIXED:
3444 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3445 break;
3446 #ifdef __BIG_ENDIAN
3447 case IDR0_TTENDIAN_BE:
3448 smmu->features |= ARM_SMMU_FEAT_TT_BE;
3449 break;
3450 #else
3451 case IDR0_TTENDIAN_LE:
3452 smmu->features |= ARM_SMMU_FEAT_TT_LE;
3453 break;
3454 #endif
3455 default:
3456 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3457 return -ENXIO;
3458 }
3460 /* Boolean feature flags */
3461 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3462 smmu->features |= ARM_SMMU_FEAT_PRI;
3464 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3465 smmu->features |= ARM_SMMU_FEAT_ATS;
3467 if (reg & IDR0_SEV)
3468 smmu->features |= ARM_SMMU_FEAT_SEV;
3470 if (reg & IDR0_MSI) {
3471 smmu->features |= ARM_SMMU_FEAT_MSI;
3472 if (coherent && !disable_msipolling)
3473 smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3476 if (reg & IDR0_HYP) {
3477 smmu->features |= ARM_SMMU_FEAT_HYP;
3478 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3479 smmu->features |= ARM_SMMU_FEAT_E2H;
3482 /*
3483 * The coherency feature as set by FW is used in preference to the ID
3484 * register, but warn on mismatch.
3485 */
3486 if (!!(reg & IDR0_COHACC) != coherent)
3487 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3488 coherent ? "true" : "false");
3490 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3491 case IDR0_STALL_MODEL_FORCE:
3492 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3493 fallthrough;
3494 case IDR0_STALL_MODEL_STALL:
3495 smmu->features |= ARM_SMMU_FEAT_STALLS;
3496 }
3498 if (reg & IDR0_S1P)
3499 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3501 if (reg & IDR0_S2P)
3502 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3504 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3505 dev_err(smmu->dev, "no translation support!\n");
3506 return -ENXIO;
3507 }
3509 /* We only support the AArch64 table format at present */
3510 switch (FIELD_GET(IDR0_TTF, reg)) {
3511 case IDR0_TTF_AARCH32_64:
3512 smmu->ias = 40;
3513 fallthrough;
3514 case IDR0_TTF_AARCH64:
3515 break;
3516 default:
3517 dev_err(smmu->dev, "AArch64 table format not supported!\n");
3518 return -ENXIO;
3519 }
3521 /* ASID/VMID sizes */
3522 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3523 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3526 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3527 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3528 dev_err(smmu->dev, "embedded implementation not supported\n");
3529 return -ENXIO;
3530 }
3532 /* Queue sizes, capped to ensure natural alignment */
3533 smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3534 FIELD_GET(IDR1_CMDQS, reg));
3535 if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3536 /*
3537 * We don't support splitting up batches, so one batch of
3538 * commands plus an extra sync needs to fit inside the command
3539 * queue. There's also no way we can handle the weird alignment
3540 * restrictions on the base pointer for a unit-length queue.
3541 */
3542 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3543 CMDQ_BATCH_ENTRIES);
3544 return -ENXIO;
3545 }
3547 smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3548 FIELD_GET(IDR1_EVTQS, reg));
3549 smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3550 FIELD_GET(IDR1_PRIQS, reg));
3552 /* SID/SSID sizes */
3553 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3554 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3556 /*
3557 * If the SMMU supports fewer bits than would fill a single L2 stream
3558 * table, use a linear table instead.
3559 */
3560 if (smmu->sid_bits <= STRTAB_SPLIT)
3561 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3564 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3565 if (FIELD_GET(IDR3_RIL, reg))
3566 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3569 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3571 /* Maximum number of outstanding stalls */
3572 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3575 if (reg & IDR5_GRAN64K)
3576 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3577 if (reg & IDR5_GRAN16K)
3578 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3579 if (reg & IDR5_GRAN4K)
3580 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
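/*
 * Example: an implementation advertising GRAN4K and GRAN16K but not
 * GRAN64K ends up with pgsize_bitmap == SZ_4K | SZ_2M | SZ_1G |
 * SZ_16K | SZ_32M, i.e. the leaf and block sizes of both granules.
 */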
3582 /* Input address size */
3583 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3584 smmu->features |= ARM_SMMU_FEAT_VAX;
3586 /* Output address size */
3587 switch (FIELD_GET(IDR5_OAS, reg)) {
3588 case IDR5_OAS_32_BIT:
3591 case IDR5_OAS_36_BIT:
3594 case IDR5_OAS_40_BIT:
3597 case IDR5_OAS_42_BIT:
3600 case IDR5_OAS_44_BIT:
3603 case IDR5_OAS_52_BIT:
3605 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3606 break;
3607 default:
3608 dev_info(smmu->dev,
3609 "unknown output address size. Truncating to 48-bit\n");
3610 fallthrough;
3611 case IDR5_OAS_48_BIT:
3612 smmu->oas = 48;
3613 }
3615 if (arm_smmu_ops.pgsize_bitmap == -1UL)
3616 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3617 else
3618 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3620 /* Set the DMA mask for our table walker */
3621 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3622 dev_warn(smmu->dev,
3623 "failed to set DMA mask for table walker\n");
3625 smmu->ias = max(smmu->ias, smmu->oas);
3627 if (arm_smmu_sva_supported(smmu))
3628 smmu->features |= ARM_SMMU_FEAT_SVA;
3630 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3631 smmu->ias, smmu->oas, smmu->features);
3636 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3638 switch (model) {
3639 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3640 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3641 break;
3642 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3643 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3644 break;
3645 }
3647 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3650 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3651 struct arm_smmu_device *smmu)
3653 struct acpi_iort_smmu_v3 *iort_smmu;
3654 struct device *dev = smmu->dev;
3655 struct acpi_iort_node *node;
3657 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3659 /* Retrieve SMMUv3 specific data */
3660 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3662 acpi_smmu_get_options(iort_smmu->model, smmu);
3664 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3665 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3670 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3671 struct arm_smmu_device *smmu)
3672 {
3673 return -ENODEV;
3674 }
3677 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3678 struct arm_smmu_device *smmu)
3680 struct device *dev = &pdev->dev;
3684 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3685 dev_err(dev, "missing #iommu-cells property\n");
3686 else if (cells != 1)
3687 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3691 parse_driver_options(smmu);
3693 if (of_dma_is_coherent(dev->of_node))
3694 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3699 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3701 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3702 return SZ_64K;
3703 else
3704 return SZ_128K;
3707 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3712 if (pci_bus_type.iommu_ops != ops) {
3713 err = bus_set_iommu(&pci_bus_type, ops);
3714 if (err)
3715 return err;
3716 }
3718 #ifdef CONFIG_ARM_AMBA
3719 if (amba_bustype.iommu_ops != ops) {
3720 err = bus_set_iommu(&amba_bustype, ops);
3721 if (err)
3722 goto err_reset_pci_ops;
3723 }
3725 if (platform_bus_type.iommu_ops != ops) {
3726 err = bus_set_iommu(&platform_bus_type, ops);
3727 if (err)
3728 goto err_reset_amba_ops;
3729 }
3731 return 0;
3733 err_reset_amba_ops: __maybe_unused;
3734 #ifdef CONFIG_ARM_AMBA
3735 bus_set_iommu(&amba_bustype, NULL);
3736 #endif
3737 err_reset_pci_ops: __maybe_unused;
3739 bus_set_iommu(&pci_bus_type, NULL);
3744 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3745 resource_size_t size)
3747 struct resource res = DEFINE_RES_MEM(start, size);
3749 return devm_ioremap_resource(dev, &res);
3752 static int arm_smmu_device_probe(struct platform_device *pdev)
3755 struct resource *res;
3756 resource_size_t ioaddr;
3757 struct arm_smmu_device *smmu;
3758 struct device *dev = &pdev->dev;
3761 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3766 if (dev->of_node)
3767 ret = arm_smmu_device_dt_probe(pdev, smmu);
3768 else
3769 ret = arm_smmu_device_acpi_probe(pdev, smmu);
3774 /* Set bypass mode according to firmware probing result */
3775 bypass = !!ret;
3778 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3779 if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3780 dev_err(dev, "MMIO region too small (%pr)\n", res);
3781 return -EINVAL;
3782 }
3783 ioaddr = res->start;
3785 /*
3786 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3787 * the PMCG registers which are reserved by the PMU driver.
3788 */
3789 smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3790 if (IS_ERR(smmu->base))
3791 return PTR_ERR(smmu->base);
3793 if (arm_smmu_resource_size(smmu) > SZ_64K) {
3794 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3796 if (IS_ERR(smmu->page1))
3797 return PTR_ERR(smmu->page1);
3798 } else {
3799 smmu->page1 = smmu->base;
3800 }
3802 /* Interrupt lines */
3804 irq = platform_get_irq_byname_optional(pdev, "combined");
3805 if (irq > 0)
3806 smmu->combined_irq = irq;
3807 else {
3808 irq = platform_get_irq_byname_optional(pdev, "eventq");
3809 if (irq > 0)
3810 smmu->evtq.q.irq = irq;
3812 irq = platform_get_irq_byname_optional(pdev, "priq");
3813 if (irq > 0)
3814 smmu->priq.q.irq = irq;
3816 irq = platform_get_irq_byname_optional(pdev, "gerror");
3817 if (irq > 0)
3818 smmu->gerr_irq = irq;
3819 }
3821 ret = arm_smmu_device_hw_probe(smmu);
3825 /* Initialise in-memory data structures */
3826 ret = arm_smmu_init_structures(smmu);
3830 /* Record our private device structure */
3831 platform_set_drvdata(pdev, smmu);
3833 /* Reset the device */
3834 ret = arm_smmu_device_reset(smmu, bypass);
3838 /* And we're up. Go go go! */
3839 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3840 "smmu3.%pa", &ioaddr);
3844 ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3845 if (ret) {
3846 dev_err(dev, "Failed to register iommu\n");
3847 goto err_sysfs_remove;
3848 }
3850 ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
3851 if (ret)
3852 goto err_unregister_device;
3854 return 0;
3856 err_unregister_device:
3857 iommu_device_unregister(&smmu->iommu);
3858 err_sysfs_remove:
3859 iommu_device_sysfs_remove(&smmu->iommu);
3860 return ret;
3863 static int arm_smmu_device_remove(struct platform_device *pdev)
3865 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3867 arm_smmu_set_bus_ops(NULL);
3868 iommu_device_unregister(&smmu->iommu);
3869 iommu_device_sysfs_remove(&smmu->iommu);
3870 arm_smmu_device_disable(smmu);
3871 iopf_queue_free(smmu->evtq.iopf);
3876 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3878 arm_smmu_device_remove(pdev);
3881 static const struct of_device_id arm_smmu_of_match[] = {
3882 { .compatible = "arm,smmu-v3", },
3883 { },
3885 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3887 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3889 arm_smmu_sva_notifier_synchronize();
3890 platform_driver_unregister(drv);
3893 static struct platform_driver arm_smmu_driver = {
3894 .driver = {
3895 .name = "arm-smmu-v3",
3896 .of_match_table = arm_smmu_of_match,
3897 .suppress_bind_attrs = true,
3898 },
3899 .probe = arm_smmu_device_probe,
3900 .remove = arm_smmu_device_remove,
3901 .shutdown = arm_smmu_device_shutdown,
3903 module_driver(arm_smmu_driver, platform_driver_register,
3904 arm_smmu_driver_unregister);
3906 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3907 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3908 MODULE_ALIAS("platform:arm-smmu-v3");
3909 MODULE_LICENSE("GPL v2");