// SPDX-License-Identifier: GPL-2.0-only
/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 *	- Extended Stream ID (16 bit)
 */
#define pr_fmt(fmt) "arm-smmu: " fmt

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/atomic.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include <linux/amba/bus.h>
#include <linux/fsl/mc.h>

#include "arm-smmu-regs.h"
#define ARM_MMU500_ACTLR_CPRE		(1 << 1)

#define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
#define ARM_MMU500_ACR_S2CRB_TLBEN	(1 << 10)
#define ARM_MMU500_ACR_SMTNMB_TLBEN	(1 << 8)

#define TLB_LOOP_TIMEOUT		1000000	/* 1s! */
#define TLB_SPIN_COUNT			10
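
/*
 * Back-of-the-envelope check (illustrative, not code): __arm_smmu_tlb_sync()
 * below spins TLB_SPIN_COUNT times between successive udelay()s, doubling
 * the delay from 1us while it stays under TLB_LOOP_TIMEOUT. The delays
 * therefore sum to roughly 1 + 2 + 4 + ... + 524288 ~= 2^20 us, i.e. about
 * one second of waiting in total before the sync is declared timed out,
 * which is where the "1s!" annotation above comes from.
 */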
/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS		128

/* SMMU global address space */
#define ARM_SMMU_GR0(smmu)		((smmu)->base)
#define ARM_SMMU_GR1(smmu)		((smmu)->base + (1 << (smmu)->pgshift))
/*
 * SMMU global address space with conditional offset to access secure
 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
 * nsGFSYNR0: 0x450)
 */
#define ARM_SMMU_GR0_NS(smmu)						\
	((smmu)->base +							\
		((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\
			? 0x400 : 0))
/*
 * Some 64-bit registers only make sense to write atomically, but in such
 * cases all the data relevant to AArch32 formats lies within the lower word,
 * therefore this actually makes more sense than it might first appear.
 */
#ifdef CONFIG_64BIT
#define smmu_write_atomic_lq		writeq_relaxed
#else
#define smmu_write_atomic_lq		writel_relaxed
#endif
/* Translation context bank */
#define ARM_SMMU_CB(smmu, n)	((smmu)->cb_base + ((n) << (smmu)->pgshift))

#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
static int force_stage;
/*
 * not really modular, but the easiest way to keep compat with existing
 * bootargs behaviour is to continue using module_param() here.
 */
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass =
	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
enum arm_smmu_arch_version {
	ARM_SMMU_V1,
	ARM_SMMU_V1_64K,
	ARM_SMMU_V2,
};

enum arm_smmu_implementation {
	GENERIC_SMMU,
	ARM_MMU500,
	CAVIUM_SMMUV2,
	QCOM_SMMUV2,
};
struct arm_smmu_s2cr {
	struct iommu_group		*group;
	int				count;
	enum arm_smmu_s2cr_type		type;
	enum arm_smmu_s2cr_privcfg	privcfg;
	u8				cbndx;
};

#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}

struct arm_smmu_smr {
	u16				mask;
	u16				id;
	bool				valid;
};
struct arm_smmu_cb {
	u64				ttbr[2];
	u32				tcr[2];
	u32				mair[2];
	struct arm_smmu_cfg		*cfg;
};

struct arm_smmu_master_cfg {
	struct arm_smmu_device		*smmu;
	s16				smendx[];
};
#define INVALID_SMENDX			-1
#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
#define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
#define fwspec_smendx(fw, i) \
	(i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
#define for_each_cfg_sme(fw, i, idx) \
	for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
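
/*
 * Illustrative usage sketch (not part of the driver): for_each_cfg_sme()
 * walks a device's fwspec IDs in lockstep with its allocated stream map
 * entries, and "idx" may be INVALID_SMENDX for IDs that have no SME yet:
 *
 *	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
 *	int i, idx;
 *
 *	for_each_cfg_sme(fwspec, i, idx) {
 *		if (idx == INVALID_SMENDX)
 *			continue;
 *		arm_smmu_write_sme(fwspec_smmu(fwspec), idx);
 *	}
 */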
struct arm_smmu_device {
	struct device			*dev;

	void __iomem			*base;
	void __iomem			*cb_base;
	unsigned long			pgshift;
#define ARM_SMMU_FEAT_COHERENT_WALK	(1 << 0)
#define ARM_SMMU_FEAT_STREAM_MATCH	(1 << 1)
#define ARM_SMMU_FEAT_TRANS_S1		(1 << 2)
#define ARM_SMMU_FEAT_TRANS_S2		(1 << 3)
#define ARM_SMMU_FEAT_TRANS_NESTED	(1 << 4)
#define ARM_SMMU_FEAT_TRANS_OPS		(1 << 5)
#define ARM_SMMU_FEAT_VMID16		(1 << 6)
#define ARM_SMMU_FEAT_FMT_AARCH64_4K	(1 << 7)
#define ARM_SMMU_FEAT_FMT_AARCH64_16K	(1 << 8)
#define ARM_SMMU_FEAT_FMT_AARCH64_64K	(1 << 9)
#define ARM_SMMU_FEAT_FMT_AARCH32_L	(1 << 10)
#define ARM_SMMU_FEAT_FMT_AARCH32_S	(1 << 11)
#define ARM_SMMU_FEAT_EXIDS		(1 << 12)
	u32				features;

#define ARM_SMMU_OPT_SECURE_CFG_ACCESS	(1 << 0)
	u32				options;
	enum arm_smmu_arch_version	version;
	enum arm_smmu_implementation	model;

	u32				num_context_banks;
	u32				num_s2_context_banks;
	DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
	struct arm_smmu_cb		*cbs;
	atomic_t			irptndx;

	u32				num_mapping_groups;
	u16				streamid_mask;
	u16				smr_mask_mask;
	struct arm_smmu_smr		*smrs;
	struct arm_smmu_s2cr		*s2crs;
	struct mutex			stream_map_mutex;

	unsigned long			va_size;
	unsigned long			ipa_size;
	unsigned long			pa_size;
	unsigned long			pgsize_bitmap;

	u32				num_global_irqs;
	u32				num_context_irqs;
	unsigned int			*irqs;
	struct clk_bulk_data		*clks;
	int				num_clks;

	u32				cavium_id_base; /* Specific to Cavium */

	spinlock_t			global_sync_lock;

	/* IOMMU core code handle */
	struct iommu_device		iommu;
};
enum arm_smmu_context_fmt {
	ARM_SMMU_CTX_FMT_NONE,
	ARM_SMMU_CTX_FMT_AARCH64,
	ARM_SMMU_CTX_FMT_AARCH32_L,
	ARM_SMMU_CTX_FMT_AARCH32_S,
};

struct arm_smmu_cfg {
	u8				cbndx;
	u8				irptndx;
	union {
		u16			asid;
		u16			vmid;
	};
	u32				cbar;
	enum arm_smmu_context_fmt	fmt;
};
#define INVALID_IRPTNDX			0xff
enum arm_smmu_domain_stage {
	ARM_SMMU_DOMAIN_S1 = 0,
	ARM_SMMU_DOMAIN_S2,
	ARM_SMMU_DOMAIN_NESTED,
	ARM_SMMU_DOMAIN_BYPASS,
};
struct arm_smmu_domain {
	struct arm_smmu_device		*smmu;
	struct io_pgtable_ops		*pgtbl_ops;
	const struct iommu_gather_ops	*tlb_ops;
	struct arm_smmu_cfg		cfg;
	enum arm_smmu_domain_stage	stage;
	bool				non_strict;
	struct mutex			init_mutex; /* Protects smmu pointer */
	spinlock_t			cb_lock; /* Serialises ATS1* ops and TLB syncs */
	struct iommu_domain		domain;
};
struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);

static bool using_legacy_binding, using_generic_binding;

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
	{ 0, NULL},
};
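
/*
 * Illustrative DT fragment (hypothetical, for documentation only): a board
 * that must drive the SMMU through its secure register aliases sets the
 * matching property on the SMMU node, which parse_driver_options() below
 * translates into ARM_SMMU_OPT_SECURE_CFG_ACCESS:
 *
 *	smmu@b00000 {
 *		compatible = "arm,smmu-v1";
 *		...
 *		calxeda,smmu-secure-config-access;
 *	};
 */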
static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		return pm_runtime_get_sync(smmu->dev);

	return 0;
}

static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_put(smmu->dev);
}

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}
static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
					  arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				   arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}
static struct device_node *dev_get_dev_node(struct device *dev)
{
	if (dev_is_pci(dev)) {
		struct pci_bus *bus = to_pci_dev(dev)->bus;

		while (!pci_is_root_bus(bus))
			bus = bus->parent;
		return of_node_get(bus->bridge->parent->of_node);
	}

	return of_node_get(dev->of_node);
}
static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
	*((__be32 *)data) = cpu_to_be32(alias);
	return 0; /* Continue walking */
}
static int __find_legacy_master_phandle(struct device *dev, void *data)
{
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", 0)
		if (it->node == np) {
			*(void **)data = dev;
			return 1;
		}
	it->node = np;
	return err == -ENOENT ? 0 : err;
}
static struct platform_driver arm_smmu_driver;
static struct iommu_ops arm_smmu_ops;
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	struct device *smmu_dev;
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
	u32 *sids;
	__be32 pci_sid;
	int err;

	np = dev_get_dev_node(dev);
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}

	it.node = np;
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
	smmu_dev = data;
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;

	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		it.cur = &pci_sid;
		it.cur_count = 1;
	}

	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
				&arm_smmu_ops);
	if (err)
		return err;

	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
	if (!sids)
		return -ENOMEM;

	*smmu = dev_get_drvdata(smmu_dev);
	of_phandle_iterator_args(&it, sids, it.cur_count);
	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
	kfree(sids);
	return err;
}
static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
	int idx;

	do {
		idx = find_next_zero_bit(map, end, start);
		if (idx == end)
			return -ENOSPC;
	} while (test_and_set_bit(idx, map));

	return idx;
}

static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
				void __iomem *sync, void __iomem *status)
{
	unsigned int spin_cnt, delay;

	writel_relaxed(0, sync);
	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
				return;
			cpu_relax();
		}
		udelay(delay);
	}
	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}
static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
{
	void __iomem *base = ARM_SMMU_GR0(smmu);
	unsigned long flags;

	spin_lock_irqsave(&smmu->global_sync_lock, flags);
	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
			    base + ARM_SMMU_GR0_sTLBGSTATUS);
	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}

static void arm_smmu_tlb_sync_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
	unsigned long flags;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
			    base + ARM_SMMU_CB_TLBSTATUS);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}
static void arm_smmu_tlb_sync_vmid(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;

	arm_smmu_tlb_sync_global(smmu_domain->smmu);
}
static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);

	/*
	 * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
	 * cleared by the current CPU are visible to the SMMU before the TLBI.
	 */
	writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
	arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_inv_context_s2(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_GR0(smmu);

	/* NOTE: see above */
	writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
	arm_smmu_tlb_sync_global(smmu);
}
static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
					  size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	if (stage1) {
		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
			iova &= ~12UL;
			iova |= cfg->asid;
			do {
				writel_relaxed(iova, reg);
				iova += granule;
			} while (size -= granule);
		} else {
			iova >>= 12;
			iova |= (u64)cfg->asid << 48;
			do {
				writeq_relaxed(iova, reg);
				iova += granule >> 12;
			} while (size -= granule);
		}
	} else {
		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
			      ARM_SMMU_CB_S2_TLBIIPAS2;
		iova >>= 12;
		do {
			smmu_write_atomic_lq(iova, reg);
			iova += granule >> 12;
		} while (size -= granule);
	}
}
/*
 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
 * almost negligible, but the benefit of getting the first one in as far ahead
 * of the sync as possible is significant, hence we don't just make this a
 * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think.
 */
static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
					 size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
}
static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_vmid_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_vmid,
};
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
	u32 fsr, fsynr, cbfrsynra;
	unsigned long iova;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *gr1_base = ARM_SMMU_GR1(smmu);
	void __iomem *cb_base;

	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);

	if (!(fsr & FSR_FAULT))
		return IRQ_NONE;

	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
	cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));

	dev_err_ratelimited(smmu->dev,
	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cbfrsynra, cfg->cbndx);

	writel(fsr, cb_base + ARM_SMMU_CB_FSR);
	return IRQ_HANDLED;
}
static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
	void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

	if (!gfsr)
		return IRQ_NONE;

	dev_err_ratelimited(smmu->dev,
		"Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
		"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
		gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
	return IRQ_HANDLED;
}
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	cb->cfg = cfg;

	/* TTBCR */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
		} else {
			cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
			cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
			cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
				cb->tcr[1] |= TTBCR2_AS;
		}
	} else {
		cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	}

	/* TTBRs */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
			cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
		} else {
			cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
			cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
			cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
			cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
		}
	} else {
		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
		} else {
			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
		}
	}
}
static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
{
	u32 reg;
	bool stage1;
	struct arm_smmu_cb *cb = &smmu->cbs[idx];
	struct arm_smmu_cfg *cfg = cb->cfg;
	void __iomem *cb_base, *gr1_base;

	cb_base = ARM_SMMU_CB(smmu, idx);

	/* Unassigned context banks only need disabling */
	if (!cfg) {
		writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
		return;
	}

	gr1_base = ARM_SMMU_GR1(smmu);
	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	/* CBA2R */
	if (smmu->version > ARM_SMMU_V1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
			reg = CBA2R_RW64_64BIT;
		else
			reg = CBA2R_RW64_32BIT;
		/* 16-bit VMIDs live in CBA2R */
		if (smmu->features & ARM_SMMU_FEAT_VMID16)
			reg |= cfg->vmid << CBA2R_VMID_SHIFT;

		writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
	}

	/* CBAR */
	reg = cfg->cbar;
	if (smmu->version < ARM_SMMU_V2)
		reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;

	/*
	 * Use the weakest shareability/memory types, so they are
	 * overridden by the ttbcr/pte.
	 */
	if (stage1) {
		reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
			(CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
		/* 8-bit VMIDs live in CBAR */
		reg |= cfg->vmid << CBAR_VMID_SHIFT;
	}
	writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));

	/*
	 * TTBCR
	 * We must write this before the TTBRs, since it determines the
	 * access behaviour of some fields (in particular, ASID[15:8]).
	 */
	if (stage1 && smmu->version > ARM_SMMU_V1)
		writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
	writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);

	/* TTBRs */
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
		writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
		writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
		writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
	} else {
		writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
		if (stage1)
			writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
		writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
	}

	/* SCTLR */
	reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
	if (stage1)
		reg |= SCTLR_S1_ASIDPNE;
	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
		reg |= SCTLR_E;

	writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
}
static int arm_smmu_init_domain_context(struct iommu_domain *domain,
					struct arm_smmu_device *smmu)
{
	int irq, start, ret = 0;
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		goto out_unlock;

	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
		smmu_domain->smmu = smmu;
		goto out_unlock;
	}

	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 * S1               N                S1
	 * S1               S1+S2            S1
	 * S1               S2               S2
	 * S1               S1               S1
	 * N                N                N
	 * N                S1+S2            N
	 * N                S2               S2
	 * N                S1               S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	/*
	 * Choosing a suitable context format is even more fiddly. Until we
	 * grow some way for the caller to express a preference, and/or move
	 * the decision into the io-pgtable code where it arguably belongs,
	 * just aim for the closest thing to the rest of the system, and hope
	 * that the hardware isn't esoteric enough that we can't assume AArch64
	 * support to be a superset of AArch32 support...
	 */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
		ret = -EINVAL;
		goto out_unlock;
	}

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		start = smmu->num_s2_context_banks;
		ias = smmu->va_size;
		oas = smmu->ipa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S1;
		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
			fmt = ARM_32_LPAE_S1;
			ias = min(ias, 32UL);
			oas = min(oas, 40UL);
		} else {
			fmt = ARM_V7S;
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
		}
		smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
		/*
		 * We will likely want to change this if/when KVM gets
		 * involved.
		 */
	case ARM_SMMU_DOMAIN_S2:
		cfg->cbar = CBAR_TYPE_S2_TRANS;
		start = 0;
		ias = smmu->ipa_size;
		oas = smmu->pa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S2;
		} else {
			fmt = ARM_32_LPAE_S2;
			ias = min(ias, 40UL);
			oas = min(oas, 40UL);
		}
		if (smmu->version == ARM_SMMU_V2)
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
		else
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
		break;
	default:
		ret = -EINVAL;
		goto out_unlock;
	}
	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
				      smmu->num_context_banks);
	if (ret < 0)
		goto out_unlock;

	cfg->cbndx = ret;
	if (smmu->version < ARM_SMMU_V2) {
		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
		cfg->irptndx %= smmu->num_context_irqs;
	} else {
		cfg->irptndx = cfg->cbndx;
	}

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
		cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
	else
		cfg->asid = cfg->cbndx + smmu->cavium_id_base;

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.tlb		= smmu_domain->tlb_ops,
		.iommu_dev	= smmu->dev,
	};

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;

	if (smmu_domain->non_strict)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

	smmu_domain->smmu = smmu;
	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
	domain->geometry.aperture_end = (1UL << ias) - 1;
	domain->geometry.force_aperture = true;

	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
	ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
			       IRQF_SHARED, "arm-smmu-context-fault", domain);
	if (ret < 0) {
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
			cfg->irptndx, irq);
		cfg->irptndx = INVALID_IRPTNDX;
	}

	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;

out_clear_smmu:
	smmu_domain->smmu = NULL;
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int ret, irq;

	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
		return;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	/*
	 * Disable the context bank and free the page tables before freeing
	 * it.
	 */
	smmu->cbs[cfg->cbndx].cfg = NULL;
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	if (cfg->irptndx != INVALID_IRPTNDX) {
		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
		devm_free_irq(smmu->dev, irq, domain);
	}

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);

	arm_smmu_rpm_put(smmu);
}
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
	struct arm_smmu_domain *smmu_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED &&
	    type != IOMMU_DOMAIN_DMA &&
	    type != IOMMU_DOMAIN_IDENTITY)
		return NULL;
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
		return NULL;

	if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
	    iommu_get_dma_cookie(&smmu_domain->domain))) {
		kfree(smmu_domain);
		return NULL;
	}

	mutex_init(&smmu_domain->init_mutex);
	spin_lock_init(&smmu_domain->cb_lock);

	return &smmu_domain->domain;
}
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);
	kfree(smmu_domain);
}
static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_smr *smr = smmu->smrs + idx;
	u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;

	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
		reg |= SMR_VALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
}
static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
	u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
		  (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
		  (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
	    smmu->smrs[idx].valid)
		reg |= S2CR_EXIDVALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
}

static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
{
	arm_smmu_write_s2cr(smmu, idx);
	if (smmu->smrs)
		arm_smmu_write_smr(smmu, idx);
}
/*
 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
 * should be called after sCR0 is written.
 */
static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 smr;

	if (!smmu->smrs)
		return;

	/*
	 * SMR.ID bits may not be preserved if the corresponding MASK
	 * bits are set, so check each one separately. We can reject
	 * masters later if they try to claim IDs outside these masks.
	 */
	smr = smmu->streamid_mask << SMR_ID_SHIFT;
	writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
	smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
	smmu->streamid_mask = smr >> SMR_ID_SHIFT;

	smr = smmu->streamid_mask << SMR_MASK_SHIFT;
	writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
	smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
	smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
}
static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
{
	struct arm_smmu_smr *smrs = smmu->smrs;
	int i, free_idx = -ENOSPC;

	/* Stream indexing is blissfully easy */
	if (!smrs)
		return id;

	/* Validating SMRs is... less so */
	for (i = 0; i < smmu->num_mapping_groups; ++i) {
		if (!smrs[i].valid) {
			/*
			 * Note the first free entry we come across, which
			 * we'll claim in the end if nothing else matches.
			 */
			if (free_idx < 0)
				free_idx = i;
			continue;
		}
		/*
		 * If the new entry is _entirely_ matched by an existing entry,
		 * then reuse that, with the guarantee that there also cannot
		 * be any subsequent conflicting entries. In normal use we'd
		 * expect simply identical entries for this case, but there's
		 * no harm in accommodating the generalisation.
		 */
		if ((mask & smrs[i].mask) == mask &&
		    !((id ^ smrs[i].id) & ~smrs[i].mask))
			return i;
		/*
		 * If the new entry has any other overlap with an existing one,
		 * though, then there always exists at least one stream ID
		 * which would cause a conflict, and we can't allow that risk.
		 */
		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
			return -EINVAL;
	}

	return free_idx;
}
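
/*
 * Worked example (illustrative only): suppose an existing SMR has id = 0x10,
 * mask = 0x03, i.e. it matches stream IDs 0x10-0x13. A new request for
 * id = 0x12, mask = 0x01 (matching 0x12-0x13) is entirely contained within
 * it: (0x01 & 0x03) == 0x01 and ((0x12 ^ 0x10) & ~0x03) == 0, so
 * arm_smmu_find_sme() reuses the existing entry. A request for id = 0x13,
 * mask = 0x04 (matching 0x13 and 0x17), however, only partially overlaps:
 * 0x17 is not covered by the existing entry, and since
 * ((0x13 ^ 0x10) & ~(0x03 | 0x04)) == 0 the function returns -EINVAL rather
 * than risk a conflicting match.
 */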
static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
{
	if (--smmu->s2crs[idx].count)
		return false;

	smmu->s2crs[idx] = s2cr_init_val;
	if (smmu->smrs)
		smmu->smrs[idx].valid = false;

	return true;
}
static int arm_smmu_master_alloc_smes(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
	struct arm_smmu_device *smmu = cfg->smmu;
	struct arm_smmu_smr *smrs = smmu->smrs;
	struct iommu_group *group;
	int i, idx, ret;

	mutex_lock(&smmu->stream_map_mutex);
	/* Figure out a viable stream map entry allocation */
	for_each_cfg_sme(fwspec, i, idx) {
		u16 sid = fwspec->ids[i];
		u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;

		if (idx != INVALID_SMENDX) {
			ret = -EEXIST;
			goto out_err;
		}

		ret = arm_smmu_find_sme(smmu, sid, mask);
		if (ret < 0)
			goto out_err;

		idx = ret;
		if (smrs && smmu->s2crs[idx].count == 0) {
			smrs[idx].id = sid;
			smrs[idx].mask = mask;
			smrs[idx].valid = true;
		}
		smmu->s2crs[idx].count++;
		cfg->smendx[i] = (s16)idx;
	}

	group = iommu_group_get_for_dev(dev);
	if (!group)
		group = ERR_PTR(-ENOMEM);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_err;
	}
	iommu_group_put(group);

	/* It worked! Now, poke the actual hardware */
	for_each_cfg_sme(fwspec, i, idx) {
		arm_smmu_write_sme(smmu, idx);
		smmu->s2crs[idx].group = group;
	}

	mutex_unlock(&smmu->stream_map_mutex);
	return 0;

out_err:
	while (i--) {
		arm_smmu_free_sme(smmu, cfg->smendx[i]);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
	return ret;
}
static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
	int i, idx;

	mutex_lock(&smmu->stream_map_mutex);
	for_each_cfg_sme(fwspec, i, idx) {
		if (arm_smmu_free_sme(smmu, idx))
			arm_smmu_write_sme(smmu, idx);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
}
static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
				      struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
	u8 cbndx = smmu_domain->cfg.cbndx;
	enum arm_smmu_s2cr_type type;
	int i, idx;

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
		type = S2CR_TYPE_BYPASS;
	else
		type = S2CR_TYPE_TRANS;

	for_each_cfg_sme(fwspec, i, idx) {
		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
			continue;

		s2cr[idx].type = type;
		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
		s2cr[idx].cbndx = cbndx;
		arm_smmu_write_s2cr(smmu, idx);
	}
	return 0;
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
		return -ENXIO;
	}

	/*
	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
	 * domains between of_xlate() and add_device() - we have no way to cope
	 * with that, so until ARM gets converted to rely on groups and default
	 * domains, just say no (but more politely than by dereferencing NULL).
	 * This should be at least a WARN_ON once that's sorted.
	 */
	if (!fwspec->iommu_priv)
		return -ENODEV;

	smmu = fwspec_smmu(fwspec);

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return ret;

	/* Ensure that the domain is finalised */
	ret = arm_smmu_init_domain_context(domain, smmu);
	if (ret < 0)
		goto rpm_put;

	/*
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
	 */
	if (smmu_domain->smmu != smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
		ret = -EINVAL;
		goto rpm_put;
	}

	/* Looks ok, so add the device to the domain */
	ret = arm_smmu_domain_add_master(smmu_domain, fwspec);

rpm_put:
	arm_smmu_rpm_put(smmu);
	return ret;
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t size, int prot)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	int ret;

	if (!ops)
		return -ENODEV;

	arm_smmu_rpm_get(smmu);
	ret = ops->map(ops, iova, paddr, size, prot);
	arm_smmu_rpm_put(smmu);

	return ret;
}

static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	size_t ret;

	if (!ops)
		return 0;

	arm_smmu_rpm_get(smmu);
	ret = ops->unmap(ops, iova, size);
	arm_smmu_rpm_put(smmu);

	return ret;
}
static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->tlb_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}

static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->tlb_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->tlb_ops->tlb_sync(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *cb_base;
	u32 tmp;
	u64 phys;
	unsigned long va, flags;
	int ret;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return 0;

	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	/* ATS1 registers can only be written atomically */
	va = iova & ~0xfffUL;
	if (smmu->version == ARM_SMMU_V2)
		smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
	else /* Register is only 32-bit in v1 */
		writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);

	if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
				      !(tmp & ATSR_ACTIVE), 5, 50)) {
		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		return ops->iova_to_phys(ops, iova);
	}

	phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
	if (phys & CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		return 0;
	}

	arm_smmu_rpm_put(smmu);

	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}
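
/*
 * Worked example (illustrative): if the hardware ATS1PR walk above returns
 * PAR = 0x0000000080012000 with the fault bit clear, then for an IOVA whose
 * page offset is 0x345 the result is
 * (0x80012000 & GENMASK_ULL(39, 12)) | 0x345 = 0x80012345 - i.e. PAR
 * supplies PA[39:12] and the page offset is carried over from the IOVA.
 */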
static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
					 dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		return iova;

	if (!ops)
		return 0;

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		return arm_smmu_iova_to_phys_hard(domain, iova);

	return ops->iova_to_phys(ops, iova);
}
static bool arm_smmu_capable(enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		/*
		 * Return true here as the SMMU can always send out coherent
		 * requests.
		 */
		return true;
	case IOMMU_CAP_NOEXEC:
		return true;
	default:
		return false;
	}
}
static int arm_smmu_match_node(struct device *dev, void *data)
{
	return dev->fwnode == data;
}

static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
						fwnode, arm_smmu_match_node);
	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}
static int arm_smmu_add_device(struct device *dev)
{
	struct arm_smmu_device *smmu;
	struct arm_smmu_master_cfg *cfg;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	int i, ret;

	if (using_legacy_binding) {
		ret = arm_smmu_register_legacy_master(dev, &smmu);

		/*
		 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
		 * will allocate/initialise a new one. Thus we need to update fwspec for
		 * later use.
		 */
		fwspec = dev_iommu_fwspec_get(dev);
		if (ret)
			goto out_free;
	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	} else {
		return -ENODEV;
	}

	ret = -EINVAL;
	for (i = 0; i < fwspec->num_ids; i++) {
		u16 sid = fwspec->ids[i];
		u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;

		if (sid & ~smmu->streamid_mask) {
			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
				sid, smmu->streamid_mask);
			goto out_free;
		}
		if (mask & ~smmu->smr_mask_mask) {
			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
				mask, smmu->smr_mask_mask);
			goto out_free;
		}
	}

	ret = -ENOMEM;
	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
		      GFP_KERNEL);
	if (!cfg)
		goto out_free;

	cfg->smmu = smmu;
	fwspec->iommu_priv = cfg;
	while (i--)
		cfg->smendx[i] = INVALID_SMENDX;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		goto out_cfg_free;

	ret = arm_smmu_master_alloc_smes(dev);
	arm_smmu_rpm_put(smmu);

	if (ret)
		goto out_cfg_free;

	iommu_device_link(&smmu->iommu, dev);

	device_link_add(dev, smmu->dev,
			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);

	return 0;

out_cfg_free:
	kfree(cfg);
out_free:
	iommu_fwspec_free(dev);
	return ret;
}
static void arm_smmu_remove_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg;
	struct arm_smmu_device *smmu;
	int ret;

	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return;

	cfg  = fwspec->iommu_priv;
	smmu = cfg->smmu;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	iommu_device_unlink(&smmu->iommu, dev);
	arm_smmu_master_free_smes(fwspec);

	arm_smmu_rpm_put(smmu);

	iommu_group_remove_device(dev);
	kfree(fwspec->iommu_priv);
	iommu_fwspec_free(dev);
}
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct iommu_group *group = NULL;
	int i, idx;

	for_each_cfg_sme(fwspec, i, idx) {
		if (group && smmu->s2crs[idx].group &&
		    group != smmu->s2crs[idx].group)
			return ERR_PTR(-EINVAL);

		group = smmu->s2crs[idx].group;
	}

	if (group)
		return iommu_group_ref_get(group);

	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else if (dev_is_fsl_mc(dev))
		group = fsl_mc_device_group(dev);
	else
		group = generic_device_group(dev);

	return group;
}
static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	switch (domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
			return 0;
		default:
			return -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			*(int *)data = smmu_domain->non_strict;
			return 0;
		default:
			return -ENODEV;
		}
		break;
	default:
		return -EINVAL;
	}
}
static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	int ret = 0;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	mutex_lock(&smmu_domain->init_mutex);

	switch (domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			if (smmu_domain->smmu) {
				ret = -EPERM;
				goto out_unlock;
			}

			if (*(int *)data)
				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
			else
				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			smmu_domain->non_strict = *(int *)data;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	default:
		ret = -EINVAL;
	}
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	u32 mask, fwid = 0;

	if (args->args_count > 0)
		fwid |= (u16)args->args[0];

	if (args->args_count > 1)
		fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
		fwid |= (u16)mask << SMR_MASK_SHIFT;

	return iommu_fwspec_add_ids(dev, &fwid, 1);
}
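
/*
 * Illustrative DT fragment (hypothetical, assuming a two-cell specifier,
 * which the xlate code above accepts): a master encodes its StreamID - and
 * optionally an SMR mask - in its "iommus" specifier, which
 * arm_smmu_of_xlate() packs into a single fwspec ID as
 * (mask << SMR_MASK_SHIFT) | sid:
 *
 *	smmu: iommu@2b400000 {
 *		compatible = "arm,mmu-500";
 *		#global-interrupts = <1>;
 *		#iommu-cells = <2>;
 *		...
 *	};
 *
 *	dma@7ff00000 {
 *		...
 *		iommus = <&smmu 0x400 0x3f>;	// SID 0x400, SMR mask 0x3f
 *	};
 */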
static void arm_smmu_get_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *region;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					 prot, IOMMU_RESV_SW_MSI);
	if (!region)
		return;

	list_add_tail(&region->list, head);

	iommu_dma_get_resv_regions(dev, head);
}

static void arm_smmu_put_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *entry, *next;

	list_for_each_entry_safe(entry, next, head, list)
		kfree(entry);
}
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= arm_smmu_add_device,
	.remove_device		= arm_smmu_remove_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= arm_smmu_put_resv_regions,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};
static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	int i;
	u32 reg, major;

	/* clear global FSR */
	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);

	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);

	if (smmu->model == ARM_MMU500) {
		/*
		 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
		 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
		 * bit is only present in MMU-500r2 onwards.
		 */
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
		major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
		if (major >= 2)
			reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
		/*
		 * Allow unmatched Stream IDs to allocate bypass
		 * TLB entries for reduced latency.
		 */
		reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
	}

	/* Make sure all context banks are disabled and clear CB_FSR  */
	for (i = 0; i < smmu->num_context_banks; ++i) {
		void __iomem *cb_base = ARM_SMMU_CB(smmu, i);

		arm_smmu_write_context_bank(smmu, i);
		writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
		/*
		 * Disable MMU-500's not-particularly-beneficial next-page
		 * prefetcher for the sake of errata #841119 and #826419.
		 */
		if (smmu->model == ARM_MMU500) {
			reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
			reg &= ~ARM_MMU500_ACTLR_CPRE;
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
		}
	}

	/* Invalidate the TLB, just in case */
	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);

	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);

	/* Enable fault reporting */
	reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);

	/* Disable TLB broadcasting. */
	reg |= (sCR0_VMIDPNE | sCR0_PTM);

	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= sCR0_USFCFG;
	else
		reg &= ~sCR0_USFCFG;

	/* Disable forced broadcasting */
	reg &= ~sCR0_FB;

	/* Don't upgrade barriers */
	reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);

	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= sCR0_VMID16EN;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
		reg |= sCR0_EXIDENABLE;

	/* Push the button */
	arm_smmu_tlb_sync_global(smmu);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
}
static int arm_smmu_id_size_to_bits(int size)
{
	switch (size) {
	case 0:
		return 32;
	case 1:
		return 36;
	case 2:
		return 40;
	case 3:
		return 42;
	case 4:
		return 44;
	case 5:
	default:
		return 48;
	}
}
static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
	unsigned long size;
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 id;
	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
	int i;

	dev_notice(smmu->dev, "probing hardware configuration...\n");
	dev_notice(smmu->dev, "SMMUv%d with:\n",
			smmu->version == ARM_SMMU_V2 ? 2 : 1);

	/* ID0 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);

	/* Restrict available stages based on module parameter */
	if (force_stage == 1)
		id &= ~(ID0_S2TS | ID0_NTS);
	else if (force_stage == 2)
		id &= ~(ID0_S1TS | ID0_NTS);

	if (id & ID0_S1TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
		dev_notice(smmu->dev, "\tstage 1 translation\n");
	}

	if (id & ID0_S2TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
		dev_notice(smmu->dev, "\tstage 2 translation\n");
	}

	if (id & ID0_NTS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
		dev_notice(smmu->dev, "\tnested translation\n");
	}

	if (!(smmu->features &
		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
		dev_err(smmu->dev, "\tno translation support!\n");
		return -ENODEV;
	}

	if ((id & ID0_S1TS) &&
		((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
		dev_notice(smmu->dev, "\taddress translation ops\n");
	}

	/*
	 * In order for DMA API calls to work properly, we must defer to what
	 * the FW says about coherency, regardless of what the hardware claims.
	 * Fortunately, this also opens up a workaround for systems where the
	 * ID register value has ended up configured incorrectly.
	 */
	cttw_reg = !!(id & ID0_CTTW);
	if (cttw_fw || cttw_reg)
		dev_notice(smmu->dev, "\t%scoherent table walk\n",
			   cttw_fw ? "" : "non-");
	if (cttw_fw != cttw_reg)
		dev_notice(smmu->dev,
			   "\t(IDR0.CTTW overridden by FW configuration)\n");

	/* Max. number of entries we have for stream matching/indexing */
	if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
		smmu->features |= ARM_SMMU_FEAT_EXIDS;
		size = 1 << 16;
	} else {
		size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
	}
	smmu->streamid_mask = size - 1;
	if (id & ID0_SMS) {
		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
		size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
		if (size == 0) {
			dev_err(smmu->dev,
				"stream-matching supported, but no SMRs present!\n");
			return -ENODEV;
		}

		/* Zero-initialised to mark as invalid */
		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
					  GFP_KERNEL);
		if (!smmu->smrs)
			return -ENOMEM;

		dev_notice(smmu->dev,
			   "\tstream matching with %lu register groups", size);
	}
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

	smmu->num_mapping_groups = size;
	mutex_init(&smmu->stream_map_mutex);
	spin_lock_init(&smmu->global_sync_lock);

	if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
		if (!(id & ID0_PTFS_NO_AARCH32S))
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
	}

	/* ID1 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
	smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;

	/* Check for size mismatch of SMMU address space from mapped region */
	size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
	size <<= smmu->pgshift;
	if (smmu->cb_base != gr0_base + size)
		dev_warn(smmu->dev,
			"SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
			size * 2, (smmu->cb_base - gr0_base) * 2);

	smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
	smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
		return -ENODEV;
	}
	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
		   smmu->num_context_banks, smmu->num_s2_context_banks);
	/*
	 * Cavium CN88xx erratum #27704.
	 * Ensure ASID and VMID allocation is unique across all SMMUs in
	 * the system.
	 */
	if (smmu->model == CAVIUM_SMMUV2) {
		smmu->cavium_id_base =
			atomic_add_return(smmu->num_context_banks,
					  &cavium_smmu_context_count);
		smmu->cavium_id_base -= smmu->num_context_banks;
		dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
	}
	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
				 sizeof(*smmu->cbs), GFP_KERNEL);
	if (!smmu->cbs)
		return -ENOMEM;

	/* ID2 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
	smmu->pa_size = size;

	if (id & ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	if (smmu->version < ARM_SMMU_V2) {
		smmu->va_size = smmu->ipa_size;
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	} else {
		size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ID2_PTFS_4K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
		if (id & ID2_PTFS_16K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
		if (id & ID2_PTFS_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	}

	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);


	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	return 0;
}
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },
};
#ifdef CONFIG_ACPI
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
{
	int ret = 0;

	switch (model) {
	case ACPI_IORT_SMMU_V1:
	case ACPI_IORT_SMMU_CORELINK_MMU400:
		smmu->version = ARM_SMMU_V1;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU401:
		smmu->version = ARM_SMMU_V1_64K;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_V2:
		smmu->version = ARM_SMMU_V2;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU500:
		smmu->version = ARM_SMMU_V2;
		smmu->model = ARM_MMU500;
		break;
	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
		smmu->version = ARM_SMMU_V2;
		smmu->model = CAVIUM_SMMUV2;
		break;
	default:
		ret = -ENODEV;
	}

	return ret;
}
static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node =
		*(struct acpi_iort_node **)dev_get_platdata(dev);
	struct acpi_iort_smmu *iort_smmu;
	int ret;

	/* Retrieve SMMU1/2 specific data */
	iort_smmu = (struct acpi_iort_smmu *)node->node_data;

	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
	if (ret < 0)
		return ret;

	/* Ignore the configuration access interrupt */
	smmu->num_global_irqs = 1;

	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif
static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	const struct arm_smmu_match_data *data;
	struct device *dev = &pdev->dev;
	bool legacy_binding;

	if (of_property_read_u32(dev->of_node, "#global-interrupts",
				 &smmu->num_global_irqs)) {
		dev_err(dev, "missing #global-interrupts property\n");
		return -ENODEV;
	}

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	parse_driver_options(smmu);

	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
	if (legacy_binding && !using_generic_binding) {
		if (!using_legacy_binding)
			pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
		using_legacy_binding = true;
	} else if (!legacy_binding && !using_legacy_binding) {
		using_generic_binding = true;
	} else {
		dev_err(dev, "not probing due to mismatched DT properties\n");
		return -ENODEV;
	}

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
static void arm_smmu_bus_init(void)
{
	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type))
		bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype))
		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
#endif
#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
		pci_request_acs();
		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
	}
#endif
#ifdef CONFIG_FSL_MC_BUS
	if (!iommu_present(&fsl_mc_bus_type))
		bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
#endif
}
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	if (dev->of_node)
		err = arm_smmu_device_dt_probe(pdev, smmu);
	else
		err = arm_smmu_device_acpi_probe(pdev, smmu);

	if (err)
		return err;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	ioaddr = res->start;
	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	smmu->cb_base = smmu->base + resource_size(res) / 2;

	num_irqs = 0;
	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
		num_irqs++;
		if (num_irqs > smmu->num_global_irqs)
			smmu->num_context_irqs++;
	}

	if (!smmu->num_context_irqs) {
		dev_err(dev, "found %d interrupts but expected at least %d\n",
			num_irqs, smmu->num_global_irqs + 1);
		return -ENODEV;
	}

	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
				  GFP_KERNEL);
	if (!smmu->irqs) {
		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
		return -ENOMEM;
	}

	for (i = 0; i < num_irqs; ++i) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0) {
			dev_err(dev, "failed to get irq index %d\n", i);
			return -ENODEV;
		}
		smmu->irqs[i] = irq;
	}

	err = devm_clk_bulk_get_all(dev, &smmu->clks);
	if (err < 0) {
		dev_err(dev, "failed to get clocks %d\n", err);
		return err;
	}
	smmu->num_clks = err;

	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
	if (err)
		return err;

	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

	if (smmu->version == ARM_SMMU_V2) {
		if (smmu->num_context_banks > smmu->num_context_irqs) {
			dev_err(dev,
				"found only %d context irq(s) but %d required\n",
				smmu->num_context_irqs, smmu->num_context_banks);
			return -ENODEV;
		}

		/* Ignore superfluous interrupts */
		smmu->num_context_irqs = smmu->num_context_banks;
	}

	for (i = 0; i < smmu->num_global_irqs; ++i) {
		err = devm_request_irq(smmu->dev, smmu->irqs[i],
				       arm_smmu_global_fault,
				       IRQF_SHARED,
				       "arm-smmu global fault",
				       smmu);
		if (err) {
			dev_err(dev, "failed to request global IRQ %d (%u)\n",
				i, smmu->irqs[i]);
			return err;
		}
	}

	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
				     "smmu.%pa", &ioaddr);
	if (err) {
		dev_err(dev, "Failed to register iommu in sysfs\n");
		return err;
	}

	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);

	err = iommu_device_register(&smmu->iommu);
	if (err) {
		dev_err(dev, "Failed to register iommu\n");
		return err;
	}

	platform_set_drvdata(pdev, smmu);
	arm_smmu_device_reset(smmu);
	arm_smmu_test_smr_masks(smmu);

	/*
	 * We want to avoid touching dev->power.lock in fastpaths unless
	 * it's really going to do something useful - pm_runtime_enabled()
	 * can serve as an ideal proxy for that decision. So, conditionally
	 * enable pm_runtime.
	 */
	if (dev->pm_domain) {
		pm_runtime_set_active(dev);
		pm_runtime_enable(dev);
	}

	/*
	 * For ACPI and generic DT bindings, an SMMU will be probed before
	 * any device which might need it, so we want the bus ops in place
	 * ready to handle default domain setup as soon as any SMMU exists.
	 */
	if (!using_legacy_binding)
		arm_smmu_bus_init();

	return 0;
}
/*
 * With the legacy DT binding in play, though, we have no guarantees about
 * probe order, but then we're also not doing default domains, so we can
 * delay setting bus ops until we're sure every possible SMMU is ready,
 * and that way ensure that no add_device() calls get missed.
 */
static int arm_smmu_legacy_bus_init(void)
{
	if (using_legacy_binding)
		arm_smmu_bus_init();
	return 0;
}
device_initcall_sync(arm_smmu_legacy_bus_init);
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return;

	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_err(&pdev->dev, "removing device with active domains!\n");

	arm_smmu_rpm_get(smmu);
	/* Turn the thing off */
	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
	arm_smmu_rpm_put(smmu);

	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_force_suspend(smmu->dev);
	else
		clk_bulk_disable(smmu->num_clks, smmu->clks);

	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
}
static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	int ret;

	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
	if (ret)
		return ret;

	arm_smmu_device_reset(smmu);

	return 0;
}

static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	clk_bulk_disable(smmu->num_clks, smmu->clks);

	return 0;
}

static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_resume(dev);
}

static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_suspend(dev);
}
static const struct dev_pm_ops arm_smmu_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
			   arm_smmu_runtime_resume, NULL)
};

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu",
		.of_match_table		= of_match_ptr(arm_smmu_of_match),
		.pm			= &arm_smmu_pm_ops,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.shutdown = arm_smmu_device_shutdown,
};
builtin_platform_driver(arm_smmu_driver);