// SPDX-License-Identifier: GPL-2.0-only
/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *      - SMMUv1 and v2 implementations
 *      - Stream-matching and stream-indexing
 *      - v7/v8 long-descriptor format
 *      - Non-secure access to the SMMU
 *      - Context fault reporting
 *      - Extended Stream ID (16 bit)
 */

#define pr_fmt(fmt) "arm-smmu: " fmt

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>

#include <linux/amba/bus.h>
#include <linux/fsl/mc.h>

#include "arm-smmu.h"

/*
 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
 * global register space are still, in fact, using a hypervisor to mediate it
 * by trapping and emulating register accesses. Sadly, some deployed versions
 * of said trapping code have bugs wherein they go horribly wrong for stores
 * using r31 (i.e. XZR/WZR) as the source register.
 */
#define QCOM_DUMMY_VAL -1

#define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
#define TLB_SPIN_COUNT                  10

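/*
 * Fixed IOVA window reserved for mapping MSI doorbells on behalf of devices
 * sitting behind the SMMU, so that MSI writes translate consistently.
 */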
#define MSI_IOVA_BASE                   0x8000000
#define MSI_IOVA_LENGTH                 0x100000

static int force_stage;
/*
 * not really modular, but the easiest way to keep compat with existing
 * bootargs behaviour is to continue using module_param() here.
 */
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
        "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass =
        IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
        "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

struct arm_smmu_s2cr {
        struct iommu_group              *group;
        int                             count;
        enum arm_smmu_s2cr_type         type;
        enum arm_smmu_s2cr_privcfg      privcfg;
        u8                              cbndx;
};

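/*
 * Default state for a Stream-to-Context register: fault unmatched or
 * detached streams when bypass is disabled, otherwise let them bypass
 * translation.
 */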
#define s2cr_init_val (struct arm_smmu_s2cr){                           \
        .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
}

struct arm_smmu_smr {
        u16                             mask;
        u16                             id;
        bool                            valid;
};

struct arm_smmu_cb {
        u64                             ttbr[2];
        u32                             tcr[2];
        u32                             mair[2];
        struct arm_smmu_cfg             *cfg;
};

struct arm_smmu_master_cfg {
        struct arm_smmu_device          *smmu;
        s16                             smendx[];
};
#define INVALID_SMENDX                  -1
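/*
 * Per-master state lives in dev->iommu_fwspec->iommu_priv; these helpers
 * recover the owning SMMU and walk the stream map entry (SME) indices
 * allocated for each of the master's stream IDs.
 */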
#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
#define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
#define fwspec_smendx(fw, i) \
        (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
#define for_each_cfg_sme(fw, i, idx) \
        for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)

static bool using_legacy_binding, using_generic_binding;

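/*
 * Runtime PM helpers: only touch the device's runtime PM state when runtime
 * PM has actually been enabled, so SMMUs without a controllable power domain
 * are unaffected.
 */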
static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
{
        if (pm_runtime_enabled(smmu->dev))
                return pm_runtime_get_sync(smmu->dev);

        return 0;
}

static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
        if (pm_runtime_enabled(smmu->dev))
                pm_runtime_put_autosuspend(smmu->dev);
}

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
        return container_of(dom, struct arm_smmu_domain, domain);
}

static struct device_node *dev_get_dev_node(struct device *dev)
{
        if (dev_is_pci(dev)) {
                struct pci_bus *bus = to_pci_dev(dev)->bus;

                while (!pci_is_root_bus(bus))
                        bus = bus->parent;
                return of_node_get(bus->bridge->parent->of_node);
        }

        return of_node_get(dev->of_node);
}

static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
        *((__be32 *)data) = cpu_to_be32(alias);
        return 0; /* Continue walking */
}

static int __find_legacy_master_phandle(struct device *dev, void *data)
{
        struct of_phandle_iterator *it = *(void **)data;
        struct device_node *np = it->node;
        int err;

        of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
                            "#stream-id-cells", -1)
                if (it->node == np) {
                        *(void **)data = dev;
                        return 1;
                }
        it->node = np;
        return err == -ENOENT ? 0 : err;
}

static struct platform_driver arm_smmu_driver;
static struct iommu_ops arm_smmu_ops;

static int arm_smmu_register_legacy_master(struct device *dev,
                                           struct arm_smmu_device **smmu)
{
        struct device *smmu_dev;
        struct device_node *np;
        struct of_phandle_iterator it;
        void *data = &it;
        u32 *sids;
        __be32 pci_sid;
        int err;

        np = dev_get_dev_node(dev);
        if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
                of_node_put(np);
                return -ENODEV;
        }

        it.node = np;
        err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
                                     __find_legacy_master_phandle);
        smmu_dev = data;
        of_node_put(np);
        if (err == 0)
                return -ENODEV;
        if (err < 0)
                return err;

        if (dev_is_pci(dev)) {
                /* "mmu-masters" assumes Stream ID == Requester ID */
                pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
                                       &pci_sid);
                it.cur = &pci_sid;
                it.cur_count = 1;
        }

        err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
                                &arm_smmu_ops);
        if (err)
                return err;

        sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
        if (!sids)
                return -ENOMEM;

        *smmu = dev_get_drvdata(smmu_dev);
        of_phandle_iterator_args(&it, sids, it.cur_count);
        err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
        kfree(sids);
        return err;
}

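/*
 * Claim the first free index in [start, end) of a resource bitmap, retrying
 * if we race with a concurrent allocator.
 */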
static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
        int idx;

        do {
                idx = find_next_zero_bit(map, end, start);
                if (idx == end)
                        return -ENOSPC;
        } while (test_and_set_bit(idx, map));

        return idx;
}

static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
        clear_bit(idx, map);
}

/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
                                int sync, int status)
{
        unsigned int spin_cnt, delay;
        u32 reg;

        if (smmu->impl && unlikely(smmu->impl->tlb_sync))
                return smmu->impl->tlb_sync(smmu, page, sync, status);

        arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
        for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
                for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
                        reg = arm_smmu_readl(smmu, page, status);
                        if (!(reg & sTLBGSTATUS_GSACTIVE))
                                return;
                        cpu_relax();
                }
                udelay(delay);
        }
        dev_err_ratelimited(smmu->dev,
                            "TLB sync timed out -- SMMU may be deadlocked\n");
}

static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
{
        unsigned long flags;

        spin_lock_irqsave(&smmu->global_sync_lock, flags);
        __arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
                            ARM_SMMU_GR0_sTLBGSTATUS);
        spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}

static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
{
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        unsigned long flags;

        spin_lock_irqsave(&smmu_domain->cb_lock, flags);
        __arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
                            ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
        spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}

static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
        struct arm_smmu_domain *smmu_domain = cookie;
        /*
         * The TLBI write may be relaxed, so ensure that PTEs cleared by the
         * current CPU are visible beforehand.
         */
        wmb();
        arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
                          ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
        arm_smmu_tlb_sync_context(smmu_domain);
}

static void arm_smmu_tlb_inv_context_s2(void *cookie)
{
        struct arm_smmu_domain *smmu_domain = cookie;
        struct arm_smmu_device *smmu = smmu_domain->smmu;

        /* See above */
        wmb();
        arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
        arm_smmu_tlb_sync_global(smmu);
}

static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
                                      size_t granule, void *cookie, int reg)
{
        struct arm_smmu_domain *smmu_domain = cookie;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        int idx = cfg->cbndx;

        if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
                wmb();

        if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
                iova = (iova >> 12) << 12;
                iova |= cfg->asid;
                do {
                        arm_smmu_cb_write(smmu, idx, reg, iova);
                        iova += granule;
                } while (size -= granule);
        } else {
                iova >>= 12;
                iova |= (u64)cfg->asid << 48;
                do {
                        arm_smmu_cb_writeq(smmu, idx, reg, iova);
                        iova += granule >> 12;
                } while (size -= granule);
        }
}

static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
                                      size_t granule, void *cookie, int reg)
{
        struct arm_smmu_domain *smmu_domain = cookie;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        int idx = smmu_domain->cfg.cbndx;

        if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
                wmb();

        iova >>= 12;
        do {
                if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
                        arm_smmu_cb_writeq(smmu, idx, reg, iova);
                else
                        arm_smmu_cb_write(smmu, idx, reg, iova);
                iova += granule >> 12;
        } while (size -= granule);
}

static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
                                     size_t granule, void *cookie)
{
        arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
                                  ARM_SMMU_CB_S1_TLBIVA);
        arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_inv_leaf_s1(unsigned long iova, size_t size,
                                     size_t granule, void *cookie)
{
        arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
                                  ARM_SMMU_CB_S1_TLBIVAL);
        arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
                                     unsigned long iova, size_t granule,
                                     void *cookie)
{
        arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
                                  ARM_SMMU_CB_S1_TLBIVAL);
}

static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
                                     size_t granule, void *cookie)
{
        arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
                                  ARM_SMMU_CB_S2_TLBIIPAS2);
        arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_inv_leaf_s2(unsigned long iova, size_t size,
                                     size_t granule, void *cookie)
{
        arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
                                  ARM_SMMU_CB_S2_TLBIIPAS2L);
        arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
                                     unsigned long iova, size_t granule,
                                     void *cookie)
{
        arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
                                  ARM_SMMU_CB_S2_TLBIIPAS2L);
}

static void arm_smmu_tlb_inv_any_s2_v1(unsigned long iova, size_t size,
                                       size_t granule, void *cookie)
{
        arm_smmu_tlb_inv_context_s2(cookie);
}
/*
 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
 * almost negligible, but the benefit of getting the first one in as far ahead
 * of the sync as possible is significant, hence we don't just make this a
 * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
 * think.
 */
static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
                                        unsigned long iova, size_t granule,
                                        void *cookie)
{
        struct arm_smmu_domain *smmu_domain = cookie;
        struct arm_smmu_device *smmu = smmu_domain->smmu;

        if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
                wmb();

        arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
}

static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
        .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
        .tlb_flush_walk = arm_smmu_tlb_inv_walk_s1,
        .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s1,
        .tlb_add_page   = arm_smmu_tlb_add_page_s1,
};

static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
        .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
        .tlb_flush_walk = arm_smmu_tlb_inv_walk_s2,
        .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s2,
        .tlb_add_page   = arm_smmu_tlb_add_page_s2,
};

static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
        .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
        .tlb_flush_walk = arm_smmu_tlb_inv_any_s2_v1,
        .tlb_flush_leaf = arm_smmu_tlb_inv_any_s2_v1,
        .tlb_add_page   = arm_smmu_tlb_add_page_s2_v1,
};

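/*
 * Per-context fault handler: report the faulting IOVA and syndrome
 * registers, then clear FSR so the context bank can signal further faults.
 */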
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
        u32 fsr, fsynr, cbfrsynra;
        unsigned long iova;
        struct iommu_domain *domain = dev;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        int idx = smmu_domain->cfg.cbndx;

        fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
        if (!(fsr & FSR_FAULT))
                return IRQ_NONE;

        fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
        iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
        cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));

        dev_err_ratelimited(smmu->dev,
        "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
                            fsr, iova, fsynr, cbfrsynra, idx);

        arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
        return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
        u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
        struct arm_smmu_device *smmu = dev;
        static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);

        gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
        gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
        gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
        gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);

        if (!gfsr)
                return IRQ_NONE;

        if (__ratelimit(&rs)) {
                if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
                    (gfsr & sGFSR_USF))
                        dev_err(smmu->dev,
                                "Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
                                (u16)gfsynr1);
                else
                        dev_err(smmu->dev,
                                "Unexpected global fault, this could be serious\n");
                dev_err(smmu->dev,
                        "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
                        gfsr, gfsynr0, gfsynr1, gfsynr2);
        }

        arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
        return IRQ_HANDLED;
}

static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
                                       struct io_pgtable_cfg *pgtbl_cfg)
{
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
        bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

        cb->cfg = cfg;

        /* TCR */
        if (stage1) {
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
                        cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
                } else {
                        cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
                        cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
                        cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM);
                        if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
                                cb->tcr[1] |= TCR2_AS;
                }
        } else {
                cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
        }

        /* TTBRs */
        if (stage1) {
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
                        cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
                        cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
                } else {
                        cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
                        cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
                        cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
                        cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
                }
        } else {
                cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
        }

        /* MAIRs (stage-1 only) */
        if (stage1) {
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
                        cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
                        cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
                } else {
                        cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
                        cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
                }
        }
}

static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
{
        u32 reg;
        bool stage1;
        struct arm_smmu_cb *cb = &smmu->cbs[idx];
        struct arm_smmu_cfg *cfg = cb->cfg;

        /* Unassigned context banks only need disabling */
        if (!cfg) {
                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
                return;
        }

        stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

        /* CBA2R */
        if (smmu->version > ARM_SMMU_V1) {
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
                        reg = CBA2R_VA64;
                else
                        reg = 0;
                /* 16-bit VMIDs live in CBA2R */
                if (smmu->features & ARM_SMMU_FEAT_VMID16)
                        reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid);

                arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
        }

        /* CBAR */
        reg = FIELD_PREP(CBAR_TYPE, cfg->cbar);
        if (smmu->version < ARM_SMMU_V2)
                reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx);

        /*
         * Use the weakest shareability/memory types, so they are
         * overridden by the ttbcr/pte.
         */
        if (stage1) {
                reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) |
                        FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB);
        } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
                /* 8-bit VMIDs live in CBAR */
                reg |= FIELD_PREP(CBAR_VMID, cfg->vmid);
        }
        arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);

        /*
         * TCR
         * We must write this before the TTBRs, since it determines the
         * access behaviour of some fields (in particular, ASID[15:8]).
         */
        if (stage1 && smmu->version > ARM_SMMU_V1)
                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
        arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);

        /* TTBRs */
        if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
        } else {
                arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
                if (stage1)
                        arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
                                           cb->ttbr[1]);
        }

        /* MAIRs (stage-1 only) */
        if (stage1) {
                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
        }

        /* SCTLR */
        reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
        if (stage1)
                reg |= SCTLR_S1_ASIDPNE;
        if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
                reg |= SCTLR_E;

        arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
}

static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                                        struct arm_smmu_device *smmu)
{
        int irq, start, ret = 0;
        unsigned long ias, oas;
        struct io_pgtable_ops *pgtbl_ops;
        struct io_pgtable_cfg pgtbl_cfg;
        enum io_pgtable_fmt fmt;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

        mutex_lock(&smmu_domain->init_mutex);
        if (smmu_domain->smmu)
                goto out_unlock;

        if (domain->type == IOMMU_DOMAIN_IDENTITY) {
                smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
                smmu_domain->smmu = smmu;
                goto out_unlock;
        }

        /*
         * Mapping the requested stage onto what we support is surprisingly
         * complicated, mainly because the spec allows S1+S2 SMMUs without
         * support for nested translation. That means we end up with the
         * following table:
         *
         * Requested        Supported        Actual
         *     S1               N              S1
         *     S1             S1+S2            S1
         *     S1               S2             S2
         *     S1               S1             S1
         *     N                N              N
         *     N              S1+S2            S2
         *     N                S2             S2
         *     N                S1             S1
         *
         * Note that you can't actually request stage-2 mappings.
         */
        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
                smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
                smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

        /*
         * Choosing a suitable context format is even more fiddly. Until we
         * grow some way for the caller to express a preference, and/or move
         * the decision into the io-pgtable code where it arguably belongs,
         * just aim for the closest thing to the rest of the system, and hope
         * that the hardware isn't esoteric enough that we can't assume AArch64
         * support to be a superset of AArch32 support...
         */
        if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
                cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
        if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
            !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
            (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
            (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
                cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
        if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
            (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
                               ARM_SMMU_FEAT_FMT_AARCH64_16K |
                               ARM_SMMU_FEAT_FMT_AARCH64_4K)))
                cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

        if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
                ret = -EINVAL;
                goto out_unlock;
        }

        switch (smmu_domain->stage) {
        case ARM_SMMU_DOMAIN_S1:
                cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
                start = smmu->num_s2_context_banks;
                ias = smmu->va_size;
                oas = smmu->ipa_size;
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
                        fmt = ARM_64_LPAE_S1;
                } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
                        fmt = ARM_32_LPAE_S1;
                        ias = min(ias, 32UL);
                        oas = min(oas, 40UL);
                } else {
                        fmt = ARM_V7S;
                        ias = min(ias, 32UL);
                        oas = min(oas, 32UL);
                }
                smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
                break;
        case ARM_SMMU_DOMAIN_NESTED:
                /*
                 * We will likely want to change this if/when KVM gets
                 * involved.
                 */
        case ARM_SMMU_DOMAIN_S2:
                cfg->cbar = CBAR_TYPE_S2_TRANS;
                start = 0;
                ias = smmu->ipa_size;
                oas = smmu->pa_size;
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
                        fmt = ARM_64_LPAE_S2;
                } else {
                        fmt = ARM_32_LPAE_S2;
                        ias = min(ias, 40UL);
                        oas = min(oas, 40UL);
                }
                if (smmu->version == ARM_SMMU_V2)
                        smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
                else
                        smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
                break;
        default:
                ret = -EINVAL;
                goto out_unlock;
        }
        ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
                                      smmu->num_context_banks);
        if (ret < 0)
                goto out_unlock;

        cfg->cbndx = ret;
        if (smmu->version < ARM_SMMU_V2) {
                cfg->irptndx = atomic_inc_return(&smmu->irptndx);
                cfg->irptndx %= smmu->num_context_irqs;
        } else {
                cfg->irptndx = cfg->cbndx;
        }

        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
                cfg->vmid = cfg->cbndx + 1;
        else
                cfg->asid = cfg->cbndx;

        smmu_domain->smmu = smmu;
        if (smmu->impl && smmu->impl->init_context) {
                ret = smmu->impl->init_context(smmu_domain);
                if (ret)
                        goto out_unlock;
        }

        pgtbl_cfg = (struct io_pgtable_cfg) {
                .pgsize_bitmap  = smmu->pgsize_bitmap,
                .ias            = ias,
                .oas            = oas,
                .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
                .tlb            = smmu_domain->flush_ops,
                .iommu_dev      = smmu->dev,
        };

        if (smmu_domain->non_strict)
                pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
        if (!pgtbl_ops) {
                ret = -ENOMEM;
                goto out_clear_smmu;
        }

        /* Update the domain's page sizes to reflect the page table format */
        domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
        domain->geometry.aperture_end = (1UL << ias) - 1;
        domain->geometry.force_aperture = true;

        /* Initialise the context bank with our page table cfg */
        arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
        arm_smmu_write_context_bank(smmu, cfg->cbndx);

        /*
         * Request context fault interrupt. Do this last to avoid the
         * handler seeing a half-initialised domain state.
         */
        irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
        ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
                               IRQF_SHARED, "arm-smmu-context-fault", domain);
        if (ret < 0) {
                dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
                        cfg->irptndx, irq);
                cfg->irptndx = INVALID_IRPTNDX;
        }

        mutex_unlock(&smmu_domain->init_mutex);

        /* Publish page table ops for map/unmap */
        smmu_domain->pgtbl_ops = pgtbl_ops;
        return 0;

out_clear_smmu:
        __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
        smmu_domain->smmu = NULL;
out_unlock:
        mutex_unlock(&smmu_domain->init_mutex);
        return ret;
}

static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        int ret, irq;

        if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
                return;

        ret = arm_smmu_rpm_get(smmu);
        if (ret < 0)
                return;

        /*
         * Disable the context bank and free the page tables before freeing
         * it.
         */
        smmu->cbs[cfg->cbndx].cfg = NULL;
        arm_smmu_write_context_bank(smmu, cfg->cbndx);

        if (cfg->irptndx != INVALID_IRPTNDX) {
                irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
                devm_free_irq(smmu->dev, irq, domain);
        }

        free_io_pgtable_ops(smmu_domain->pgtbl_ops);
        __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);

        arm_smmu_rpm_put(smmu);
}

static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
        struct arm_smmu_domain *smmu_domain;

        if (type != IOMMU_DOMAIN_UNMANAGED &&
            type != IOMMU_DOMAIN_DMA &&
            type != IOMMU_DOMAIN_IDENTITY)
                return NULL;
        /*
         * Allocate the domain and initialise some of its data structures.
         * We can't really do anything meaningful until we've added a
         * master.
         */
        smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
        if (!smmu_domain)
                return NULL;

        if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
            iommu_get_dma_cookie(&smmu_domain->domain))) {
                kfree(smmu_domain);
                return NULL;
        }

        mutex_init(&smmu_domain->init_mutex);
        spin_lock_init(&smmu_domain->cb_lock);

        return &smmu_domain->domain;
}

static void arm_smmu_domain_free(struct iommu_domain *domain)
{
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

        /*
         * Free the domain resources. We assume that all devices have
         * already been detached.
         */
        iommu_put_dma_cookie(domain);
        arm_smmu_destroy_domain_context(domain);
        kfree(smmu_domain);
}

static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
        struct arm_smmu_smr *smr = smmu->smrs + idx;
        u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask);

        if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
                reg |= SMR_VALID;
        arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
}

static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
        struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
        u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) |
                  FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) |
                  FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg);

        if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
            smmu->smrs[idx].valid)
                reg |= S2CR_EXIDVALID;
        arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
}

static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
{
        arm_smmu_write_s2cr(smmu, idx);
        if (smmu->smrs)
                arm_smmu_write_smr(smmu, idx);
}

/*
 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
 * should be called after sCR0 is written.
 */
static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
{
        u32 smr;

        if (!smmu->smrs)
                return;

        /*
         * SMR.ID bits may not be preserved if the corresponding MASK
         * bits are set, so check each one separately. We can reject
         * masters later if they try to claim IDs outside these masks.
         */
        smr = FIELD_PREP(SMR_ID, smmu->streamid_mask);
        arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
        smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
        smmu->streamid_mask = FIELD_GET(SMR_ID, smr);

        smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask);
        arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
        smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
        smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr);
}

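/*
 * Find a stream map entry for the given ID/mask pair, either by reusing a
 * compatible existing SMR or by picking a free slot. As an illustrative
 * example (values are arbitrary): an existing SMR {id=0x400, mask=0x7f}
 * already matches every ID that a new request {id=0x420, mask=0x0f} could
 * match, so the existing index is reused; whereas a new {id=0x404, mask=0xf0}
 * only partially overlaps it (ID 0x404 would match both), which is the
 * conflicting case rejected with -EINVAL below.
 */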
static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
{
        struct arm_smmu_smr *smrs = smmu->smrs;
        int i, free_idx = -ENOSPC;

        /* Stream indexing is blissfully easy */
        if (!smrs)
                return id;

        /* Validating SMRs is... less so */
        for (i = 0; i < smmu->num_mapping_groups; ++i) {
                if (!smrs[i].valid) {
                        /*
                         * Note the first free entry we come across, which
                         * we'll claim in the end if nothing else matches.
                         */
                        if (free_idx < 0)
                                free_idx = i;
                        continue;
                }
                /*
                 * If the new entry is _entirely_ matched by an existing entry,
                 * then reuse that, with the guarantee that there also cannot
                 * be any subsequent conflicting entries. In normal use we'd
                 * expect simply identical entries for this case, but there's
                 * no harm in accommodating the generalisation.
                 */
                if ((mask & smrs[i].mask) == mask &&
                    !((id ^ smrs[i].id) & ~smrs[i].mask))
                        return i;
                /*
                 * If the new entry has any other overlap with an existing one,
                 * though, then there always exists at least one stream ID
                 * which would cause a conflict, and we can't allow that risk.
                 */
                if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
                        return -EINVAL;
        }

        return free_idx;
}

static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
{
        if (--smmu->s2crs[idx].count)
                return false;

        smmu->s2crs[idx] = s2cr_init_val;
        if (smmu->smrs)
                smmu->smrs[idx].valid = false;

        return true;
}

static int arm_smmu_master_alloc_smes(struct device *dev)
{
        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
        struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
        struct arm_smmu_device *smmu = cfg->smmu;
        struct arm_smmu_smr *smrs = smmu->smrs;
        struct iommu_group *group;
        int i, idx, ret;

        mutex_lock(&smmu->stream_map_mutex);
        /* Figure out a viable stream map entry allocation */
        for_each_cfg_sme(fwspec, i, idx) {
                u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
                u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);

                if (idx != INVALID_SMENDX) {
                        ret = -EEXIST;
                        goto out_err;
                }

                ret = arm_smmu_find_sme(smmu, sid, mask);
                if (ret < 0)
                        goto out_err;

                idx = ret;
                if (smrs && smmu->s2crs[idx].count == 0) {
                        smrs[idx].id = sid;
                        smrs[idx].mask = mask;
                        smrs[idx].valid = true;
                }
                smmu->s2crs[idx].count++;
                cfg->smendx[i] = (s16)idx;
        }

        group = iommu_group_get_for_dev(dev);
        if (IS_ERR(group)) {
                ret = PTR_ERR(group);
                goto out_err;
        }
        iommu_group_put(group);

        /* It worked! Now, poke the actual hardware */
        for_each_cfg_sme(fwspec, i, idx) {
                arm_smmu_write_sme(smmu, idx);
                smmu->s2crs[idx].group = group;
        }

        mutex_unlock(&smmu->stream_map_mutex);
        return 0;

out_err:
        while (i--) {
                arm_smmu_free_sme(smmu, cfg->smendx[i]);
                cfg->smendx[i] = INVALID_SMENDX;
        }
        mutex_unlock(&smmu->stream_map_mutex);
        return ret;
}

static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
{
        struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
        struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
        int i, idx;

        mutex_lock(&smmu->stream_map_mutex);
        for_each_cfg_sme(fwspec, i, idx) {
                if (arm_smmu_free_sme(smmu, idx))
                        arm_smmu_write_sme(smmu, idx);
                cfg->smendx[i] = INVALID_SMENDX;
        }
        mutex_unlock(&smmu->stream_map_mutex);
}

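/*
 * Point every stream map entry used by this master at the domain's context
 * bank (or at bypass for an identity domain), writing back only the S2CRs
 * that actually change.
 */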
static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
                                      struct iommu_fwspec *fwspec)
{
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct arm_smmu_s2cr *s2cr = smmu->s2crs;
        u8 cbndx = smmu_domain->cfg.cbndx;
        enum arm_smmu_s2cr_type type;
        int i, idx;

        if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
                type = S2CR_TYPE_BYPASS;
        else
                type = S2CR_TYPE_TRANS;

        for_each_cfg_sme(fwspec, i, idx) {
                if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
                        continue;

                s2cr[idx].type = type;
                s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
                s2cr[idx].cbndx = cbndx;
                arm_smmu_write_s2cr(smmu, idx);
        }
        return 0;
}

static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
        int ret;
        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
        struct arm_smmu_device *smmu;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

        if (!fwspec || fwspec->ops != &arm_smmu_ops) {
                dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
                return -ENXIO;
        }

        /*
         * FIXME: The arch/arm DMA API code tries to attach devices to its own
         * domains between of_xlate() and add_device() - we have no way to cope
         * with that, so until ARM gets converted to rely on groups and default
         * domains, just say no (but more politely than by dereferencing NULL).
         * This should be at least a WARN_ON once that's sorted.
         */
        if (!fwspec->iommu_priv)
                return -ENODEV;

        smmu = fwspec_smmu(fwspec);

        ret = arm_smmu_rpm_get(smmu);
        if (ret < 0)
                return ret;

        /* Ensure that the domain is finalised */
        ret = arm_smmu_init_domain_context(domain, smmu);
        if (ret < 0)
                goto rpm_put;

        /*
         * Sanity check the domain. We don't support domains across
         * different SMMUs.
         */
        if (smmu_domain->smmu != smmu) {
                dev_err(dev,
                        "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
                        dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
                ret = -EINVAL;
                goto rpm_put;
        }

        /* Looks ok, so add the device to the domain */
        ret = arm_smmu_domain_add_master(smmu_domain, fwspec);

        /*
         * Setup an autosuspend delay to avoid bouncing runpm state.
         * Otherwise, if a driver for a suspended consumer device
         * unmaps buffers, it will runpm resume/suspend for each one.
         *
         * For example, when used by a GPU device, when an application
         * or game exits, it can trigger unmapping 100s or 1000s of
         * buffers.  With a runpm cycle for each buffer, that adds up
         * to 5-10sec worth of reprogramming the context bank, while
         * the system appears to be locked up to the user.
         */
        pm_runtime_set_autosuspend_delay(smmu->dev, 20);
        pm_runtime_use_autosuspend(smmu->dev);

rpm_put:
        arm_smmu_rpm_put(smmu);
        return ret;
}

static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
                        phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
        struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
        int ret;

        if (!ops)
                return -ENODEV;

        arm_smmu_rpm_get(smmu);
        ret = ops->map(ops, iova, paddr, size, prot);
        arm_smmu_rpm_put(smmu);

        return ret;
}

static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
                             size_t size, struct iommu_iotlb_gather *gather)
{
        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
        struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
        size_t ret;

        if (!ops)
                return 0;

        arm_smmu_rpm_get(smmu);
        ret = ops->unmap(ops, iova, size, gather);
        arm_smmu_rpm_put(smmu);

        return ret;
}

static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_device *smmu = smmu_domain->smmu;

        if (smmu_domain->flush_ops) {
                arm_smmu_rpm_get(smmu);
                smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
                arm_smmu_rpm_put(smmu);
        }
}

static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
                                struct iommu_iotlb_gather *gather)
{
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_device *smmu = smmu_domain->smmu;

        if (!smmu)
                return;

        arm_smmu_rpm_get(smmu);
        if (smmu->version == ARM_SMMU_V2 ||
            smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
                arm_smmu_tlb_sync_context(smmu_domain);
        else
                arm_smmu_tlb_sync_global(smmu);
        arm_smmu_rpm_put(smmu);
}

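/*
 * Resolve an IOVA using the hardware address translation operation (ATS1PR)
 * of the context bank, falling back to a software page table walk if the
 * translation unit times out.
 */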
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
                                              dma_addr_t iova)
{
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
        struct device *dev = smmu->dev;
        void __iomem *reg;
        u32 tmp;
        u64 phys;
        unsigned long va, flags;
        int ret, idx = cfg->cbndx;

        ret = arm_smmu_rpm_get(smmu);
        if (ret < 0)
                return 0;

        spin_lock_irqsave(&smmu_domain->cb_lock, flags);
        va = iova & ~0xfffUL;
        if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
                arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
        else
                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);

        reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
        if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) {
                spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
                dev_err(dev,
                        "iova to phys timed out on %pad. Falling back to software table walk.\n",
                        &iova);
                arm_smmu_rpm_put(smmu);
                return ops->iova_to_phys(ops, iova);
        }

        phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
        spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
        if (phys & CB_PAR_F) {
                dev_err(dev, "translation fault!\n");
                dev_err(dev, "PAR = 0x%llx\n", phys);
                arm_smmu_rpm_put(smmu);
                return 0;
        }

        arm_smmu_rpm_put(smmu);

        return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}

static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
                                        dma_addr_t iova)
{
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

        if (domain->type == IOMMU_DOMAIN_IDENTITY)
                return iova;

        if (!ops)
                return 0;

        if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
                        smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
                return arm_smmu_iova_to_phys_hard(domain, iova);

        return ops->iova_to_phys(ops, iova);
}

static bool arm_smmu_capable(enum iommu_cap cap)
{
        switch (cap) {
        case IOMMU_CAP_CACHE_COHERENCY:
                /*
                 * Return true here as the SMMU can always send out coherent
                 * requests.
                 */
                return true;
        case IOMMU_CAP_NOEXEC:
                return true;
        default:
                return false;
        }
}

static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
        struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
                                                          fwnode);
        put_device(dev);
        return dev ? dev_get_drvdata(dev) : NULL;
}

static int arm_smmu_add_device(struct device *dev)
{
        struct arm_smmu_device *smmu;
        struct arm_smmu_master_cfg *cfg;
        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
        int i, ret;

        if (using_legacy_binding) {
                ret = arm_smmu_register_legacy_master(dev, &smmu);

                /*
                 * If dev->iommu_fwspec is initially NULL,
                 * arm_smmu_register_legacy_master() will allocate/initialise
                 * a new one. Thus we need to update fwspec for later use.
                 */
1360                 fwspec = dev_iommu_fwspec_get(dev);
1361                 if (ret)
1362                         goto out_free;
1363         } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1364                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1365         } else {
1366                 return -ENODEV;
1367         }
1368
1369         ret = -EINVAL;
1370         for (i = 0; i < fwspec->num_ids; i++) {
1371                 u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
1372                 u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
1373
1374                 if (sid & ~smmu->streamid_mask) {
1375                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1376                                 sid, smmu->streamid_mask);
1377                         goto out_free;
1378                 }
1379                 if (mask & ~smmu->smr_mask_mask) {
1380                         dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1381                                 mask, smmu->smr_mask_mask);
1382                         goto out_free;
1383                 }
1384         }
1385
1386         ret = -ENOMEM;
1387         cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1388                       GFP_KERNEL);
1389         if (!cfg)
1390                 goto out_free;
1391
1392         cfg->smmu = smmu;
1393         fwspec->iommu_priv = cfg;
1394         while (i--)
1395                 cfg->smendx[i] = INVALID_SMENDX;
1396
1397         ret = arm_smmu_rpm_get(smmu);
1398         if (ret < 0)
1399                 goto out_cfg_free;
1400
1401         ret = arm_smmu_master_alloc_smes(dev);
1402         arm_smmu_rpm_put(smmu);
1403
1404         if (ret)
1405                 goto out_cfg_free;
1406
1407         iommu_device_link(&smmu->iommu, dev);
1408
1409         device_link_add(dev, smmu->dev,
1410                         DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1411
1412         return 0;
1413
1414 out_cfg_free:
1415         kfree(cfg);
1416 out_free:
1417         iommu_fwspec_free(dev);
1418         return ret;
1419 }
1420
1421 static void arm_smmu_remove_device(struct device *dev)
1422 {
1423         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1424         struct arm_smmu_master_cfg *cfg;
1425         struct arm_smmu_device *smmu;
1426         int ret;
1427
1428         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1429                 return;
1430
1431         cfg  = fwspec->iommu_priv;
1432         smmu = cfg->smmu;
1433
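             /* Freeing the SMEs writes SMR/S2CR registers, so power up the SMMU first */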
1434         ret = arm_smmu_rpm_get(smmu);
1435         if (ret < 0)
1436                 return;
1437
1438         iommu_device_unlink(&smmu->iommu, dev);
1439         arm_smmu_master_free_smes(fwspec);
1440
1441         arm_smmu_rpm_put(smmu);
1442
1443         iommu_group_remove_device(dev);
1444         kfree(fwspec->iommu_priv);
1445         iommu_fwspec_free(dev);
1446 }
1447
1448 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1449 {
1450         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1451         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1452         struct iommu_group *group = NULL;
1453         int i, idx;
1454
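             /*
              * Masters sharing an SME must end up in the same group: reuse any
              * group already bound to one of this device's SMEs, and treat
              * conflicting bindings as an error.
              */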
1455         for_each_cfg_sme(fwspec, i, idx) {
1456                 if (group && smmu->s2crs[idx].group &&
1457                     group != smmu->s2crs[idx].group)
1458                         return ERR_PTR(-EINVAL);
1459
1460                 group = smmu->s2crs[idx].group;
1461         }
1462
1463         if (group)
1464                 return iommu_group_ref_get(group);
1465
1466         if (dev_is_pci(dev))
1467                 group = pci_device_group(dev);
1468         else if (dev_is_fsl_mc(dev))
1469                 group = fsl_mc_device_group(dev);
1470         else
1471                 group = generic_device_group(dev);
1472
1473         return group;
1474 }
1475
1476 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1477                                     enum iommu_attr attr, void *data)
1478 {
1479         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1480
1481         switch (domain->type) {
1482         case IOMMU_DOMAIN_UNMANAGED:
1483                 switch (attr) {
1484                 case DOMAIN_ATTR_NESTING:
1485                         *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1486                         return 0;
1487                 default:
1488                         return -ENODEV;
1489                 }
1490                 break;
1491         case IOMMU_DOMAIN_DMA:
1492                 switch (attr) {
1493                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1494                         *(int *)data = smmu_domain->non_strict;
1495                         return 0;
1496                 default:
1497                         return -ENODEV;
1498                 }
1499                 break;
1500         default:
1501                 return -EINVAL;
1502         }
1503 }
1504
1505 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1506                                     enum iommu_attr attr, void *data)
1507 {
1508         int ret = 0;
1509         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1510
1511         mutex_lock(&smmu_domain->init_mutex);
1512
1513         switch (domain->type) {
1514         case IOMMU_DOMAIN_UNMANAGED:
1515                 switch (attr) {
1516                 case DOMAIN_ATTR_NESTING:
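                             /* The stage can't change once the domain is live */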
1517                         if (smmu_domain->smmu) {
1518                                 ret = -EPERM;
1519                                 goto out_unlock;
1520                         }
1521
1522                         if (*(int *)data)
1523                                 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1524                         else
1525                                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1526                         break;
1527                 default:
1528                         ret = -ENODEV;
1529                 }
1530                 break;
1531         case IOMMU_DOMAIN_DMA:
1532                 switch (attr) {
1533                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1534                         smmu_domain->non_strict = *(int *)data;
1535                         break;
1536                 default:
1537                         ret = -ENODEV;
1538                 }
1539                 break;
1540         default:
1541                 ret = -EINVAL;
1542         }
1543 out_unlock:
1544         mutex_unlock(&smmu_domain->init_mutex);
1545         return ret;
1546 }
1547
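     /*
      * Illustrative example (not from any particular DT): with the generic
      * binding a master might carry
      *
      *      iommus = <&smmu 0x400 0x3f>;
      *
      * where args[0] (0x400) is the stream ID and the optional args[1] (0x3f)
      * is an SMR mask; both are packed into a single fwspec ID using the SMR
      * register field layout.
      */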
1548 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1549 {
1550         u32 mask, fwid = 0;
1551
1552         if (args->args_count > 0)
1553                 fwid |= FIELD_PREP(SMR_ID, args->args[0]);
1554
1555         if (args->args_count > 1)
1556                 fwid |= FIELD_PREP(SMR_MASK, args->args[1]);
1557         else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1558                 fwid |= FIELD_PREP(SMR_MASK, mask);
1559
1560         return iommu_fwspec_add_ids(dev, &fwid, 1);
1561 }
1562
1563 static void arm_smmu_get_resv_regions(struct device *dev,
1564                                       struct list_head *head)
1565 {
1566         struct iommu_resv_region *region;
1567         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1568
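             /*
              * Advertise a software-managed MSI window (IOMMU_RESV_SW_MSI) so
              * that MSI doorbells can be mapped via the SMMU, then add any
              * firmware-described reserved regions.
              */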
1569         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1570                                          prot, IOMMU_RESV_SW_MSI);
1571         if (!region)
1572                 return;
1573
1574         list_add_tail(&region->list, head);
1575
1576         iommu_dma_get_resv_regions(dev, head);
1577 }
1578
1579 static void arm_smmu_put_resv_regions(struct device *dev,
1580                                       struct list_head *head)
1581 {
1582         struct iommu_resv_region *entry, *next;
1583
1584         list_for_each_entry_safe(entry, next, head, list)
1585                 kfree(entry);
1586 }
1587
1588 static struct iommu_ops arm_smmu_ops = {
1589         .capable                = arm_smmu_capable,
1590         .domain_alloc           = arm_smmu_domain_alloc,
1591         .domain_free            = arm_smmu_domain_free,
1592         .attach_dev             = arm_smmu_attach_dev,
1593         .map                    = arm_smmu_map,
1594         .unmap                  = arm_smmu_unmap,
1595         .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
1596         .iotlb_sync             = arm_smmu_iotlb_sync,
1597         .iova_to_phys           = arm_smmu_iova_to_phys,
1598         .add_device             = arm_smmu_add_device,
1599         .remove_device          = arm_smmu_remove_device,
1600         .device_group           = arm_smmu_device_group,
1601         .domain_get_attr        = arm_smmu_domain_get_attr,
1602         .domain_set_attr        = arm_smmu_domain_set_attr,
1603         .of_xlate               = arm_smmu_of_xlate,
1604         .get_resv_regions       = arm_smmu_get_resv_regions,
1605         .put_resv_regions       = arm_smmu_put_resv_regions,
1606         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1607 };
1608
1609 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1610 {
1611         int i;
1612         u32 reg;
1613
1614         /* clear global FSR */
1615         reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1616         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1617
1618         /*
1619          * Reset stream mapping groups: Initial values mark all SMRn as
1620          * invalid and all S2CRn as bypass unless overridden.
1621          */
1622         for (i = 0; i < smmu->num_mapping_groups; ++i)
1623                 arm_smmu_write_sme(smmu, i);
1624
1625         /* Make sure all context banks are disabled and clear CB_FSR */
1626         for (i = 0; i < smmu->num_context_banks; ++i) {
1627                 arm_smmu_write_context_bank(smmu, i);
1628                 arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, FSR_FAULT);
1629         }
1630
1631         /* Invalidate the TLB, just in case */
1632         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1633         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1634
1635         reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1636
1637         /* Enable fault reporting */
1638         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1639
1640         /* Disable TLB broadcasting. */
1641         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1642
1643         /* Enable client access, handling unmatched streams as appropriate */
1644         reg &= ~sCR0_CLIENTPD;
1645         if (disable_bypass)
1646                 reg |= sCR0_USFCFG;
1647         else
1648                 reg &= ~sCR0_USFCFG;
1649
1650         /* Disable forced broadcasting */
1651         reg &= ~sCR0_FB;
1652
1653         /* Don't upgrade barriers */
1654         reg &= ~(sCR0_BSU);
1655
1656         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1657                 reg |= sCR0_VMID16EN;
1658
1659         if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1660                 reg |= sCR0_EXIDENABLE;
1661
1662         if (smmu->impl && smmu->impl->reset)
1663                 smmu->impl->reset(smmu);
1664
1665         /* Push the button */
1666         arm_smmu_tlb_sync_global(smmu);
1667         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1668 }
1669
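     /*
      * Decode the address-size encodings used by the ID2 IAS/OAS/UBS fields
      * into a number of address bits.
      */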
1670 static int arm_smmu_id_size_to_bits(int size)
1671 {
1672         switch (size) {
1673         case 0:
1674                 return 32;
1675         case 1:
1676                 return 36;
1677         case 2:
1678                 return 40;
1679         case 3:
1680                 return 42;
1681         case 4:
1682                 return 44;
1683         case 5:
1684         default:
1685                 return 48;
1686         }
1687 }
1688
1689 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1690 {
1691         unsigned int size;
1692         u32 id;
1693         bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1694         int i;
1695
1696         dev_notice(smmu->dev, "probing hardware configuration...\n");
1697         dev_notice(smmu->dev, "SMMUv%d with:\n",
1698                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1699
1700         /* ID0 */
1701         id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1702
1703         /* Restrict available stages based on module parameter */
1704         if (force_stage == 1)
1705                 id &= ~(ID0_S2TS | ID0_NTS);
1706         else if (force_stage == 2)
1707                 id &= ~(ID0_S1TS | ID0_NTS);
1708
1709         if (id & ID0_S1TS) {
1710                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1711                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1712         }
1713
1714         if (id & ID0_S2TS) {
1715                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1716                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1717         }
1718
1719         if (id & ID0_NTS) {
1720                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1721                 dev_notice(smmu->dev, "\tnested translation\n");
1722         }
1723
1724         if (!(smmu->features &
1725                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1726                 dev_err(smmu->dev, "\tno translation support!\n");
1727                 return -ENODEV;
1728         }
1729
1730         if ((id & ID0_S1TS) &&
1731                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1732                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1733                 dev_notice(smmu->dev, "\taddress translation ops\n");
1734         }
1735
1736         /*
1737          * In order for DMA API calls to work properly, we must defer to what
1738          * the FW says about coherency, regardless of what the hardware claims.
1739          * Fortunately, this also opens up a workaround for systems where the
1740          * ID register value has ended up configured incorrectly.
1741          */
1742         cttw_reg = !!(id & ID0_CTTW);
1743         if (cttw_fw || cttw_reg)
1744                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1745                            cttw_fw ? "" : "non-");
1746         if (cttw_fw != cttw_reg)
1747                 dev_notice(smmu->dev,
1748                            "\t(IDR0.CTTW overridden by FW configuration)\n");
1749
1750         /* Max. number of entries we have for stream matching/indexing */
1751         if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1752                 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1753                 size = 1 << 16;
1754         } else {
1755                 size = 1 << FIELD_GET(ID0_NUMSIDB, id);
1756         }
1757         smmu->streamid_mask = size - 1;
1758         if (id & ID0_SMS) {
1759                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1760                 size = FIELD_GET(ID0_NUMSMRG, id);
1761                 if (size == 0) {
1762                         dev_err(smmu->dev,
1763                                 "stream-matching supported, but no SMRs present!\n");
1764                         return -ENODEV;
1765                 }
1766
1767                 /* Zero-initialised to mark as invalid */
1768                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1769                                           GFP_KERNEL);
1770                 if (!smmu->smrs)
1771                         return -ENOMEM;
1772
1773                 dev_notice(smmu->dev,
1774                            "\tstream matching with %u register groups\n", size);
1775         }
1776         /* s2cr->type == 0 means translation, so initialise explicitly */
1777         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1778                                          GFP_KERNEL);
1779         if (!smmu->s2crs)
1780                 return -ENOMEM;
1781         for (i = 0; i < size; i++)
1782                 smmu->s2crs[i] = s2cr_init_val;
1783
1784         smmu->num_mapping_groups = size;
1785         mutex_init(&smmu->stream_map_mutex);
1786         spin_lock_init(&smmu->global_sync_lock);
1787
1788         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1789                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1790                 if (!(id & ID0_PTFS_NO_AARCH32S))
1791                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1792         }
1793
1794         /* ID1 */
1795         id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
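             /* ID1.PAGESIZE selects 64KB register pages when set, 4KB otherwise */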
1796         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1797
1798         /* Check for size mismatch of SMMU address space from mapped region */
1799         size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1);
1800         if (smmu->numpage != 2 * size << smmu->pgshift)
1801                 dev_warn(smmu->dev,
1802                         "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1803                         2 * size << smmu->pgshift, smmu->numpage);
1804         /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1805         smmu->numpage = size;
1806
1807         smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id);
1808         smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id);
1809         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1810                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1811                 return -ENODEV;
1812         }
1813         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1814                    smmu->num_context_banks, smmu->num_s2_context_banks);
1815         smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1816                                  sizeof(*smmu->cbs), GFP_KERNEL);
1817         if (!smmu->cbs)
1818                 return -ENOMEM;
1819
1820         /* ID2 */
1821         id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1822         size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id));
1823         smmu->ipa_size = size;
1824
1825         /* The output mask is also applied for bypass */
1826         size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id));
1827         smmu->pa_size = size;
1828
1829         if (id & ID2_VMID16)
1830                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1831
1832         /*
1833          * What the page table walker can address actually depends on which
1834          * descriptor format is in use, but since a) we don't know that yet,
1835          * and b) it can vary per context bank, this will have to do...
1836          */
1837         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1838                 dev_warn(smmu->dev,
1839                          "failed to set DMA mask for table walker\n");
1840
1841         if (smmu->version < ARM_SMMU_V2) {
1842                 smmu->va_size = smmu->ipa_size;
1843                 if (smmu->version == ARM_SMMU_V1_64K)
1844                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1845         } else {
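                     /* SMMUv2: the upstream bus size bounds the input address range */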
1846                 size = FIELD_GET(ID2_UBS, id);
1847                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1848                 if (id & ID2_PTFS_4K)
1849                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1850                 if (id & ID2_PTFS_16K)
1851                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1852                 if (id & ID2_PTFS_64K)
1853                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1854         }
1855
1856         /* Now we've corralled the various formats, what'll it do? */
1857         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1858                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1859         if (smmu->features &
1860             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1861                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1862         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1863                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1864         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1865                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1866
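             /*
              * The first SMMU to probe seeds the global pgsize_bitmap; later
              * ones simply OR in whatever page sizes they support.
              */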
1867         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1868                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1869         else
1870                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1871         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1872                    smmu->pgsize_bitmap);
1873
1874
1875         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1876                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1877                            smmu->va_size, smmu->ipa_size);
1878
1879         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1880                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1881                            smmu->ipa_size, smmu->pa_size);
1882
1883         if (smmu->impl && smmu->impl->cfg_probe)
1884                 return smmu->impl->cfg_probe(smmu);
1885
1886         return 0;
1887 }
1888
1889 struct arm_smmu_match_data {
1890         enum arm_smmu_arch_version version;
1891         enum arm_smmu_implementation model;
1892 };
1893
1894 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1895 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1896
1897 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1898 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1899 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1900 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1901 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1902 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1903
1904 static const struct of_device_id arm_smmu_of_match[] = {
1905         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1906         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1907         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1908         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1909         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1910         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1911         { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1912         { },
1913 };
1914
1915 #ifdef CONFIG_ACPI
1916 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1917 {
1918         int ret = 0;
1919
1920         switch (model) {
1921         case ACPI_IORT_SMMU_V1:
1922         case ACPI_IORT_SMMU_CORELINK_MMU400:
1923                 smmu->version = ARM_SMMU_V1;
1924                 smmu->model = GENERIC_SMMU;
1925                 break;
1926         case ACPI_IORT_SMMU_CORELINK_MMU401:
1927                 smmu->version = ARM_SMMU_V1_64K;
1928                 smmu->model = GENERIC_SMMU;
1929                 break;
1930         case ACPI_IORT_SMMU_V2:
1931                 smmu->version = ARM_SMMU_V2;
1932                 smmu->model = GENERIC_SMMU;
1933                 break;
1934         case ACPI_IORT_SMMU_CORELINK_MMU500:
1935                 smmu->version = ARM_SMMU_V2;
1936                 smmu->model = ARM_MMU500;
1937                 break;
1938         case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1939                 smmu->version = ARM_SMMU_V2;
1940                 smmu->model = CAVIUM_SMMUV2;
1941                 break;
1942         default:
1943                 ret = -ENODEV;
1944         }
1945
1946         return ret;
1947 }
1948
1949 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1950                                       struct arm_smmu_device *smmu)
1951 {
1952         struct device *dev = smmu->dev;
1953         struct acpi_iort_node *node =
1954                 *(struct acpi_iort_node **)dev_get_platdata(dev);
1955         struct acpi_iort_smmu *iort_smmu;
1956         int ret;
1957
1958         /* Retrieve SMMU1/2 specific data */
1959         iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1960
1961         ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1962         if (ret < 0)
1963                 return ret;
1964
1965         /* Ignore the configuration access interrupt */
1966         smmu->num_global_irqs = 1;
1967
1968         if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1969                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1970
1971         return 0;
1972 }
1973 #else
1974 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1975                                              struct arm_smmu_device *smmu)
1976 {
1977         return -ENODEV;
1978 }
1979 #endif
1980
1981 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
1982                                     struct arm_smmu_device *smmu)
1983 {
1984         const struct arm_smmu_match_data *data;
1985         struct device *dev = &pdev->dev;
1986         bool legacy_binding;
1987
1988         if (of_property_read_u32(dev->of_node, "#global-interrupts",
1989                                  &smmu->num_global_irqs)) {
1990                 dev_err(dev, "missing #global-interrupts property\n");
1991                 return -ENODEV;
1992         }
1993
1994         data = of_device_get_match_data(dev);
1995         smmu->version = data->version;
1996         smmu->model = data->model;
1997
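             /*
              * The legacy "mmu-masters" binding and the generic binding cannot
              * be mixed: whichever style is seen first wins, and any later
              * mismatch aborts the probe.
              */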
1998         legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
1999         if (legacy_binding && !using_generic_binding) {
2000                 if (!using_legacy_binding)
2001                         pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2002                 using_legacy_binding = true;
2003         } else if (!legacy_binding && !using_legacy_binding) {
2004                 using_generic_binding = true;
2005         } else {
2006                 dev_err(dev, "not probing due to mismatched DT properties\n");
2007                 return -ENODEV;
2008         }
2009
2010         if (of_dma_is_coherent(dev->of_node))
2011                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2012
2013         return 0;
2014 }
2015
2016 static void arm_smmu_bus_init(void)
2017 {
2018         /* Oh, for a proper bus abstraction */
2019         if (!iommu_present(&platform_bus_type))
2020                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2021 #ifdef CONFIG_ARM_AMBA
2022         if (!iommu_present(&amba_bustype))
2023                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2024 #endif
2025 #ifdef CONFIG_PCI
2026         if (!iommu_present(&pci_bus_type)) {
2027                 pci_request_acs();
2028                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2029         }
2030 #endif
2031 #ifdef CONFIG_FSL_MC_BUS
2032         if (!iommu_present(&fsl_mc_bus_type))
2033                 bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
2034 #endif
2035 }
2036
2037 static int arm_smmu_device_probe(struct platform_device *pdev)
2038 {
2039         struct resource *res;
2040         resource_size_t ioaddr;
2041         struct arm_smmu_device *smmu;
2042         struct device *dev = &pdev->dev;
2043         int num_irqs, i, err;
2044
2045         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2046         if (!smmu) {
2047                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2048                 return -ENOMEM;
2049         }
2050         smmu->dev = dev;
2051
2052         if (dev->of_node)
2053                 err = arm_smmu_device_dt_probe(pdev, smmu);
2054         else
2055                 err = arm_smmu_device_acpi_probe(pdev, smmu);
2056
2057         if (err)
2058                 return err;
2059
2060         smmu = arm_smmu_impl_init(smmu);
2061         if (IS_ERR(smmu))
2062                 return PTR_ERR(smmu);
2063
2064         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2065         ioaddr = res->start;
2066         smmu->base = devm_ioremap_resource(dev, res);
2067         if (IS_ERR(smmu->base))
2068                 return PTR_ERR(smmu->base);
2069         /*
2070          * The resource size should effectively match the value of SMMU_TOP;
2071          * stash that temporarily until we know PAGESIZE to validate it with.
2072          */
2073         smmu->numpage = resource_size(res);
2074
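             /*
              * Count the IRQ resources: the first num_global_irqs are global
              * fault interrupts, the remainder are assumed to be context-bank
              * interrupts.
              */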
2075         num_irqs = 0;
2076         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2077                 num_irqs++;
2078                 if (num_irqs > smmu->num_global_irqs)
2079                         smmu->num_context_irqs++;
2080         }
2081
2082         if (!smmu->num_context_irqs) {
2083                 dev_err(dev, "found %d interrupts but expected at least %d\n",
2084                         num_irqs, smmu->num_global_irqs + 1);
2085                 return -ENODEV;
2086         }
2087
2088         smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2089                                   GFP_KERNEL);
2090         if (!smmu->irqs) {
2091                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2092                 return -ENOMEM;
2093         }
2094
2095         for (i = 0; i < num_irqs; ++i) {
2096                 int irq = platform_get_irq(pdev, i);
2097
2098                 if (irq < 0)
2099                         return -ENODEV;
2100                 smmu->irqs[i] = irq;
2101         }
2102
2103         err = devm_clk_bulk_get_all(dev, &smmu->clks);
2104         if (err < 0) {
2105                 dev_err(dev, "failed to get clocks %d\n", err);
2106                 return err;
2107         }
2108         smmu->num_clks = err;
2109
2110         err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2111         if (err)
2112                 return err;
2113
2114         err = arm_smmu_device_cfg_probe(smmu);
2115         if (err)
2116                 return err;
2117
2118         if (smmu->version == ARM_SMMU_V2) {
2119                 if (smmu->num_context_banks > smmu->num_context_irqs) {
2120                         dev_err(dev,
2121                               "found only %d context irq(s) but %d required\n",
2122                               smmu->num_context_irqs, smmu->num_context_banks);
2123                         return -ENODEV;
2124                 }
2125
2126                 /* Ignore superfluous interrupts */
2127                 smmu->num_context_irqs = smmu->num_context_banks;
2128         }
2129
2130         for (i = 0; i < smmu->num_global_irqs; ++i) {
2131                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2132                                        arm_smmu_global_fault,
2133                                        IRQF_SHARED,
2134                                        "arm-smmu global fault",
2135                                        smmu);
2136                 if (err) {
2137                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2138                                 i, smmu->irqs[i]);
2139                         return err;
2140                 }
2141         }
2142
2143         err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2144                                      "smmu.%pa", &ioaddr);
2145         if (err) {
2146                 dev_err(dev, "Failed to register iommu in sysfs\n");
2147                 return err;
2148         }
2149
2150         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2151         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2152
2153         err = iommu_device_register(&smmu->iommu);
2154         if (err) {
2155                 dev_err(dev, "Failed to register iommu\n");
2156                 return err;
2157         }
2158
2159         platform_set_drvdata(pdev, smmu);
2160         arm_smmu_device_reset(smmu);
2161         arm_smmu_test_smr_masks(smmu);
2162
2163         /*
2164          * We want to avoid touching dev->power.lock in fastpaths unless
2165          * it's really going to do something useful - pm_runtime_enabled()
2166          * can serve as an ideal proxy for that decision. So, conditionally
2167          * enable pm_runtime.
2168          */
2169         if (dev->pm_domain) {
2170                 pm_runtime_set_active(dev);
2171                 pm_runtime_enable(dev);
2172         }
2173
2174         /*
2175          * For ACPI and generic DT bindings, an SMMU will be probed before
2176          * any device which might need it, so we want the bus ops in place
2177          * ready to handle default domain setup as soon as any SMMU exists.
2178          */
2179         if (!using_legacy_binding)
2180                 arm_smmu_bus_init();
2181
2182         return 0;
2183 }
2184
2185 /*
2186  * With the legacy DT binding in play, though, we have no guarantees about
2187  * probe order, but then we're also not doing default domains, so we can
2188  * delay setting bus ops until we're sure every possible SMMU is ready,
2189  * and that way ensure that no add_device() calls get missed.
2190  */
2191 static int arm_smmu_legacy_bus_init(void)
2192 {
2193         if (using_legacy_binding)
2194                 arm_smmu_bus_init();
2195         return 0;
2196 }
2197 device_initcall_sync(arm_smmu_legacy_bus_init);
2198
2199 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2200 {
2201         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2202
2203         if (!smmu)
2204                 return;
2205
2206         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2207                 dev_err(&pdev->dev, "removing device with active domains!\n");
2208
2209         arm_smmu_rpm_get(smmu);
2210         /* Turn the thing off */
2211         arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, sCR0_CLIENTPD);
2212         arm_smmu_rpm_put(smmu);
2213
2214         if (pm_runtime_enabled(smmu->dev))
2215                 pm_runtime_force_suspend(smmu->dev);
2216         else
2217                 clk_bulk_disable(smmu->num_clks, smmu->clks);
2218
2219         clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2220 }
2221
2222 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2223 {
2224         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2225         int ret;
2226
2227         ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2228         if (ret)
2229                 return ret;
2230
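             /* Reprogram the SMMU in case register state was lost while powered off */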
2231         arm_smmu_device_reset(smmu);
2232
2233         return 0;
2234 }
2235
2236 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2237 {
2238         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2239
2240         clk_bulk_disable(smmu->num_clks, smmu->clks);
2241
2242         return 0;
2243 }
2244
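     /*
      * System sleep: if the device is already runtime-suspended there is
      * nothing to do; otherwise reuse the runtime PM callbacks.
      */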
2245 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2246 {
2247         if (pm_runtime_suspended(dev))
2248                 return 0;
2249
2250         return arm_smmu_runtime_resume(dev);
2251 }
2252
2253 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2254 {
2255         if (pm_runtime_suspended(dev))
2256                 return 0;
2257
2258         return arm_smmu_runtime_suspend(dev);
2259 }
2260
2261 static const struct dev_pm_ops arm_smmu_pm_ops = {
2262         SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2263         SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2264                            arm_smmu_runtime_resume, NULL)
2265 };
2266
2267 static struct platform_driver arm_smmu_driver = {
2268         .driver = {
2269                 .name                   = "arm-smmu",
2270                 .of_match_table         = of_match_ptr(arm_smmu_of_match),
2271                 .pm                     = &arm_smmu_pm_ops,
2272                 .suppress_bind_attrs    = true,
2273         },
2274         .probe  = arm_smmu_device_probe,
2275         .shutdown = arm_smmu_device_shutdown,
2276 };
2277 builtin_platform_driver(arm_smmu_driver);