iommu/arm-smmu-v3: Split arm_smmu_tlb_inv_range()
linux-2.6-microblaze.git: drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_iommu.h>
27 #include <linux/of_platform.h>
28 #include <linux/pci.h>
29 #include <linux/pci-ats.h>
30 #include <linux/platform_device.h>
31
32 #include <linux/amba/bus.h>
33
34 #include "arm-smmu-v3.h"
35
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44         "Disable MSI-based polling for CMD_SYNC completion.");
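/*
 * Editor's usage note (not part of the original file): assuming the driver
 * is built as arm_smmu_v3.ko, these 0444 (read-only) parameters can only be
 * set at boot time, e.g. on the kernel command line:
 *
 *	arm_smmu_v3.disable_bypass=0 arm_smmu_v3.disable_msipolling=1
 *
 * and their current values can be read back from
 * /sys/module/arm_smmu_v3/parameters/.
 */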
45
46 enum arm_smmu_msi_index {
47         EVTQ_MSI_INDEX,
48         GERROR_MSI_INDEX,
49         PRIQ_MSI_INDEX,
50         ARM_SMMU_MAX_MSIS,
51 };
52
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54         [EVTQ_MSI_INDEX] = {
55                 ARM_SMMU_EVTQ_IRQ_CFG0,
56                 ARM_SMMU_EVTQ_IRQ_CFG1,
57                 ARM_SMMU_EVTQ_IRQ_CFG2,
58         },
59         [GERROR_MSI_INDEX] = {
60                 ARM_SMMU_GERROR_IRQ_CFG0,
61                 ARM_SMMU_GERROR_IRQ_CFG1,
62                 ARM_SMMU_GERROR_IRQ_CFG2,
63         },
64         [PRIQ_MSI_INDEX] = {
65                 ARM_SMMU_PRIQ_IRQ_CFG0,
66                 ARM_SMMU_PRIQ_IRQ_CFG1,
67                 ARM_SMMU_PRIQ_IRQ_CFG2,
68         },
69 };
70
71 struct arm_smmu_option_prop {
72         u32 opt;
73         const char *prop;
74 };
75
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78
79 /*
80  * Special value used by SVA when a process dies, to quiesce a CD without
81  * disabling it.
82  */
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
84
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86         { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87         { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88         { 0, NULL},
89 };
90
91 static void parse_driver_options(struct arm_smmu_device *smmu)
92 {
93         int i = 0;
94
95         do {
96                 if (of_property_read_bool(smmu->dev->of_node,
97                                                 arm_smmu_options[i].prop)) {
98                         smmu->options |= arm_smmu_options[i].opt;
99                         dev_notice(smmu->dev, "option %s\n",
100                                 arm_smmu_options[i].prop);
101                 }
102         } while (arm_smmu_options[++i].opt);
103 }
104
105 /* Low-level queue manipulation functions */
106 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
107 {
108         u32 space, prod, cons;
109
110         prod = Q_IDX(q, q->prod);
111         cons = Q_IDX(q, q->cons);
112
113         if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
114                 space = (1 << q->max_n_shift) - (prod - cons);
115         else
116                 space = cons - prod;
117
118         return space >= n;
119 }
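/*
 * Worked example (editor's illustration of the arithmetic above, not part of
 * the original file): with max_n_shift == 3 the queue holds 8 entries. If
 * prod and cons carry the same wrap bit and their indices are 5 and 2, then
 * space = 8 - (5 - 2) = 5 free slots. Once prod has wrapped while cons has
 * not, the wrap bits differ and the free space is simply cons - prod.
 */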
120
121 static bool queue_full(struct arm_smmu_ll_queue *q)
122 {
123         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
124                Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
125 }
126
127 static bool queue_empty(struct arm_smmu_ll_queue *q)
128 {
129         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
130                Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
131 }
132
133 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
134 {
135         return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
136                 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
137                ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
138                 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
139 }
140
141 static void queue_sync_cons_out(struct arm_smmu_queue *q)
142 {
143         /*
144          * Ensure that all CPU accesses (reads and writes) to the queue
145          * are complete before we update the cons pointer.
146          */
147         __iomb();
148         writel_relaxed(q->llq.cons, q->cons_reg);
149 }
150
151 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
152 {
153         u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
154         q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
155 }
156
157 static int queue_sync_prod_in(struct arm_smmu_queue *q)
158 {
159         u32 prod;
160         int ret = 0;
161
162         /*
163          * We can't use the _relaxed() variant here, as we must prevent
164          * speculative reads of the queue before we have determined that
165          * prod has indeed moved.
166          */
167         prod = readl(q->prod_reg);
168
169         if (Q_OVF(prod) != Q_OVF(q->llq.prod))
170                 ret = -EOVERFLOW;
171
172         q->llq.prod = prod;
173         return ret;
174 }
175
176 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
177 {
178         u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
179         return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
180 }
181
182 static void queue_poll_init(struct arm_smmu_device *smmu,
183                             struct arm_smmu_queue_poll *qp)
184 {
185         qp->delay = 1;
186         qp->spin_cnt = 0;
187         qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
188         qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
189 }
190
191 static int queue_poll(struct arm_smmu_queue_poll *qp)
192 {
193         if (ktime_compare(ktime_get(), qp->timeout) > 0)
194                 return -ETIMEDOUT;
195
196         if (qp->wfe) {
197                 wfe();
198         } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
199                 cpu_relax();
200         } else {
201                 udelay(qp->delay);
202                 qp->delay *= 2;
203                 qp->spin_cnt = 0;
204         }
205
206         return 0;
207 }
208
209 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
210 {
211         int i;
212
213         for (i = 0; i < n_dwords; ++i)
214                 *dst++ = cpu_to_le64(*src++);
215 }
216
217 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
218 {
219         int i;
220
221         for (i = 0; i < n_dwords; ++i)
222                 *dst++ = le64_to_cpu(*src++);
223 }
224
225 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
226 {
227         if (queue_empty(&q->llq))
228                 return -EAGAIN;
229
230         queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
231         queue_inc_cons(&q->llq);
232         queue_sync_cons_out(q);
233         return 0;
234 }
235
236 /* High-level queue accessors */
237 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
238 {
239         memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
240         cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
241
242         switch (ent->opcode) {
243         case CMDQ_OP_TLBI_EL2_ALL:
244         case CMDQ_OP_TLBI_NSNH_ALL:
245                 break;
246         case CMDQ_OP_PREFETCH_CFG:
247                 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
248                 cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
249                 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
250                 break;
251         case CMDQ_OP_CFGI_CD:
252                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
253                 fallthrough;
254         case CMDQ_OP_CFGI_STE:
255                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
256                 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
257                 break;
258         case CMDQ_OP_CFGI_CD_ALL:
259                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
260                 break;
261         case CMDQ_OP_CFGI_ALL:
262                 /* Cover the entire SID range */
263                 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
264                 break;
265         case CMDQ_OP_TLBI_NH_VA:
266                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
267                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
268                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
269                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
270                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
271                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
272                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
273                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
274                 break;
275         case CMDQ_OP_TLBI_S2_IPA:
276                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
277                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
278                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
279                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
280                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
281                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
282                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
283                 break;
284         case CMDQ_OP_TLBI_NH_ASID:
285                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
286                 fallthrough;
287         case CMDQ_OP_TLBI_S12_VMALL:
288                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289                 break;
290         case CMDQ_OP_ATC_INV:
291                 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
292                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
293                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
294                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
295                 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
296                 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
297                 break;
298         case CMDQ_OP_PRI_RESP:
299                 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
300                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
301                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
302                 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
303                 switch (ent->pri.resp) {
304                 case PRI_RESP_DENY:
305                 case PRI_RESP_FAIL:
306                 case PRI_RESP_SUCC:
307                         break;
308                 default:
309                         return -EINVAL;
310                 }
311                 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
312                 break;
313         case CMDQ_OP_CMD_SYNC:
314                 if (ent->sync.msiaddr) {
315                         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
316                         cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
317                 } else {
318                         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
319                 }
320                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
321                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
322                 break;
323         default:
324                 return -ENOENT;
325         }
326
327         return 0;
328 }
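/*
 * Editor's sketch (mirrors arm_smmu_tlb_inv_asid() further down; "asid" is
 * just an illustrative local here): a caller describes the command with a
 * struct arm_smmu_cmdq_ent and lets arm_smmu_cmdq_build_cmd() encode it into
 * CMDQ_ENT_DWORDS 64-bit words:
 *
 *	u64 cmd[CMDQ_ENT_DWORDS];
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode    = CMDQ_OP_TLBI_NH_ASID,
 *		.tlbi.asid = asid,
 *	};
 *
 *	if (!arm_smmu_cmdq_build_cmd(cmd, &ent))
 *		;	/* cmd[] now holds the 16-byte command, ready to queue */
 */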
329
330 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
331                                          u32 prod)
332 {
333         struct arm_smmu_queue *q = &smmu->cmdq.q;
334         struct arm_smmu_cmdq_ent ent = {
335                 .opcode = CMDQ_OP_CMD_SYNC,
336         };
337
338         /*
339          * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
340          * payload, so the write will zero the entire command on that platform.
341          */
342         if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
343                 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
344                                    q->ent_dwords * 8;
345         }
346
347         arm_smmu_cmdq_build_cmd(cmd, &ent);
348 }
349
350 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
351 {
352         static const char *cerror_str[] = {
353                 [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
354                 [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
355                 [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
356                 [CMDQ_ERR_CERROR_ATC_INV_IDX]   = "ATC invalidate timeout",
357         };
358
359         int i;
360         u64 cmd[CMDQ_ENT_DWORDS];
361         struct arm_smmu_queue *q = &smmu->cmdq.q;
362         u32 cons = readl_relaxed(q->cons_reg);
363         u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
364         struct arm_smmu_cmdq_ent cmd_sync = {
365                 .opcode = CMDQ_OP_CMD_SYNC,
366         };
367
368         dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
369                 idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
370
371         switch (idx) {
372         case CMDQ_ERR_CERROR_ABT_IDX:
373                 dev_err(smmu->dev, "retrying command fetch\n");
                    fallthrough;
374         case CMDQ_ERR_CERROR_NONE_IDX:
375                 return;
376         case CMDQ_ERR_CERROR_ATC_INV_IDX:
377                 /*
378                  * ATC Invalidation Completion timeout. CONS is still pointing
379                  * at the CMD_SYNC. Attempt to complete other pending commands
380                  * by repeating the CMD_SYNC, though we might well end up back
381                  * here since the ATC invalidation may still be pending.
382                  */
383                 return;
384         case CMDQ_ERR_CERROR_ILL_IDX:
385         default:
386                 break;
387         }
388
389         /*
390          * We may have concurrent producers, so we need to be careful
391          * not to touch any of the shadow cmdq state.
392          */
393         queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
394         dev_err(smmu->dev, "skipping command in error state:\n");
395         for (i = 0; i < ARRAY_SIZE(cmd); ++i)
396                 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
397
398         /* Convert the erroneous command into a CMD_SYNC */
399         if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
400                 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
401                 return;
402         }
403
404         queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
405 }
406
407 /*
408  * Command queue locking.
409  * This is a form of bastardised rwlock with the following major changes:
410  *
411  * - The only LOCK routines are exclusive_trylock() and shared_lock().
412  *   Neither have barrier semantics, and instead provide only a control
413  *   dependency.
414  *
415  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
416  *   fails if the caller appears to be the last lock holder (yes, this is
417  *   racy). All successful UNLOCK routines have RELEASE semantics.
418  */
419 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
420 {
421         int val;
422
423         /*
424          * We can try to avoid the cmpxchg() loop by simply incrementing the
425          * lock counter. When held in exclusive state, the lock counter is set
426          * to INT_MIN so these increments won't hurt as the value will remain
427          * negative.
428          */
429         if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
430                 return;
431
432         do {
433                 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
434         } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
435 }
436
437 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
438 {
439         (void)atomic_dec_return_release(&cmdq->lock);
440 }
441
442 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
443 {
444         if (atomic_read(&cmdq->lock) == 1)
445                 return false;
446
447         arm_smmu_cmdq_shared_unlock(cmdq);
448         return true;
449 }
450
451 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)            \
452 ({                                                                      \
453         bool __ret;                                                     \
454         local_irq_save(flags);                                          \
455         __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);       \
456         if (!__ret)                                                     \
457                 local_irq_restore(flags);                               \
458         __ret;                                                          \
459 })
460
461 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)          \
462 ({                                                                      \
463         atomic_set_release(&cmdq->lock, 0);                             \
464         local_irq_restore(flags);                                       \
465 })
466
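/*
 * Editor's sketch of how these primitives pair up in the insertion path
 * below (see arm_smmu_cmdq_issue_cmdlist()): a CPU queueing a CMD_SYNC takes
 * the lock shared before marking its slot valid, and only the last reader
 * out may fold its snapshot of cons back into the shadow state:
 *
 *	arm_smmu_cmdq_shared_lock(cmdq);
 *	... wait for the CMD_SYNC to complete ...
 *	if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 *		WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
 *		arm_smmu_cmdq_shared_unlock(cmdq);
 *	}
 */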
467
468 /*
469  * Command queue insertion.
470  * This is made fiddly by our attempts to achieve some sort of scalability
471  * since there is one queue shared amongst all of the CPUs in the system.  If
472  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
473  * then you'll *love* this monstrosity.
474  *
475  * The basic idea is to split the queue up into ranges of commands that are
476  * owned by a given CPU; the owner may not have written all of the commands
477  * itself, but is responsible for advancing the hardware prod pointer when
478  * the time comes. The algorithm is roughly:
479  *
480  *      1. Allocate some space in the queue. At this point we also discover
481  *         whether the head of the queue is currently owned by another CPU,
482  *         or whether we are the owner.
483  *
484  *      2. Write our commands into our allocated slots in the queue.
485  *
486  *      3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
487  *
488  *      4. If we are an owner:
489  *              a. Wait for the previous owner to finish.
490  *              b. Mark the queue head as unowned, which tells us the range
491  *                 that we are responsible for publishing.
492  *              c. Wait for all commands in our owned range to become valid.
493  *              d. Advance the hardware prod pointer.
494  *              e. Tell the next owner we've finished.
495  *
496  *      5. If we are inserting a CMD_SYNC (we may or may not have been an
497  *         owner), then we need to stick around until it has completed:
498  *              a. If we have MSIs, the SMMU can write back into the CMD_SYNC
499  *                 to clear the first 4 bytes.
500  *              b. Otherwise, we spin waiting for the hardware cons pointer to
501  *                 advance past our command.
502  *
503  * The devil is in the details, particularly the use of locking for handling
504  * SYNC completion and freeing up space in the queue before we think that it is
505  * full.
506  */
507 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
508                                                u32 sprod, u32 eprod, bool set)
509 {
510         u32 swidx, sbidx, ewidx, ebidx;
511         struct arm_smmu_ll_queue llq = {
512                 .max_n_shift    = cmdq->q.llq.max_n_shift,
513                 .prod           = sprod,
514         };
515
516         ewidx = BIT_WORD(Q_IDX(&llq, eprod));
517         ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
518
519         while (llq.prod != eprod) {
520                 unsigned long mask;
521                 atomic_long_t *ptr;
522                 u32 limit = BITS_PER_LONG;
523
524                 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
525                 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
526
527                 ptr = &cmdq->valid_map[swidx];
528
529                 if ((swidx == ewidx) && (sbidx < ebidx))
530                         limit = ebidx;
531
532                 mask = GENMASK(limit - 1, sbidx);
533
534                 /*
535                  * The valid bit is the inverse of the wrap bit. This means
536                  * that a zero-initialised queue is invalid and, after marking
537                  * all entries as valid, they become invalid again when we
538                  * wrap.
539                  */
540                 if (set) {
541                         atomic_long_xor(mask, ptr);
542                 } else { /* Poll */
543                         unsigned long valid;
544
545                         valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
546                         atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
547                 }
548
549                 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
550         }
551 }
552
553 /* Mark all entries in the range [sprod, eprod) as valid */
554 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
555                                         u32 sprod, u32 eprod)
556 {
557         __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
558 }
559
560 /* Wait for all entries in the range [sprod, eprod) to become valid */
561 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
562                                          u32 sprod, u32 eprod)
563 {
564         __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
565 }
566
567 /* Wait for the command queue to become non-full */
568 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
569                                              struct arm_smmu_ll_queue *llq)
570 {
571         unsigned long flags;
572         struct arm_smmu_queue_poll qp;
573         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
574         int ret = 0;
575
576         /*
577          * Try to update our copy of cons by grabbing exclusive cmdq access. If
578          * that fails, spin until somebody else updates it for us.
579          */
580         if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
581                 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
582                 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
583                 llq->val = READ_ONCE(cmdq->q.llq.val);
584                 return 0;
585         }
586
587         queue_poll_init(smmu, &qp);
588         do {
589                 llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
590                 if (!queue_full(llq))
591                         break;
592
593                 ret = queue_poll(&qp);
594         } while (!ret);
595
596         return ret;
597 }
598
599 /*
600  * Wait until the SMMU signals a CMD_SYNC completion MSI.
601  * Must be called with the cmdq lock held in some capacity.
602  */
603 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
604                                           struct arm_smmu_ll_queue *llq)
605 {
606         int ret = 0;
607         struct arm_smmu_queue_poll qp;
608         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
609         u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
610
611         queue_poll_init(smmu, &qp);
612
613         /*
614          * The MSI won't generate an event, since it's being written back
615          * into the command queue.
616          */
617         qp.wfe = false;
618         smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
619         llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
620         return ret;
621 }
622
623 /*
624  * Wait until the SMMU cons index passes llq->prod.
625  * Must be called with the cmdq lock held in some capacity.
626  */
627 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
628                                                struct arm_smmu_ll_queue *llq)
629 {
630         struct arm_smmu_queue_poll qp;
631         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
632         u32 prod = llq->prod;
633         int ret = 0;
634
635         queue_poll_init(smmu, &qp);
636         llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
637         do {
638                 if (queue_consumed(llq, prod))
639                         break;
640
641                 ret = queue_poll(&qp);
642
643                 /*
644                  * This needs to be a readl() so that our subsequent call
645                  * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
646                  *
647                  * Specifically, we need to ensure that we observe all
648                  * shared_lock()s by other CMD_SYNCs that share our owner,
649                  * so that a failing call to tryunlock() means that we're
650                  * the last one out and therefore we can safely advance
651                  * cmdq->q.llq.cons. Roughly speaking:
652                  *
653                  * CPU 0                CPU1                    CPU2 (us)
654                  *
655                  * if (sync)
656                  *      shared_lock();
657                  *
658                  * dma_wmb();
659                  * set_valid_map();
660                  *
661                  *                      if (owner) {
662                  *                              poll_valid_map();
663                  *                              <control dependency>
664                  *                              writel(prod_reg);
665                  *
666                  *                                              readl(cons_reg);
667                  *                                              tryunlock();
668                  *
669                  * Requires us to see CPU 0's shared_lock() acquisition.
670                  */
671                 llq->cons = readl(cmdq->q.cons_reg);
672         } while (!ret);
673
674         return ret;
675 }
676
677 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
678                                          struct arm_smmu_ll_queue *llq)
679 {
680         if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
681                 return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
682
683         return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
684 }
685
686 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
687                                         u32 prod, int n)
688 {
689         int i;
690         struct arm_smmu_ll_queue llq = {
691                 .max_n_shift    = cmdq->q.llq.max_n_shift,
692                 .prod           = prod,
693         };
694
695         for (i = 0; i < n; ++i) {
696                 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
697
698                 prod = queue_inc_prod_n(&llq, i);
699                 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
700         }
701 }
702
703 /*
704  * This is the actual insertion function, and provides the following
705  * ordering guarantees to callers:
706  *
707  * - There is a dma_wmb() before publishing any commands to the queue.
708  *   This can be relied upon to order prior writes to data structures
709  *   in memory (such as a CD or an STE) before the command.
710  *
711  * - On completion of a CMD_SYNC, there is a control dependency.
712  *   This can be relied upon to order subsequent writes to memory (e.g.
713  *   freeing an IOVA) after completion of the CMD_SYNC.
714  *
715  * - Command insertion is totally ordered, so if two CPUs each race to
716  *   insert their own list of commands then all of the commands from one
717  *   CPU will appear before any of the commands from the other CPU.
718  */
719 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
720                                        u64 *cmds, int n, bool sync)
721 {
722         u64 cmd_sync[CMDQ_ENT_DWORDS];
723         u32 prod;
724         unsigned long flags;
725         bool owner;
726         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
727         struct arm_smmu_ll_queue llq = {
728                 .max_n_shift = cmdq->q.llq.max_n_shift,
729         }, head = llq;
730         int ret = 0;
731
732         /* 1. Allocate some space in the queue */
733         local_irq_save(flags);
734         llq.val = READ_ONCE(cmdq->q.llq.val);
735         do {
736                 u64 old;
737
738                 while (!queue_has_space(&llq, n + sync)) {
739                         local_irq_restore(flags);
740                         if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
741                                 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
742                         local_irq_save(flags);
743                 }
744
745                 head.cons = llq.cons;
746                 head.prod = queue_inc_prod_n(&llq, n + sync) |
747                                              CMDQ_PROD_OWNED_FLAG;
748
749                 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
750                 if (old == llq.val)
751                         break;
752
753                 llq.val = old;
754         } while (1);
755         owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
756         head.prod &= ~CMDQ_PROD_OWNED_FLAG;
757         llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
758
759         /*
760          * 2. Write our commands into the queue
761          * Dependency ordering from the cmpxchg() loop above.
762          */
763         arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
764         if (sync) {
765                 prod = queue_inc_prod_n(&llq, n);
766                 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
767                 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
768
769                 /*
770                  * In order to determine completion of our CMD_SYNC, we must
771                  * ensure that the queue can't wrap twice without us noticing.
772                  * We achieve that by taking the cmdq lock as shared before
773                  * marking our slot as valid.
774                  */
775                 arm_smmu_cmdq_shared_lock(cmdq);
776         }
777
778         /* 3. Mark our slots as valid, ensuring commands are visible first */
779         dma_wmb();
780         arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
781
782         /* 4. If we are the owner, take control of the SMMU hardware */
783         if (owner) {
784                 /* a. Wait for previous owner to finish */
785                 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
786
787                 /* b. Stop gathering work by clearing the owned flag */
788                 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
789                                                    &cmdq->q.llq.atomic.prod);
790                 prod &= ~CMDQ_PROD_OWNED_FLAG;
791
792                 /*
793                  * c. Wait for any gathered work to be written to the queue.
794                  * Note that we read our own entries so that we have the control
795                  * dependency required by (d).
796                  */
797                 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
798
799                 /*
800                  * d. Advance the hardware prod pointer
801                  * Control dependency ordering from the entries becoming valid.
802                  */
803                 writel_relaxed(prod, cmdq->q.prod_reg);
804
805                 /*
806                  * e. Tell the next owner we're done
807                  * Make sure we've updated the hardware first, so that we don't
808                  * race to update prod and potentially move it backwards.
809                  */
810                 atomic_set_release(&cmdq->owner_prod, prod);
811         }
812
813         /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
814         if (sync) {
815                 llq.prod = queue_inc_prod_n(&llq, n);
816                 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
817                 if (ret) {
818                         dev_err_ratelimited(smmu->dev,
819                                             "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
820                                             llq.prod,
821                                             readl_relaxed(cmdq->q.prod_reg),
822                                             readl_relaxed(cmdq->q.cons_reg));
823                 }
824
825                 /*
826                  * Try to unlock the cmdq lock. This will fail if we're the last
827                  * reader, in which case we can safely update cmdq->q.llq.cons
828                  */
829                 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
830                         WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
831                         arm_smmu_cmdq_shared_unlock(cmdq);
832                 }
833         }
834
835         local_irq_restore(flags);
836         return ret;
837 }
838
839 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
840                                    struct arm_smmu_cmdq_ent *ent)
841 {
842         u64 cmd[CMDQ_ENT_DWORDS];
843
844         if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
845                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
846                          ent->opcode);
847                 return -EINVAL;
848         }
849
850         return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
851 }
852
853 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
854 {
855         return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
856 }
857
858 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
859                                     struct arm_smmu_cmdq_batch *cmds,
860                                     struct arm_smmu_cmdq_ent *cmd)
861 {
862         if (cmds->num == CMDQ_BATCH_ENTRIES) {
863                 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
864                 cmds->num = 0;
865         }
866         arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
867         cmds->num++;
868 }
869
870 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
871                                       struct arm_smmu_cmdq_batch *cmds)
872 {
873         return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
874 }
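/*
 * Editor's sketch of the batching pattern used by callers such as
 * arm_smmu_sync_cd() just below: commands are accumulated into a fixed-size
 * batch (flushed automatically when it fills up) and then submitted with a
 * trailing CMD_SYNC:
 *
 *	struct arm_smmu_cmdq_batch cmds = {};
 *
 *	for each command to emit:
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 */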
875
876 /* Context descriptor manipulation functions */
877 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
878 {
879         struct arm_smmu_cmdq_ent cmd = {
880                 .opcode = CMDQ_OP_TLBI_NH_ASID,
881                 .tlbi.asid = asid,
882         };
883
884         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
885         arm_smmu_cmdq_issue_sync(smmu);
886 }
887
888 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
889                              int ssid, bool leaf)
890 {
891         size_t i;
892         unsigned long flags;
893         struct arm_smmu_master *master;
894         struct arm_smmu_cmdq_batch cmds = {};
895         struct arm_smmu_device *smmu = smmu_domain->smmu;
896         struct arm_smmu_cmdq_ent cmd = {
897                 .opcode = CMDQ_OP_CFGI_CD,
898                 .cfgi   = {
899                         .ssid   = ssid,
900                         .leaf   = leaf,
901                 },
902         };
903
904         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
905         list_for_each_entry(master, &smmu_domain->devices, domain_head) {
906                 for (i = 0; i < master->num_sids; i++) {
907                         cmd.cfgi.sid = master->sids[i];
908                         arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
909                 }
910         }
911         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
912
913         arm_smmu_cmdq_batch_submit(smmu, &cmds);
914 }
915
916 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
917                                         struct arm_smmu_l1_ctx_desc *l1_desc)
918 {
919         size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
920
921         l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
922                                              &l1_desc->l2ptr_dma, GFP_KERNEL);
923         if (!l1_desc->l2ptr) {
924                 dev_warn(smmu->dev,
925                          "failed to allocate context descriptor table\n");
926                 return -ENOMEM;
927         }
928         return 0;
929 }
930
931 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
932                                       struct arm_smmu_l1_ctx_desc *l1_desc)
933 {
934         u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
935                   CTXDESC_L1_DESC_V;
936
937         /* See comment in arm_smmu_write_ctx_desc() */
938         WRITE_ONCE(*dst, cpu_to_le64(val));
939 }
940
941 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
942                                    u32 ssid)
943 {
944         __le64 *l1ptr;
945         unsigned int idx;
946         struct arm_smmu_l1_ctx_desc *l1_desc;
947         struct arm_smmu_device *smmu = smmu_domain->smmu;
948         struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
949
950         if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
951                 return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
952
953         idx = ssid >> CTXDESC_SPLIT;
954         l1_desc = &cdcfg->l1_desc[idx];
955         if (!l1_desc->l2ptr) {
956                 if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
957                         return NULL;
958
959                 l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
960                 arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
961                 /* An invalid L1CD can be cached */
962                 arm_smmu_sync_cd(smmu_domain, ssid, false);
963         }
964         idx = ssid & (CTXDESC_L2_ENTRIES - 1);
965         return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
966 }
967
968 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
969                             struct arm_smmu_ctx_desc *cd)
970 {
971         /*
972          * This function handles the following cases:
973          *
974          * (1) Install primary CD, for normal DMA traffic (SSID = 0).
975          * (2) Install a secondary CD, for SID+SSID traffic.
976          * (3) Update ASID of a CD. Atomically write the first 64 bits of the
977          *     CD, then invalidate the old entry and mappings.
978          * (4) Quiesce the context without clearing the valid bit. Disable
979          *     translation, and ignore any translation fault.
980          * (5) Remove a secondary CD.
981          */
982         u64 val;
983         bool cd_live;
984         __le64 *cdptr;
985         struct arm_smmu_device *smmu = smmu_domain->smmu;
986
987         if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
988                 return -E2BIG;
989
990         cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
991         if (!cdptr)
992                 return -ENOMEM;
993
994         val = le64_to_cpu(cdptr[0]);
995         cd_live = !!(val & CTXDESC_CD_0_V);
996
997         if (!cd) { /* (5) */
998                 val = 0;
999         } else if (cd == &quiet_cd) { /* (4) */
1000                 val |= CTXDESC_CD_0_TCR_EPD0;
1001         } else if (cd_live) { /* (3) */
1002                 val &= ~CTXDESC_CD_0_ASID;
1003                 val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1004                 /*
1005                  * Until CD+TLB invalidation, both ASIDs may be used for tagging
1006                  * this substream's traffic
1007                  */
1008         } else { /* (1) and (2) */
1009                 cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1010                 cdptr[2] = 0;
1011                 cdptr[3] = cpu_to_le64(cd->mair);
1012
1013                 /*
1014                  * STE is live, and the SMMU might read dwords of this CD in any
1015                  * order. Ensure that it observes valid values before reading
1016                  * V=1.
1017                  */
1018                 arm_smmu_sync_cd(smmu_domain, ssid, true);
1019
1020                 val = cd->tcr |
1021 #ifdef __BIG_ENDIAN
1022                         CTXDESC_CD_0_ENDI |
1023 #endif
1024                         CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1025                         (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1026                         CTXDESC_CD_0_AA64 |
1027                         FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1028                         CTXDESC_CD_0_V;
1029
1030                 /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1031                 if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1032                         val |= CTXDESC_CD_0_S;
1033         }
1034
1035         /*
1036          * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1037          * "Configuration structures and configuration invalidation completion"
1038          *
1039          *   The size of single-copy atomic reads made by the SMMU is
1040          *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1041          *   field within an aligned 64-bit span of a structure can be altered
1042          *   without first making the structure invalid.
1043          */
1044         WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1045         arm_smmu_sync_cd(smmu_domain, ssid, true);
1046         return 0;
1047 }
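/*
 * Editor's illustration of the call patterns for the cases listed above
 * (the s1_cfg.cd argument is inferred from the rest of the driver and ssid
 * stands for any valid substream ID; neither appears at this point in the
 * original file):
 *
 *	(1) install the primary CD:
 *		arm_smmu_write_ctx_desc(smmu_domain, 0, &smmu_domain->s1_cfg.cd);
 *	(4) quiesce a dying context without clearing the valid bit:
 *		arm_smmu_write_ctx_desc(smmu_domain, ssid, &quiet_cd);
 *	(5) remove a secondary CD:
 *		arm_smmu_write_ctx_desc(smmu_domain, ssid, NULL);
 */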
1048
1049 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1050 {
1051         int ret;
1052         size_t l1size;
1053         size_t max_contexts;
1054         struct arm_smmu_device *smmu = smmu_domain->smmu;
1055         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1056         struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1057
1058         max_contexts = 1 << cfg->s1cdmax;
1059
1060         if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1061             max_contexts <= CTXDESC_L2_ENTRIES) {
1062                 cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1063                 cdcfg->num_l1_ents = max_contexts;
1064
1065                 l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1066         } else {
1067                 cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1068                 cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1069                                                   CTXDESC_L2_ENTRIES);
1070
1071                 cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1072                                               sizeof(*cdcfg->l1_desc),
1073                                               GFP_KERNEL);
1074                 if (!cdcfg->l1_desc)
1075                         return -ENOMEM;
1076
1077                 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1078         }
1079
1080         cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1081                                            GFP_KERNEL);
1082         if (!cdcfg->cdtab) {
1083                 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1084                 ret = -ENOMEM;
1085                 goto err_free_l1;
1086         }
1087
1088         return 0;
1089
1090 err_free_l1:
1091         if (cdcfg->l1_desc) {
1092                 devm_kfree(smmu->dev, cdcfg->l1_desc);
1093                 cdcfg->l1_desc = NULL;
1094         }
1095         return ret;
1096 }
1097
1098 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1099 {
1100         int i;
1101         size_t size, l1size;
1102         struct arm_smmu_device *smmu = smmu_domain->smmu;
1103         struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1104
1105         if (cdcfg->l1_desc) {
1106                 size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1107
1108                 for (i = 0; i < cdcfg->num_l1_ents; i++) {
1109                         if (!cdcfg->l1_desc[i].l2ptr)
1110                                 continue;
1111
1112                         dmam_free_coherent(smmu->dev, size,
1113                                            cdcfg->l1_desc[i].l2ptr,
1114                                            cdcfg->l1_desc[i].l2ptr_dma);
1115                 }
1116                 devm_kfree(smmu->dev, cdcfg->l1_desc);
1117                 cdcfg->l1_desc = NULL;
1118
1119                 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1120         } else {
1121                 l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1122         }
1123
1124         dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1125         cdcfg->cdtab_dma = 0;
1126         cdcfg->cdtab = NULL;
1127 }
1128
1129 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1130 {
1131         bool free;
1132         struct arm_smmu_ctx_desc *old_cd;
1133
1134         if (!cd->asid)
1135                 return false;
1136
1137         free = refcount_dec_and_test(&cd->refs);
1138         if (free) {
1139                 old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1140                 WARN_ON(old_cd != cd);
1141         }
1142         return free;
1143 }
1144
1145 /* Stream table manipulation functions */
1146 static void
1147 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1148 {
1149         u64 val = 0;
1150
1151         val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1152         val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1153
1154         /* See comment in arm_smmu_write_ctx_desc() */
1155         WRITE_ONCE(*dst, cpu_to_le64(val));
1156 }
1157
1158 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1159 {
1160         struct arm_smmu_cmdq_ent cmd = {
1161                 .opcode = CMDQ_OP_CFGI_STE,
1162                 .cfgi   = {
1163                         .sid    = sid,
1164                         .leaf   = true,
1165                 },
1166         };
1167
1168         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1169         arm_smmu_cmdq_issue_sync(smmu);
1170 }
1171
1172 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1173                                       __le64 *dst)
1174 {
1175         /*
1176          * This is hideously complicated, but we only really care about
1177          * three cases at the moment:
1178          *
1179          * 1. Invalid (all zero) -> bypass/fault (init)
1180          * 2. Bypass/fault -> translation/bypass (attach)
1181          * 3. Translation/bypass -> bypass/fault (detach)
1182          *
1183          * Given that we can't update the STE atomically and the SMMU
1184          * doesn't read the thing in a defined order, that leaves us
1185          * with the following maintenance requirements:
1186          *
1187          * 1. Update Config, return (init time STEs aren't live)
1188          * 2. Write everything apart from dword 0, sync, write dword 0, sync
1189          * 3. Update Config, sync
1190          */
1191         u64 val = le64_to_cpu(dst[0]);
1192         bool ste_live = false;
1193         struct arm_smmu_device *smmu = NULL;
1194         struct arm_smmu_s1_cfg *s1_cfg = NULL;
1195         struct arm_smmu_s2_cfg *s2_cfg = NULL;
1196         struct arm_smmu_domain *smmu_domain = NULL;
1197         struct arm_smmu_cmdq_ent prefetch_cmd = {
1198                 .opcode         = CMDQ_OP_PREFETCH_CFG,
1199                 .prefetch       = {
1200                         .sid    = sid,
1201                 },
1202         };
1203
1204         if (master) {
1205                 smmu_domain = master->domain;
1206                 smmu = master->smmu;
1207         }
1208
1209         if (smmu_domain) {
1210                 switch (smmu_domain->stage) {
1211                 case ARM_SMMU_DOMAIN_S1:
1212                         s1_cfg = &smmu_domain->s1_cfg;
1213                         break;
1214                 case ARM_SMMU_DOMAIN_S2:
1215                 case ARM_SMMU_DOMAIN_NESTED:
1216                         s2_cfg = &smmu_domain->s2_cfg;
1217                         break;
1218                 default:
1219                         break;
1220                 }
1221         }
1222
1223         if (val & STRTAB_STE_0_V) {
1224                 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1225                 case STRTAB_STE_0_CFG_BYPASS:
1226                         break;
1227                 case STRTAB_STE_0_CFG_S1_TRANS:
1228                 case STRTAB_STE_0_CFG_S2_TRANS:
1229                         ste_live = true;
1230                         break;
1231                 case STRTAB_STE_0_CFG_ABORT:
1232                         BUG_ON(!disable_bypass);
1233                         break;
1234                 default:
1235                         BUG(); /* STE corruption */
1236                 }
1237         }
1238
1239         /* Nuke the existing STE_0 value, as we're going to rewrite it */
1240         val = STRTAB_STE_0_V;
1241
1242         /* Bypass/fault */
1243         if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1244                 if (!smmu_domain && disable_bypass)
1245                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1246                 else
1247                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1248
1249                 dst[0] = cpu_to_le64(val);
1250                 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1251                                                 STRTAB_STE_1_SHCFG_INCOMING));
1252                 dst[2] = 0; /* Nuke the VMID */
1253                 /*
1254                  * The SMMU can perform negative caching, so we must sync
1255                  * the STE regardless of whether the old value was live.
1256                  */
1257                 if (smmu)
1258                         arm_smmu_sync_ste_for_sid(smmu, sid);
1259                 return;
1260         }
1261
1262         if (s1_cfg) {
1263                 BUG_ON(ste_live);
1264                 dst[1] = cpu_to_le64(
1265                          FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1266                          FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1267                          FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1268                          FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1269                          FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1270
1271                 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1272                    !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1273                         dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1274
1275                 val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1276                         FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1277                         FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1278                         FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1279         }
1280
1281         if (s2_cfg) {
1282                 BUG_ON(ste_live);
1283                 dst[2] = cpu_to_le64(
1284                          FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1285                          FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1286 #ifdef __BIG_ENDIAN
1287                          STRTAB_STE_2_S2ENDI |
1288 #endif
1289                          STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1290                          STRTAB_STE_2_S2R);
1291
1292                 dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1293
1294                 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1295         }
1296
1297         if (master->ats_enabled)
1298                 dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1299                                                  STRTAB_STE_1_EATS_TRANS));
1300
1301         arm_smmu_sync_ste_for_sid(smmu, sid);
1302         /* See comment in arm_smmu_write_ctx_desc() */
1303         WRITE_ONCE(dst[0], cpu_to_le64(val));
1304         arm_smmu_sync_ste_for_sid(smmu, sid);
1305
1306         /* It's likely that we'll want to use the new STE soon */
1307         if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1308                 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1309 }
1310
1311 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1312 {
1313         unsigned int i;
1314
1315         for (i = 0; i < nent; ++i) {
1316                 arm_smmu_write_strtab_ent(NULL, -1, strtab);
1317                 strtab += STRTAB_STE_DWORDS;
1318         }
1319 }
1320
1321 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1322 {
1323         size_t size;
1324         void *strtab;
1325         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1326         struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1327
1328         if (desc->l2ptr)
1329                 return 0;
1330
1331         size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
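                /*
                 * Editor's note: assuming the header's STRTAB_SPLIT == 8 and
                 * STRTAB_STE_DWORDS == 8, this works out to
                 * 1 << (8 + 3 + 3) = 16KB, i.e. 256 STEs of 64 bytes each
                 * per second-level table.
                 */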
1332         strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1333
1334         desc->span = STRTAB_SPLIT + 1;
1335         desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1336                                           GFP_KERNEL);
1337         if (!desc->l2ptr) {
1338                 dev_err(smmu->dev,
1339                         "failed to allocate l2 stream table for SID %u\n",
1340                         sid);
1341                 return -ENOMEM;
1342         }
1343
1344         arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1345         arm_smmu_write_strtab_l1_desc(strtab, desc);
1346         return 0;
1347 }
1348
1349 /* IRQ and event handlers */
1350 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1351 {
1352         int i;
1353         struct arm_smmu_device *smmu = dev;
1354         struct arm_smmu_queue *q = &smmu->evtq.q;
1355         struct arm_smmu_ll_queue *llq = &q->llq;
1356         u64 evt[EVTQ_ENT_DWORDS];
1357
1358         do {
1359                 while (!queue_remove_raw(q, evt)) {
1360                         u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1361
1362                         dev_info(smmu->dev, "event 0x%02x received:\n", id);
1363                         for (i = 0; i < ARRAY_SIZE(evt); ++i)
1364                                 dev_info(smmu->dev, "\t0x%016llx\n",
1365                                          (unsigned long long)evt[i]);
1366
1367                 }
1368
1369                 /*
1370                  * Not much we can do on overflow, so scream and pretend we're
1371                  * trying harder.
1372                  */
1373                 if (queue_sync_prod_in(q) == -EOVERFLOW)
1374                         dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1375         } while (!queue_empty(llq));
1376
1377         /* Sync our overflow flag, as we believe we're up to speed */
1378         llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1379                     Q_IDX(llq, llq->cons);
1380         return IRQ_HANDLED;
1381 }
1382
1383 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1384 {
1385         u32 sid, ssid;
1386         u16 grpid;
1387         bool ssv, last;
1388
1389         sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1390         ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1391         ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1392         last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1393         grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1394
1395         dev_info(smmu->dev, "unexpected PRI request received:\n");
1396         dev_info(smmu->dev,
1397                  "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1398                  sid, ssid, grpid, last ? "L" : "",
1399                  evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1400                  evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1401                  evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1402                  evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1403                  evt[1] & PRIQ_1_ADDR_MASK);
1404
1405         if (last) {
1406                 struct arm_smmu_cmdq_ent cmd = {
1407                         .opcode                 = CMDQ_OP_PRI_RESP,
1408                         .substream_valid        = ssv,
1409                         .pri                    = {
1410                                 .sid    = sid,
1411                                 .ssid   = ssid,
1412                                 .grpid  = grpid,
1413                                 .resp   = PRI_RESP_DENY,
1414                         },
1415                 };
1416
1417                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1418         }
1419 }
1420
1421 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1422 {
1423         struct arm_smmu_device *smmu = dev;
1424         struct arm_smmu_queue *q = &smmu->priq.q;
1425         struct arm_smmu_ll_queue *llq = &q->llq;
1426         u64 evt[PRIQ_ENT_DWORDS];
1427
1428         do {
1429                 while (!queue_remove_raw(q, evt))
1430                         arm_smmu_handle_ppr(smmu, evt);
1431
1432                 if (queue_sync_prod_in(q) == -EOVERFLOW)
1433                         dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1434         } while (!queue_empty(llq));
1435
1436         /* Sync our overflow flag, as we believe we're up to speed */
1437         llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1438                       Q_IDX(llq, llq->cons);
1439         queue_sync_cons_out(q);
1440         return IRQ_HANDLED;
1441 }
1442
1443 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1444
1445 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1446 {
1447         u32 gerror, gerrorn, active;
1448         struct arm_smmu_device *smmu = dev;
1449
1450         gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1451         gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1452
1453         active = gerror ^ gerrorn;
1454         if (!(active & GERROR_ERR_MASK))
1455                 return IRQ_NONE; /* No errors pending */
1456
1457         dev_warn(smmu->dev,
1458                  "unexpected global error reported (0x%08x), this could be serious\n",
1459                  active);
1460
1461         if (active & GERROR_SFM_ERR) {
1462                 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1463                 arm_smmu_device_disable(smmu);
1464         }
1465
1466         if (active & GERROR_MSI_GERROR_ABT_ERR)
1467                 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1468
1469         if (active & GERROR_MSI_PRIQ_ABT_ERR)
1470                 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1471
1472         if (active & GERROR_MSI_EVTQ_ABT_ERR)
1473                 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1474
1475         if (active & GERROR_MSI_CMDQ_ABT_ERR)
1476                 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1477
1478         if (active & GERROR_PRIQ_ABT_ERR)
1479                 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1480
1481         if (active & GERROR_EVTQ_ABT_ERR)
1482                 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1483
1484         if (active & GERROR_CMDQ_ERR)
1485                 arm_smmu_cmdq_skip_err(smmu);
1486
1487         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1488         return IRQ_HANDLED;
1489 }
1490
1491 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1492 {
1493         struct arm_smmu_device *smmu = dev;
1494
1495         arm_smmu_evtq_thread(irq, dev);
1496         if (smmu->features & ARM_SMMU_FEAT_PRI)
1497                 arm_smmu_priq_thread(irq, dev);
1498
1499         return IRQ_HANDLED;
1500 }
1501
1502 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1503 {
1504         arm_smmu_gerror_handler(irq, dev);
1505         return IRQ_WAKE_THREAD;
1506 }
1507
1508 static void
1509 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1510                         struct arm_smmu_cmdq_ent *cmd)
1511 {
1512         size_t log2_span;
1513         size_t span_mask;
1514         /* ATC invalidates are always on 4096-byte pages */
1515         size_t inval_grain_shift = 12;
1516         unsigned long page_start, page_end;
1517
1518         /*
1519          * ATS and PASID:
1520          *
1521          * If substream_valid is clear, the PCIe TLP is sent without a PASID
1522          * prefix. In that case all ATC entries within the address range are
1523          * invalidated, including those that were requested with a PASID! There
1524          * is no way to invalidate only entries without PASID.
1525          *
1526          * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1527          * traffic), translation requests without PASID create ATC entries
1528          * without PASID, which must be invalidated with substream_valid clear.
1529          * This has the unpleasant side-effect of invalidating all PASID-tagged
1530          * ATC entries within the address range.
1531          */
1532         *cmd = (struct arm_smmu_cmdq_ent) {
1533                 .opcode                 = CMDQ_OP_ATC_INV,
1534                 .substream_valid        = !!ssid,
1535                 .atc.ssid               = ssid,
1536         };
1537
1538         if (!size) {
1539                 cmd->atc.size = ATC_INV_SIZE_ALL;
1540                 return;
1541         }
1542
1543         page_start      = iova >> inval_grain_shift;
1544         page_end        = (iova + size - 1) >> inval_grain_shift;
1545
1546         /*
1547          * In an ATS Invalidate Request, the address must be aligned on the
1548          * range size, which must be a power of two number of page sizes. We
1549          * thus have to choose between grossly over-invalidating the region, or
1550          * splitting the invalidation into multiple commands. For simplicity
1551          * we'll go with the first solution, but should refine it in the future
1552          * if multiple commands are shown to be more efficient.
1553          *
1554          * Find the smallest power of two that covers the range. The most
1555          * significant differing bit between the start and end addresses,
1556          * fls(start ^ end), indicates the required span. For example:
1557          *
1558          * We want to invalidate pages [8; 11]. This is already the ideal range:
1559          *              x = 0b1000 ^ 0b1011 = 0b11
1560          *              span = 1 << fls(x) = 4
1561          *
1562          * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1563          *              x = 0b0111 ^ 0b1010 = 0b1101
1564          *              span = 1 << fls(x) = 16
1565          */
1566         log2_span       = fls_long(page_start ^ page_end);
1567         span_mask       = (1ULL << log2_span) - 1;
1568
1569         page_start      &= ~span_mask;
1570
1571         cmd->atc.addr   = page_start << inval_grain_shift;
1572         cmd->atc.size   = log2_span;
1573 }
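
/*
 * Stand-alone illustration (not part of the driver) of the span rounding done
 * by arm_smmu_atc_inv_to_cmd() above. The names below are invented for this
 * example, and fls_long() is approximated with a compiler builtin so that the
 * snippet builds in user space.
 */
#if 0   /* illustrative only; build stand-alone to run */
#include <stdio.h>

static unsigned int span_fls(unsigned long x)
{
        return x ? (unsigned int)(8 * sizeof(long) - __builtin_clzl(x)) : 0;
}

static void atc_span_example(unsigned long first_page, unsigned long last_page)
{
        unsigned int log2_span = span_fls(first_page ^ last_page);
        unsigned long span_mask = (1UL << log2_span) - 1;
        unsigned long start = first_page & ~span_mask;

        printf("pages [%lu; %lu] -> invalidate [%lu; %lu] (%lu pages)\n",
               first_page, last_page, start, start + span_mask, span_mask + 1);
}

int main(void)
{
        atc_span_example(8, 11);        /* already aligned: 4 pages       */
        atc_span_example(7, 10);        /* rounds up to [0; 15], 16 pages */
        return 0;
}
#endif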
1574
1575 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1576 {
1577         int i;
1578         struct arm_smmu_cmdq_ent cmd;
1579
1580         arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1581
1582         for (i = 0; i < master->num_sids; i++) {
1583                 cmd.atc.sid = master->sids[i];
1584                 arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1585         }
1586
1587         return arm_smmu_cmdq_issue_sync(master->smmu);
1588 }
1589
1590 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1591                             unsigned long iova, size_t size)
1592 {
1593         int i;
1594         unsigned long flags;
1595         struct arm_smmu_cmdq_ent cmd;
1596         struct arm_smmu_master *master;
1597         struct arm_smmu_cmdq_batch cmds = {};
1598
1599         if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1600                 return 0;
1601
1602         /*
1603          * Ensure that we've completed prior invalidation of the main TLBs
1604          * before we read 'nr_ats_masters' in case of a concurrent call to
1605          * arm_smmu_enable_ats():
1606          *
1607          *      // unmap()                      // arm_smmu_enable_ats()
1608          *      TLBI+SYNC                       atomic_inc(&nr_ats_masters);
1609          *      smp_mb();                       [...]
1610          *      atomic_read(&nr_ats_masters);   pci_enable_ats() // writel()
1611          *
1612          * Ensures that we always see the incremented 'nr_ats_masters' count if
1613          * ATS was enabled at the PCI device before completion of the TLBI.
1614          */
1615         smp_mb();
1616         if (!atomic_read(&smmu_domain->nr_ats_masters))
1617                 return 0;
1618
1619         arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1620
1621         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1622         list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1623                 if (!master->ats_enabled)
1624                         continue;
1625
1626                 for (i = 0; i < master->num_sids; i++) {
1627                         cmd.atc.sid = master->sids[i];
1628                         arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1629                 }
1630         }
1631         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1632
1633         return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1634 }
1635
1636 /* IO_PGTABLE API */
1637 static void arm_smmu_tlb_inv_context(void *cookie)
1638 {
1639         struct arm_smmu_domain *smmu_domain = cookie;
1640         struct arm_smmu_device *smmu = smmu_domain->smmu;
1641         struct arm_smmu_cmdq_ent cmd;
1642
1643         /*
1644          * NOTE: when io-pgtable is in non-strict mode, we may get here with
1645          * PTEs previously cleared by unmaps on the current CPU not yet visible
1646          * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1647          * insertion to guarantee those are observed before the TLBI. Do be
1648          * careful, 007.
1649          */
1650         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1651                 arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1652         } else {
1653                 cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1654                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1655                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1656                 arm_smmu_cmdq_issue_sync(smmu);
1657         }
1658         arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1659 }
1660
1661 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1662                                      unsigned long iova, size_t size,
1663                                      size_t granule,
1664                                      struct arm_smmu_domain *smmu_domain)
1665 {
1666         struct arm_smmu_device *smmu = smmu_domain->smmu;
1667         unsigned long end = iova + size, num_pages = 0, tg = 0;
1668         size_t inv_range = granule;
1669         struct arm_smmu_cmdq_batch cmds = {};
1670
1671         if (!size)
1672                 return;
1673
1674         if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1675                 /* Get the leaf page size */
1676                 tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1677
1678                 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1679                 cmd->tlbi.tg = (tg - 10) / 2;
1680
1681                 /* Determine what level the granule is at */
1682                 cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1683
1684                 num_pages = size >> tg;
1685         }
1686
1687         while (iova < end) {
1688                 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1689                         /*
1690                          * On each iteration of the loop, the range is 5 bits
1691                          * worth of the aligned size remaining.
1692                          * The range in pages is:
1693                          *
1694                          * range = (num_pages & (0x1f << __ffs(num_pages)))
1695                          */
1696                         unsigned long scale, num;
1697
1698                         /* scale: the remaining page count is a multiple of 2^scale */
1699                         scale = __ffs(num_pages);
1700                         cmd->tlbi.scale = scale;
1701
1702                         /* Determine how many chunks of 2^scale size we have */
1703                         num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1704                         cmd->tlbi.num = num - 1;
1705
1706                         /* range is num * 2^scale * pgsize */
1707                         inv_range = num << (scale + tg);
1708
1709                         /* Clear out the lower order bits for the next iteration */
1710                         num_pages -= num << scale;
1711                 }
1712
1713                 cmd->tlbi.addr = iova;
1714                 arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1715                 iova += inv_range;
1716         }
1717         arm_smmu_cmdq_batch_submit(smmu, &cmds);
1718 }
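
/*
 * Stand-alone illustration (not part of the driver) of how the loop above
 * splits the remaining page count into (num, scale) chunks when range
 * invalidation is supported. The function name is invented for this example
 * and the mask of 31 assumes CMDQ_TLBI_RANGE_NUM_MAX is 31.
 */
#if 0   /* illustrative only; build stand-alone to run */
#include <stdio.h>

static void tlbi_range_chunks(unsigned long num_pages)
{
        while (num_pages) {
                /* scale: index of the lowest set bit, as __ffs() would give */
                unsigned int scale = (unsigned int)__builtin_ctzl(num_pages);
                /* at most 31 chunks of 2^scale pages per command */
                unsigned long num = (num_pages >> scale) & 31;

                printf("one command: %lu pages (num=%lu, scale=%u)\n",
                       num << scale, num, scale);
                num_pages -= num << scale;
        }
}

int main(void)
{
        tlbi_range_chunks(0x2345);      /* 9029 pages -> three commands */
        return 0;
}
#endif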
1719
1720 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1721                                           size_t granule, bool leaf,
1722                                           struct arm_smmu_domain *smmu_domain)
1723 {
1724         struct arm_smmu_cmdq_ent cmd = {
1725                 .tlbi = {
1726                         .leaf   = leaf,
1727                 },
1728         };
1729
1730         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1731                 cmd.opcode      = CMDQ_OP_TLBI_NH_VA;
1732                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1733         } else {
1734                 cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1735                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1736         }
1737         __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1738
1739         /*
1740          * Unfortunately, this can't be leaf-only since we may have
1741          * zapped an entire table.
1742          */
1743         arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1744 }
1745
1746 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1747                                          unsigned long iova, size_t granule,
1748                                          void *cookie)
1749 {
1750         struct arm_smmu_domain *smmu_domain = cookie;
1751         struct iommu_domain *domain = &smmu_domain->domain;
1752
1753         iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1754 }
1755
1756 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1757                                   size_t granule, void *cookie)
1758 {
1759         arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1760 }
1761
1762 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1763         .tlb_flush_all  = arm_smmu_tlb_inv_context,
1764         .tlb_flush_walk = arm_smmu_tlb_inv_walk,
1765         .tlb_add_page   = arm_smmu_tlb_inv_page_nosync,
1766 };
1767
1768 /* IOMMU API */
1769 static bool arm_smmu_capable(enum iommu_cap cap)
1770 {
1771         switch (cap) {
1772         case IOMMU_CAP_CACHE_COHERENCY:
1773                 return true;
1774         case IOMMU_CAP_NOEXEC:
1775                 return true;
1776         default:
1777                 return false;
1778         }
1779 }
1780
1781 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1782 {
1783         struct arm_smmu_domain *smmu_domain;
1784
1785         if (type != IOMMU_DOMAIN_UNMANAGED &&
1786             type != IOMMU_DOMAIN_DMA &&
1787             type != IOMMU_DOMAIN_IDENTITY)
1788                 return NULL;
1789
1790         /*
1791          * Allocate the domain and initialise some of its data structures.
1792          * We can't really do anything meaningful until we've added a
1793          * master.
1794          */
1795         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1796         if (!smmu_domain)
1797                 return NULL;
1798
1799         if (type == IOMMU_DOMAIN_DMA &&
1800             iommu_get_dma_cookie(&smmu_domain->domain)) {
1801                 kfree(smmu_domain);
1802                 return NULL;
1803         }
1804
1805         mutex_init(&smmu_domain->init_mutex);
1806         INIT_LIST_HEAD(&smmu_domain->devices);
1807         spin_lock_init(&smmu_domain->devices_lock);
1808         INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
1809
1810         return &smmu_domain->domain;
1811 }
1812
1813 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1814 {
1815         int idx, size = 1 << span;
1816
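        /* Find a free index and claim it atomically, retrying if we race */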
1817         do {
1818                 idx = find_first_zero_bit(map, size);
1819                 if (idx == size)
1820                         return -ENOSPC;
1821         } while (test_and_set_bit(idx, map));
1822
1823         return idx;
1824 }
1825
1826 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1827 {
1828         clear_bit(idx, map);
1829 }
1830
1831 static void arm_smmu_domain_free(struct iommu_domain *domain)
1832 {
1833         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1834         struct arm_smmu_device *smmu = smmu_domain->smmu;
1835
1836         iommu_put_dma_cookie(domain);
1837         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1838
1839         /* Free the CD and ASID, if we allocated them */
1840         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1841                 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1842
1843                 /* Prevent SVA from touching the CD while we're freeing it */
1844                 mutex_lock(&arm_smmu_asid_lock);
1845                 if (cfg->cdcfg.cdtab)
1846                         arm_smmu_free_cd_tables(smmu_domain);
1847                 arm_smmu_free_asid(&cfg->cd);
1848                 mutex_unlock(&arm_smmu_asid_lock);
1849         } else {
1850                 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1851                 if (cfg->vmid)
1852                         arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1853         }
1854
1855         kfree(smmu_domain);
1856 }
1857
1858 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1859                                        struct arm_smmu_master *master,
1860                                        struct io_pgtable_cfg *pgtbl_cfg)
1861 {
1862         int ret;
1863         u32 asid;
1864         struct arm_smmu_device *smmu = smmu_domain->smmu;
1865         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1866         typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1867
1868         refcount_set(&cfg->cd.refs, 1);
1869
1870         /* Prevent SVA from modifying the ASID until it is written to the CD */
1871         mutex_lock(&arm_smmu_asid_lock);
1872         ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
1873                        XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1874         if (ret)
1875                 goto out_unlock;
1876
1877         cfg->s1cdmax = master->ssid_bits;
1878
1879         ret = arm_smmu_alloc_cd_tables(smmu_domain);
1880         if (ret)
1881                 goto out_free_asid;
1882
1883         cfg->cd.asid    = (u16)asid;
1884         cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
1885         cfg->cd.tcr     = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1886                           FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1887                           FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1888                           FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1889                           FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1890                           FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1891                           CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
1892         cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair;
1893
1894         /*
1895          * Note that this will end up calling arm_smmu_sync_cd() before
1896          * the master has been added to the devices list for this domain.
1897          * This isn't an issue because the STE hasn't been installed yet.
1898          */
1899         ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1900         if (ret)
1901                 goto out_free_cd_tables;
1902
1903         mutex_unlock(&arm_smmu_asid_lock);
1904         return 0;
1905
1906 out_free_cd_tables:
1907         arm_smmu_free_cd_tables(smmu_domain);
1908 out_free_asid:
1909         arm_smmu_free_asid(&cfg->cd);
1910 out_unlock:
1911         mutex_unlock(&arm_smmu_asid_lock);
1912         return ret;
1913 }
1914
1915 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1916                                        struct arm_smmu_master *master,
1917                                        struct io_pgtable_cfg *pgtbl_cfg)
1918 {
1919         int vmid;
1920         struct arm_smmu_device *smmu = smmu_domain->smmu;
1921         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1922         typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1923
1924         vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1925         if (vmid < 0)
1926                 return vmid;
1927
1928         vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1929         cfg->vmid       = (u16)vmid;
1930         cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1931         cfg->vtcr       = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1932                           FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1933                           FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1934                           FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1935                           FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1936                           FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1937                           FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1938         return 0;
1939 }
1940
1941 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1942                                     struct arm_smmu_master *master)
1943 {
1944         int ret;
1945         unsigned long ias, oas;
1946         enum io_pgtable_fmt fmt;
1947         struct io_pgtable_cfg pgtbl_cfg;
1948         struct io_pgtable_ops *pgtbl_ops;
1949         int (*finalise_stage_fn)(struct arm_smmu_domain *,
1950                                  struct arm_smmu_master *,
1951                                  struct io_pgtable_cfg *);
1952         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1953         struct arm_smmu_device *smmu = smmu_domain->smmu;
1954
1955         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1956                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1957                 return 0;
1958         }
1959
1960         /* Restrict the stage to what we can actually support */
1961         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1962                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1963         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1964                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1965
1966         switch (smmu_domain->stage) {
1967         case ARM_SMMU_DOMAIN_S1:
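                /*
                 * Stage-1 output addresses feed the SMMU's (stage-2) input
                 * address space, which is why oas comes from smmu->ias here.
                 */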
1968                 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1969                 ias = min_t(unsigned long, ias, VA_BITS);
1970                 oas = smmu->ias;
1971                 fmt = ARM_64_LPAE_S1;
1972                 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1973                 break;
1974         case ARM_SMMU_DOMAIN_NESTED:
1975         case ARM_SMMU_DOMAIN_S2:
1976                 ias = smmu->ias;
1977                 oas = smmu->oas;
1978                 fmt = ARM_64_LPAE_S2;
1979                 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1980                 break;
1981         default:
1982                 return -EINVAL;
1983         }
1984
1985         pgtbl_cfg = (struct io_pgtable_cfg) {
1986                 .pgsize_bitmap  = smmu->pgsize_bitmap,
1987                 .ias            = ias,
1988                 .oas            = oas,
1989                 .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENCY,
1990                 .tlb            = &arm_smmu_flush_ops,
1991                 .iommu_dev      = smmu->dev,
1992         };
1993
1994         if (smmu_domain->non_strict)
1995                 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1996
1997         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1998         if (!pgtbl_ops)
1999                 return -ENOMEM;
2000
2001         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2002         domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2003         domain->geometry.force_aperture = true;
2004
2005         ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2006         if (ret < 0) {
2007                 free_io_pgtable_ops(pgtbl_ops);
2008                 return ret;
2009         }
2010
2011         smmu_domain->pgtbl_ops = pgtbl_ops;
2012         return 0;
2013 }
2014
2015 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2016 {
2017         __le64 *step;
2018         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2019
2020         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2021                 struct arm_smmu_strtab_l1_desc *l1_desc;
2022                 int idx;
2023
2024                 /* Two-level walk */
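                /*
                 * For example, with the driver's usual STRTAB_SPLIT of 8,
                 * SID 0x1234 selects L1 descriptor 0x12 and then STE 0x34
                 * within the L2 table that the descriptor points to.
                 */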
2025                 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2026                 l1_desc = &cfg->l1_desc[idx];
2027                 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2028                 step = &l1_desc->l2ptr[idx];
2029         } else {
2030                 /* Simple linear lookup */
2031                 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2032         }
2033
2034         return step;
2035 }
2036
2037 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2038 {
2039         int i, j;
2040         struct arm_smmu_device *smmu = master->smmu;
2041
2042         for (i = 0; i < master->num_sids; ++i) {
2043                 u32 sid = master->sids[i];
2044                 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2045
2046                 /* Bridged PCI devices may end up with duplicated IDs */
2047                 for (j = 0; j < i; j++)
2048                         if (master->sids[j] == sid)
2049                                 break;
2050                 if (j < i)
2051                         continue;
2052
2053                 arm_smmu_write_strtab_ent(master, sid, step);
2054         }
2055 }
2056
2057 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2058 {
2059         struct device *dev = master->dev;
2060         struct arm_smmu_device *smmu = master->smmu;
2061         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2062
2063         if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2064                 return false;
2065
2066         if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2067                 return false;
2068
2069         return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2070 }
2071
2072 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2073 {
2074         size_t stu;
2075         struct pci_dev *pdev;
2076         struct arm_smmu_device *smmu = master->smmu;
2077         struct arm_smmu_domain *smmu_domain = master->domain;
2078
2079         /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2080         if (!master->ats_enabled)
2081                 return;
2082
2083         /* Smallest Translation Unit: log2 of the smallest supported granule */
2084         stu = __ffs(smmu->pgsize_bitmap);
2085         pdev = to_pci_dev(master->dev);
2086
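        /*
         * Publish this master to concurrent unmappers before ATS is enabled
         * at the endpoint: the counter increment and ATC flush here pair with
         * the smp_mb() in arm_smmu_atc_inv_domain().
         */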
2087         atomic_inc(&smmu_domain->nr_ats_masters);
2088         arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2089         if (pci_enable_ats(pdev, stu))
2090                 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2091 }
2092
2093 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2094 {
2095         struct arm_smmu_domain *smmu_domain = master->domain;
2096
2097         if (!master->ats_enabled)
2098                 return;
2099
2100         pci_disable_ats(to_pci_dev(master->dev));
2101         /*
2102          * Ensure ATS is disabled at the endpoint before we issue the
2103          * ATC invalidation via the SMMU.
2104          */
2105         wmb();
2106         arm_smmu_atc_inv_master(master);
2107         atomic_dec(&smmu_domain->nr_ats_masters);
2108 }
2109
2110 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2111 {
2112         int ret;
2113         int features;
2114         int num_pasids;
2115         struct pci_dev *pdev;
2116
2117         if (!dev_is_pci(master->dev))
2118                 return -ENODEV;
2119
2120         pdev = to_pci_dev(master->dev);
2121
2122         features = pci_pasid_features(pdev);
2123         if (features < 0)
2124                 return features;
2125
2126         num_pasids = pci_max_pasids(pdev);
2127         if (num_pasids <= 0)
2128                 return num_pasids;
2129
2130         ret = pci_enable_pasid(pdev, features);
2131         if (ret) {
2132                 dev_err(&pdev->dev, "Failed to enable PASID\n");
2133                 return ret;
2134         }
2135
2136         master->ssid_bits = min_t(u8, ilog2(num_pasids),
2137                                   master->smmu->ssid_bits);
2138         return 0;
2139 }
2140
2141 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2142 {
2143         struct pci_dev *pdev;
2144
2145         if (!dev_is_pci(master->dev))
2146                 return;
2147
2148         pdev = to_pci_dev(master->dev);
2149
2150         if (!pdev->pasid_enabled)
2151                 return;
2152
2153         master->ssid_bits = 0;
2154         pci_disable_pasid(pdev);
2155 }
2156
2157 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2158 {
2159         unsigned long flags;
2160         struct arm_smmu_domain *smmu_domain = master->domain;
2161
2162         if (!smmu_domain)
2163                 return;
2164
2165         arm_smmu_disable_ats(master);
2166
2167         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2168         list_del(&master->domain_head);
2169         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2170
2171         master->domain = NULL;
2172         master->ats_enabled = false;
2173         arm_smmu_install_ste_for_dev(master);
2174 }
2175
2176 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2177 {
2178         int ret = 0;
2179         unsigned long flags;
2180         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2181         struct arm_smmu_device *smmu;
2182         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2183         struct arm_smmu_master *master;
2184
2185         if (!fwspec)
2186                 return -ENOENT;
2187
2188         master = dev_iommu_priv_get(dev);
2189         smmu = master->smmu;
2190
2191         /*
2192          * Checking that SVA is disabled ensures that this device isn't bound to
2193          * any mm, and can be safely detached from its old domain. Bonds cannot
2194          * be removed concurrently since we're holding the group mutex.
2195          */
2196         if (arm_smmu_master_sva_enabled(master)) {
2197                 dev_err(dev, "cannot attach - SVA enabled\n");
2198                 return -EBUSY;
2199         }
2200
2201         arm_smmu_detach_dev(master);
2202
2203         mutex_lock(&smmu_domain->init_mutex);
2204
2205         if (!smmu_domain->smmu) {
2206                 smmu_domain->smmu = smmu;
2207                 ret = arm_smmu_domain_finalise(domain, master);
2208                 if (ret) {
2209                         smmu_domain->smmu = NULL;
2210                         goto out_unlock;
2211                 }
2212         } else if (smmu_domain->smmu != smmu) {
2213                 dev_err(dev,
2214                         "cannot attach to SMMU %s (upstream of %s)\n",
2215                         dev_name(smmu_domain->smmu->dev),
2216                         dev_name(smmu->dev));
2217                 ret = -ENXIO;
2218                 goto out_unlock;
2219         } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2220                    master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2221                 dev_err(dev,
2222                         "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2223                         smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2224                 ret = -EINVAL;
2225                 goto out_unlock;
2226         }
2227
2228         master->domain = smmu_domain;
2229
2230         if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2231                 master->ats_enabled = arm_smmu_ats_supported(master);
2232
2233         arm_smmu_install_ste_for_dev(master);
2234
2235         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2236         list_add(&master->domain_head, &smmu_domain->devices);
2237         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2238
2239         arm_smmu_enable_ats(master);
2240
2241 out_unlock:
2242         mutex_unlock(&smmu_domain->init_mutex);
2243         return ret;
2244 }
2245
2246 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2247                         phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2248 {
2249         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2250
2251         if (!ops)
2252                 return -ENODEV;
2253
2254         return ops->map(ops, iova, paddr, size, prot, gfp);
2255 }
2256
2257 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2258                              size_t size, struct iommu_iotlb_gather *gather)
2259 {
2260         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2261         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2262
2263         if (!ops)
2264                 return 0;
2265
2266         return ops->unmap(ops, iova, size, gather);
2267 }
2268
2269 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2270 {
2271         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2272
2273         if (smmu_domain->smmu)
2274                 arm_smmu_tlb_inv_context(smmu_domain);
2275 }
2276
2277 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2278                                 struct iommu_iotlb_gather *gather)
2279 {
2280         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2281
2282         arm_smmu_tlb_inv_range_domain(gather->start,
2283                                       gather->end - gather->start,
2284                                       gather->pgsize, true, smmu_domain);
2285 }
2286
2287 static phys_addr_t
2288 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2289 {
2290         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2291
2292         if (domain->type == IOMMU_DOMAIN_IDENTITY)
2293                 return iova;
2294
2295         if (!ops)
2296                 return 0;
2297
2298         return ops->iova_to_phys(ops, iova);
2299 }
2300
2301 static struct platform_driver arm_smmu_driver;
2302
2303 static
2304 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2305 {
2306         struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2307                                                           fwnode);
2308         put_device(dev);
2309         return dev ? dev_get_drvdata(dev) : NULL;
2310 }
2311
2312 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2313 {
2314         unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2315
2316         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2317                 limit *= 1UL << STRTAB_SPLIT;
2318
2319         return sid < limit;
2320 }
2321
2322 static struct iommu_ops arm_smmu_ops;
2323
2324 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2325 {
2326         int i, ret;
2327         struct arm_smmu_device *smmu;
2328         struct arm_smmu_master *master;
2329         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2330
2331         if (!fwspec || fwspec->ops != &arm_smmu_ops)
2332                 return ERR_PTR(-ENODEV);
2333
2334         if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2335                 return ERR_PTR(-EBUSY);
2336
2337         smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2338         if (!smmu)
2339                 return ERR_PTR(-ENODEV);
2340
2341         master = kzalloc(sizeof(*master), GFP_KERNEL);
2342         if (!master)
2343                 return ERR_PTR(-ENOMEM);
2344
2345         master->dev = dev;
2346         master->smmu = smmu;
2347         master->sids = fwspec->ids;
2348         master->num_sids = fwspec->num_ids;
2349         INIT_LIST_HEAD(&master->bonds);
2350         dev_iommu_priv_set(dev, master);
2351
2352         /* Check the SIDs are in range of the SMMU and our stream table */
2353         for (i = 0; i < master->num_sids; i++) {
2354                 u32 sid = master->sids[i];
2355
2356                 if (!arm_smmu_sid_in_range(smmu, sid)) {
2357                         ret = -ERANGE;
2358                         goto err_free_master;
2359                 }
2360
2361                 /* Ensure l2 strtab is initialised */
2362                 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2363                         ret = arm_smmu_init_l2_strtab(smmu, sid);
2364                         if (ret)
2365                                 goto err_free_master;
2366                 }
2367         }
2368
2369         master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2370
2371         /*
2372          * Note that PASID must be enabled before, and disabled after ATS:
2373          * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2374          *
2375          *   Behavior is undefined if this bit is Set and the value of the PASID
2376          *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2377          *   are changed.
2378          */
2379         arm_smmu_enable_pasid(master);
2380
2381         if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2382                 master->ssid_bits = min_t(u8, master->ssid_bits,
2383                                           CTXDESC_LINEAR_CDMAX);
2384
2385         return &smmu->iommu;
2386
2387 err_free_master:
2388         kfree(master);
2389         dev_iommu_priv_set(dev, NULL);
2390         return ERR_PTR(ret);
2391 }
2392
2393 static void arm_smmu_release_device(struct device *dev)
2394 {
2395         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2396         struct arm_smmu_master *master;
2397
2398         if (!fwspec || fwspec->ops != &arm_smmu_ops)
2399                 return;
2400
2401         master = dev_iommu_priv_get(dev);
2402         WARN_ON(arm_smmu_master_sva_enabled(master));
2403         arm_smmu_detach_dev(master);
2404         arm_smmu_disable_pasid(master);
2405         kfree(master);
2406         iommu_fwspec_free(dev);
2407 }
2408
2409 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2410 {
2411         struct iommu_group *group;
2412
2413         /*
2414          * We don't support devices sharing stream IDs other than PCI RID
2415          * aliases, since the necessary ID-to-device lookup becomes rather
2416          * impractical given a potentially sparse 32-bit stream ID space.
2417          */
2418         if (dev_is_pci(dev))
2419                 group = pci_device_group(dev);
2420         else
2421                 group = generic_device_group(dev);
2422
2423         return group;
2424 }
2425
2426 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2427                                     enum iommu_attr attr, void *data)
2428 {
2429         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2430
2431         switch (domain->type) {
2432         case IOMMU_DOMAIN_UNMANAGED:
2433                 switch (attr) {
2434                 case DOMAIN_ATTR_NESTING:
2435                         *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2436                         return 0;
2437                 default:
2438                         return -ENODEV;
2439                 }
2440                 break;
2441         case IOMMU_DOMAIN_DMA:
2442                 switch (attr) {
2443                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2444                         *(int *)data = smmu_domain->non_strict;
2445                         return 0;
2446                 default:
2447                         return -ENODEV;
2448                 }
2449                 break;
2450         default:
2451                 return -EINVAL;
2452         }
2453 }
2454
2455 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2456                                     enum iommu_attr attr, void *data)
2457 {
2458         int ret = 0;
2459         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2460
2461         mutex_lock(&smmu_domain->init_mutex);
2462
2463         switch (domain->type) {
2464         case IOMMU_DOMAIN_UNMANAGED:
2465                 switch (attr) {
2466                 case DOMAIN_ATTR_NESTING:
2467                         if (smmu_domain->smmu) {
2468                                 ret = -EPERM;
2469                                 goto out_unlock;
2470                         }
2471
2472                         if (*(int *)data)
2473                                 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2474                         else
2475                                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2476                         break;
2477                 default:
2478                         ret = -ENODEV;
2479                 }
2480                 break;
2481         case IOMMU_DOMAIN_DMA:
2482                 switch (attr) {
2483                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2484                         smmu_domain->non_strict = *(int *)data;
2485                         break;
2486                 default:
2487                         ret = -ENODEV;
2488                 }
2489                 break;
2490         default:
2491                 ret = -EINVAL;
2492         }
2493
2494 out_unlock:
2495         mutex_unlock(&smmu_domain->init_mutex);
2496         return ret;
2497 }
2498
2499 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2500 {
2501         return iommu_fwspec_add_ids(dev, args->args, 1);
2502 }
2503
2504 static void arm_smmu_get_resv_regions(struct device *dev,
2505                                       struct list_head *head)
2506 {
2507         struct iommu_resv_region *region;
2508         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2509
2510         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2511                                          prot, IOMMU_RESV_SW_MSI);
2512         if (!region)
2513                 return;
2514
2515         list_add_tail(&region->list, head);
2516
2517         iommu_dma_get_resv_regions(dev, head);
2518 }
2519
2520 static bool arm_smmu_dev_has_feature(struct device *dev,
2521                                      enum iommu_dev_features feat)
2522 {
2523         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2524
2525         if (!master)
2526                 return false;
2527
2528         switch (feat) {
2529         case IOMMU_DEV_FEAT_SVA:
2530                 return arm_smmu_master_sva_supported(master);
2531         default:
2532                 return false;
2533         }
2534 }
2535
2536 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2537                                          enum iommu_dev_features feat)
2538 {
2539         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2540
2541         if (!master)
2542                 return false;
2543
2544         switch (feat) {
2545         case IOMMU_DEV_FEAT_SVA:
2546                 return arm_smmu_master_sva_enabled(master);
2547         default:
2548                 return false;
2549         }
2550 }
2551
2552 static int arm_smmu_dev_enable_feature(struct device *dev,
2553                                        enum iommu_dev_features feat)
2554 {
2555         if (!arm_smmu_dev_has_feature(dev, feat))
2556                 return -ENODEV;
2557
2558         if (arm_smmu_dev_feature_enabled(dev, feat))
2559                 return -EBUSY;
2560
2561         switch (feat) {
2562         case IOMMU_DEV_FEAT_SVA:
2563                 return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
2564         default:
2565                 return -EINVAL;
2566         }
2567 }
2568
2569 static int arm_smmu_dev_disable_feature(struct device *dev,
2570                                         enum iommu_dev_features feat)
2571 {
2572         if (!arm_smmu_dev_feature_enabled(dev, feat))
2573                 return -EINVAL;
2574
2575         switch (feat) {
2576         case IOMMU_DEV_FEAT_SVA:
2577                 return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
2578         default:
2579                 return -EINVAL;
2580         }
2581 }
2582
2583 static struct iommu_ops arm_smmu_ops = {
2584         .capable                = arm_smmu_capable,
2585         .domain_alloc           = arm_smmu_domain_alloc,
2586         .domain_free            = arm_smmu_domain_free,
2587         .attach_dev             = arm_smmu_attach_dev,
2588         .map                    = arm_smmu_map,
2589         .unmap                  = arm_smmu_unmap,
2590         .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
2591         .iotlb_sync             = arm_smmu_iotlb_sync,
2592         .iova_to_phys           = arm_smmu_iova_to_phys,
2593         .probe_device           = arm_smmu_probe_device,
2594         .release_device         = arm_smmu_release_device,
2595         .device_group           = arm_smmu_device_group,
2596         .domain_get_attr        = arm_smmu_domain_get_attr,
2597         .domain_set_attr        = arm_smmu_domain_set_attr,
2598         .of_xlate               = arm_smmu_of_xlate,
2599         .get_resv_regions       = arm_smmu_get_resv_regions,
2600         .put_resv_regions       = generic_iommu_put_resv_regions,
2601         .dev_has_feat           = arm_smmu_dev_has_feature,
2602         .dev_feat_enabled       = arm_smmu_dev_feature_enabled,
2603         .dev_enable_feat        = arm_smmu_dev_enable_feature,
2604         .dev_disable_feat       = arm_smmu_dev_disable_feature,
2605         .sva_bind               = arm_smmu_sva_bind,
2606         .sva_unbind             = arm_smmu_sva_unbind,
2607         .sva_get_pasid          = arm_smmu_sva_get_pasid,
2608         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
2609 };
2610
2611 /* Probing and initialisation functions */
2612 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2613                                    struct arm_smmu_queue *q,
2614                                    void __iomem *page,
2615                                    unsigned long prod_off,
2616                                    unsigned long cons_off,
2617                                    size_t dwords, const char *name)
2618 {
2619         size_t qsz;
2620
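        /*
         * Start from the advertised maximum queue size and halve it until the
         * DMA allocation succeeds or the queue would drop below one page.
         */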
2621         do {
2622                 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2623                 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2624                                               GFP_KERNEL);
2625                 if (q->base || qsz < PAGE_SIZE)
2626                         break;
2627
2628                 q->llq.max_n_shift--;
2629         } while (1);
2630
2631         if (!q->base) {
2632                 dev_err(smmu->dev,
2633                         "failed to allocate queue (0x%zx bytes) for %s\n",
2634                         qsz, name);
2635                 return -ENOMEM;
2636         }
2637
2638         if (!WARN_ON(q->base_dma & (qsz - 1))) {
2639                 dev_info(smmu->dev, "allocated %u entries for %s\n",
2640                          1 << q->llq.max_n_shift, name);
2641         }
2642
2643         q->prod_reg     = page + prod_off;
2644         q->cons_reg     = page + cons_off;
2645         q->ent_dwords   = dwords;
2646
2647         q->q_base  = Q_BASE_RWA;
2648         q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2649         q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2650
2651         q->llq.prod = q->llq.cons = 0;
2652         return 0;
2653 }
2654
2655 static void arm_smmu_cmdq_free_bitmap(void *data)
2656 {
2657         unsigned long *bitmap = data;
2658         bitmap_free(bitmap);
2659 }
2660
2661 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2662 {
2663         int ret = 0;
2664         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2665         unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2666         atomic_long_t *bitmap;
2667
2668         atomic_set(&cmdq->owner_prod, 0);
2669         atomic_set(&cmdq->lock, 0);
2670
2671         bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2672         if (!bitmap) {
2673                 dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2674                 ret = -ENOMEM;
2675         } else {
2676                 cmdq->valid_map = bitmap;
2677                 devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2678         }
2679
2680         return ret;
2681 }
2682
2683 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2684 {
2685         int ret;
2686
2687         /* cmdq */
2688         ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2689                                       ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2690                                       CMDQ_ENT_DWORDS, "cmdq");
2691         if (ret)
2692                 return ret;
2693
2694         ret = arm_smmu_cmdq_init(smmu);
2695         if (ret)
2696                 return ret;
2697
2698         /* evtq */
2699         ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2700                                       ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2701                                       EVTQ_ENT_DWORDS, "evtq");
2702         if (ret)
2703                 return ret;
2704
2705         /* priq */
2706         if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2707                 return 0;
2708
2709         return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2710                                        ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2711                                        PRIQ_ENT_DWORDS, "priq");
2712 }
2713
2714 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2715 {
2716         unsigned int i;
2717         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2718         size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2719         void *strtab = smmu->strtab_cfg.strtab;
2720
2721         cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2722         if (!cfg->l1_desc) {
2723                 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2724                 return -ENOMEM;
2725         }
2726
2727         for (i = 0; i < cfg->num_l1_ents; ++i) {
2728                 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2729                 strtab += STRTAB_L1_DESC_DWORDS << 3;
2730         }
2731
2732         return 0;
2733 }
2734
2735 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2736 {
2737         void *strtab;
2738         u64 reg;
2739         u32 size, l1size;
2740         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2741
2742         /* Calculate the L1 size, capped to the SIDSIZE. */
2743         size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2744         size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2745         cfg->num_l1_ents = 1 << size;
2746
2747         size += STRTAB_SPLIT;
2748         if (size < smmu->sid_bits)
2749                 dev_warn(smmu->dev,
2750                          "2-level strtab only covers %u/%u bits of SID\n",
2751                          size, smmu->sid_bits);
2752
2753         l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2754         strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2755                                      GFP_KERNEL);
2756         if (!strtab) {
2757                 dev_err(smmu->dev,
2758                         "failed to allocate l1 stream table (%u bytes)\n",
2759                         l1size);
2760                 return -ENOMEM;
2761         }
2762         cfg->strtab = strtab;
2763
2764         /* Configure strtab_base_cfg for 2 levels */
2765         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2766         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2767         reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2768         cfg->strtab_base_cfg = reg;
2769
2770         return arm_smmu_init_l1_strtab(smmu);
2771 }
2772
2773 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2774 {
2775         void *strtab;
2776         u64 reg;
2777         u32 size;
2778         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2779
2780         size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2781         strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2782                                      GFP_KERNEL);
2783         if (!strtab) {
2784                 dev_err(smmu->dev,
2785                         "failed to allocate linear stream table (%u bytes)\n",
2786                         size);
2787                 return -ENOMEM;
2788         }
2789         cfg->strtab = strtab;
2790         cfg->num_l1_ents = 1 << smmu->sid_bits;
2791
2792         /* Configure strtab_base_cfg for a linear table covering all SIDs */
2793         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2794         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2795         cfg->strtab_base_cfg = reg;
2796
2797         arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2798         return 0;
2799 }
2800
2801 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2802 {
2803         u64 reg;
2804         int ret;
2805
2806         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2807                 ret = arm_smmu_init_strtab_2lvl(smmu);
2808         else
2809                 ret = arm_smmu_init_strtab_linear(smmu);
2810
2811         if (ret)
2812                 return ret;
2813
2814         /* Set the strtab base address */
2815         reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2816         reg |= STRTAB_BASE_RA;
2817         smmu->strtab_cfg.strtab_base = reg;
2818
2819         /* Allocate the first VMID for stage-2 bypass STEs */
2820         set_bit(0, smmu->vmid_map);
2821         return 0;
2822 }
2823
2824 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2825 {
2826         int ret;
2827
2828         ret = arm_smmu_init_queues(smmu);
2829         if (ret)
2830                 return ret;
2831
2832         return arm_smmu_init_strtab(smmu);
2833 }
2834
2835 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2836                                    unsigned int reg_off, unsigned int ack_off)
2837 {
2838         u32 reg;
2839
2840         writel_relaxed(val, smmu->base + reg_off);
2841         return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2842                                           1, ARM_SMMU_POLL_TIMEOUT_US);
2843 }
2844
2845 /* GBPA is "special" */
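/*
 * It can only be updated while GBPA_UPDATE is clear: wait for any previous
 * update to finish, write the new value with GBPA_UPDATE set, then poll until
 * the SMMU clears the bit again.
 */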
2846 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2847 {
2848         int ret;
2849         u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2850
2851         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2852                                          1, ARM_SMMU_POLL_TIMEOUT_US);
2853         if (ret)
2854                 return ret;
2855
2856         reg &= ~clr;
2857         reg |= set;
2858         writel_relaxed(reg | GBPA_UPDATE, gbpa);
2859         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2860                                          1, ARM_SMMU_POLL_TIMEOUT_US);
2861
2862         if (ret)
2863                 dev_err(smmu->dev, "GBPA not responding to update\n");
2864         return ret;
2865 }
2866
2867 static void arm_smmu_free_msis(void *data)
2868 {
2869         struct device *dev = data;
2870         platform_msi_domain_free_irqs(dev);
2871 }
2872
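     /*
      * platform MSI write_msg callback: program the doorbell address,
      * payload and memory attributes into the IRQ_CFG{0,1,2} registers of
      * the queue selected by the descriptor's MSI index.
      */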
2873 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2874 {
2875         phys_addr_t doorbell;
2876         struct device *dev = msi_desc_to_dev(desc);
2877         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2878         phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2879
2880         doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2881         doorbell &= MSI_CFG0_ADDR_MASK;
2882
2883         writeq_relaxed(doorbell, smmu->base + cfg[0]);
2884         writel_relaxed(msg->data, smmu->base + cfg[1]);
2885         writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2886 }
2887
2888 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2889 {
2890         struct msi_desc *desc;
2891         int ret, nvec = ARM_SMMU_MAX_MSIS;
2892         struct device *dev = smmu->dev;
2893
2894         /* Clear the MSI address regs */
2895         writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2896         writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2897
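             /*
              * Clear the PRIQ doorbell too if PRI is supported; otherwise,
              * since PRIQ is the last MSI index, simply request one vector
              * fewer.
              */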
2898         if (smmu->features & ARM_SMMU_FEAT_PRI)
2899                 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2900         else
2901                 nvec--;
2902
2903         if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2904                 return;
2905
2906         if (!dev->msi_domain) {
2907                 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2908                 return;
2909         }
2910
2911         /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2912         ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2913         if (ret) {
2914                 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2915                 return;
2916         }
2917
2918         for_each_msi_entry(desc, dev) {
2919                 switch (desc->platform.msi_index) {
2920                 case EVTQ_MSI_INDEX:
2921                         smmu->evtq.q.irq = desc->irq;
2922                         break;
2923                 case GERROR_MSI_INDEX:
2924                         smmu->gerr_irq = desc->irq;
2925                         break;
2926                 case PRIQ_MSI_INDEX:
2927                         smmu->priq.q.irq = desc->irq;
2928                         break;
2929                 default:        /* Unknown */
2930                         continue;
2931                 }
2932         }
2933
2934         /* Add callback to free MSIs on teardown */
2935         devm_add_action(dev, arm_smmu_free_msis, dev);
2936 }
2937
2938 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2939 {
2940         int irq, ret;
2941
2942         arm_smmu_setup_msis(smmu);
2943
2944         /* Request interrupt lines */
2945         irq = smmu->evtq.q.irq;
2946         if (irq) {
2947                 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2948                                                 arm_smmu_evtq_thread,
2949                                                 IRQF_ONESHOT,
2950                                                 "arm-smmu-v3-evtq", smmu);
2951                 if (ret < 0)
2952                         dev_warn(smmu->dev, "failed to enable evtq irq\n");
2953         } else {
2954                 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2955         }
2956
2957         irq = smmu->gerr_irq;
2958         if (irq) {
2959                 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2960                                        0, "arm-smmu-v3-gerror", smmu);
2961                 if (ret < 0)
2962                         dev_warn(smmu->dev, "failed to enable gerror irq\n");
2963         } else {
2964                 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2965         }
2966
2967         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2968                 irq = smmu->priq.q.irq;
2969                 if (irq) {
2970                         ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2971                                                         arm_smmu_priq_thread,
2972                                                         IRQF_ONESHOT,
2973                                                         "arm-smmu-v3-priq",
2974                                                         smmu);
2975                         if (ret < 0)
2976                                 dev_warn(smmu->dev,
2977                                          "failed to enable priq irq\n");
2978                 } else {
2979                         dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2980                 }
2981         }
2982 }
2983
2984 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2985 {
2986         int ret, irq;
2987         u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2988
2989         /* Disable IRQs first */
2990         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2991                                       ARM_SMMU_IRQ_CTRLACK);
2992         if (ret) {
2993                 dev_err(smmu->dev, "failed to disable irqs\n");
2994                 return ret;
2995         }
2996
2997         irq = smmu->combined_irq;
2998         if (irq) {
2999                 /*
3000                  * Cavium ThunderX2 implementation doesn't support unique irq
3001                  * lines. Use a single irq line for all the SMMUv3 interrupts.
3002                  */
3003                 ret = devm_request_threaded_irq(smmu->dev, irq,
3004                                         arm_smmu_combined_irq_handler,
3005                                         arm_smmu_combined_irq_thread,
3006                                         IRQF_ONESHOT,
3007                                         "arm-smmu-v3-combined-irq", smmu);
3008                 if (ret < 0)
3009                         dev_warn(smmu->dev, "failed to enable combined irq\n");
3010         } else
3011                 arm_smmu_setup_unique_irqs(smmu);
3012
3013         if (smmu->features & ARM_SMMU_FEAT_PRI)
3014                 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3015
3016         /* Enable interrupt generation on the SMMU */
3017         ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3018                                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3019         if (ret)
3020                 dev_warn(smmu->dev, "failed to enable irqs\n");
3021
3022         return 0;
3023 }
3024
3025 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3026 {
3027         int ret;
3028
3029         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3030         if (ret)
3031                 dev_err(smmu->dev, "failed to clear cr0\n");
3032
3033         return ret;
3034 }
3035
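     /*
      * Bring the SMMU up from scratch: disable it, program the global
      * config, stream table and queue registers, then enable the command,
      * event and PRI queues one at a time (synchronising on CR0ACK after
      * each step) before finally setting SMMUEN, or falling back to GBPA
      * bypass if requested.
      */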
3036 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3037 {
3038         int ret;
3039         u32 reg, enables;
3040         struct arm_smmu_cmdq_ent cmd;
3041
3042         /* Clear CR0 and sync (disables SMMU and queue processing) */
3043         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3044         if (reg & CR0_SMMUEN) {
3045                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3046                 WARN_ON(is_kdump_kernel() && !disable_bypass);
3047                 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3048         }
3049
3050         ret = arm_smmu_device_disable(smmu);
3051         if (ret)
3052                 return ret;
3053
3054         /* CR1 (table and queue memory attributes) */
3055         reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3056               FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3057               FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3058               FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3059               FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3060               FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3061         writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3062
3063         /* CR2 (private TLB maintenance, record invalid SIDs, EL2-E2H regime) */
3064         reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3065         writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3066
3067         /* Stream table */
3068         writeq_relaxed(smmu->strtab_cfg.strtab_base,
3069                        smmu->base + ARM_SMMU_STRTAB_BASE);
3070         writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3071                        smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3072
3073         /* Command queue */
3074         writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3075         writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3076         writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3077
3078         enables = CR0_CMDQEN;
3079         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3080                                       ARM_SMMU_CR0ACK);
3081         if (ret) {
3082                 dev_err(smmu->dev, "failed to enable command queue\n");
3083                 return ret;
3084         }
3085
3086         /* Invalidate any cached configuration */
3087         cmd.opcode = CMDQ_OP_CFGI_ALL;
3088         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3089         arm_smmu_cmdq_issue_sync(smmu);
3090
3091         /* Invalidate any stale TLB entries */
3092         if (smmu->features & ARM_SMMU_FEAT_HYP) {
3093                 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3094                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3095         }
3096
3097         cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3098         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3099         arm_smmu_cmdq_issue_sync(smmu);
3100
3101         /* Event queue */
3102         writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3103         writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3104         writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3105
3106         enables |= CR0_EVTQEN;
3107         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3108                                       ARM_SMMU_CR0ACK);
3109         if (ret) {
3110                 dev_err(smmu->dev, "failed to enable event queue\n");
3111                 return ret;
3112         }
3113
3114         /* PRI queue */
3115         if (smmu->features & ARM_SMMU_FEAT_PRI) {
3116                 writeq_relaxed(smmu->priq.q.q_base,
3117                                smmu->base + ARM_SMMU_PRIQ_BASE);
3118                 writel_relaxed(smmu->priq.q.llq.prod,
3119                                smmu->page1 + ARM_SMMU_PRIQ_PROD);
3120                 writel_relaxed(smmu->priq.q.llq.cons,
3121                                smmu->page1 + ARM_SMMU_PRIQ_CONS);
3122
3123                 enables |= CR0_PRIQEN;
3124                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3125                                               ARM_SMMU_CR0ACK);
3126                 if (ret) {
3127                         dev_err(smmu->dev, "failed to enable PRI queue\n");
3128                         return ret;
3129                 }
3130         }
3131
3132         if (smmu->features & ARM_SMMU_FEAT_ATS) {
3133                 enables |= CR0_ATSCHK;
3134                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3135                                               ARM_SMMU_CR0ACK);
3136                 if (ret) {
3137                         dev_err(smmu->dev, "failed to enable ATS check\n");
3138                         return ret;
3139                 }
3140         }
3141
3142         ret = arm_smmu_setup_irqs(smmu);
3143         if (ret) {
3144                 dev_err(smmu->dev, "failed to setup irqs\n");
3145                 return ret;
3146         }
3147
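             /*
              * A kdump kernel may not have drivers for all of the masters
              * left running by the crashed kernel, so keep the event and
              * PRI queues disabled to avoid being flooded with faults from
              * their ongoing DMA.
              */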
3148         if (is_kdump_kernel())
3149                 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3150
3151         /* Enable the SMMU interface, or ensure bypass */
3152         if (!bypass || disable_bypass) {
3153                 enables |= CR0_SMMUEN;
3154         } else {
3155                 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3156                 if (ret)
3157                         return ret;
3158         }
3159         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3160                                       ARM_SMMU_CR0ACK);
3161         if (ret) {
3162                 dev_err(smmu->dev, "failed to enable SMMU interface\n");
3163                 return ret;
3164         }
3165
3166         return 0;
3167 }
3168
3169 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3170 {
3171         u32 reg;
3172         bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3173
3174         /* IDR0 */
3175         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3176
3177         /* 2-level structures */
3178         if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3179                 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3180
3181         if (reg & IDR0_CD2L)
3182                 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3183
3184         /*
3185          * Translation table endianness.
3186          * We currently require the same endianness as the CPU, but this
3187          * could be changed later by adding a new IO_PGTABLE_QUIRK.
3188          */
3189         switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3190         case IDR0_TTENDIAN_MIXED:
3191                 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3192                 break;
3193 #ifdef __BIG_ENDIAN
3194         case IDR0_TTENDIAN_BE:
3195                 smmu->features |= ARM_SMMU_FEAT_TT_BE;
3196                 break;
3197 #else
3198         case IDR0_TTENDIAN_LE:
3199                 smmu->features |= ARM_SMMU_FEAT_TT_LE;
3200                 break;
3201 #endif
3202         default:
3203                 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3204                 return -ENXIO;
3205         }
3206
3207         /* Boolean feature flags */
3208         if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3209                 smmu->features |= ARM_SMMU_FEAT_PRI;
3210
3211         if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3212                 smmu->features |= ARM_SMMU_FEAT_ATS;
3213
3214         if (reg & IDR0_SEV)
3215                 smmu->features |= ARM_SMMU_FEAT_SEV;
3216
3217         if (reg & IDR0_MSI) {
3218                 smmu->features |= ARM_SMMU_FEAT_MSI;
3219                 if (coherent && !disable_msipolling)
3220                         smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3221         }
3222
3223         if (reg & IDR0_HYP)
3224                 smmu->features |= ARM_SMMU_FEAT_HYP;
3225
3226         /*
3227          * The coherency feature as set by FW is used in preference to the ID
3228          * register, but warn on mismatch.
3229          */
3230         if (!!(reg & IDR0_COHACC) != coherent)
3231                 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3232                          coherent ? "true" : "false");
3233
3234         switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3235         case IDR0_STALL_MODEL_FORCE:
3236                 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3237                 fallthrough;
3238         case IDR0_STALL_MODEL_STALL:
3239                 smmu->features |= ARM_SMMU_FEAT_STALLS;
3240         }
3241
3242         if (reg & IDR0_S1P)
3243                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3244
3245         if (reg & IDR0_S2P)
3246                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3247
3248         if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3249                 dev_err(smmu->dev, "no translation support!\n");
3250                 return -ENXIO;
3251         }
3252
3253         /* We only support the AArch64 table format at present */
3254         switch (FIELD_GET(IDR0_TTF, reg)) {
3255         case IDR0_TTF_AARCH32_64:
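                     /*
                      * The AArch32 (LPAE) format caps the input address at
                      * 40 bits; this may be raised further down once the
                      * output address size is known.
                      */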
3256                 smmu->ias = 40;
3257                 fallthrough;
3258         case IDR0_TTF_AARCH64:
3259                 break;
3260         default:
3261                 dev_err(smmu->dev, "AArch64 table format not supported!\n");
3262                 return -ENXIO;
3263         }
3264
3265         /* ASID/VMID sizes */
3266         smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3267         smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3268
3269         /* IDR1 */
3270         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3271         if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3272                 dev_err(smmu->dev, "embedded implementation not supported\n");
3273                 return -ENXIO;
3274         }
3275
3276         /* Queue sizes, capped to ensure natural alignment */
3277         smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3278                                              FIELD_GET(IDR1_CMDQS, reg));
3279         if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3280                 /*
3281                  * We don't support splitting up batches, so one batch of
3282                  * commands plus an extra sync needs to fit inside the command
3283                  * queue. There's also no way we can handle the weird alignment
3284                  * restrictions on the base pointer for a unit-length queue.
3285                  */
3286                 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3287                         CMDQ_BATCH_ENTRIES);
3288                 return -ENXIO;
3289         }
3290
3291         smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3292                                              FIELD_GET(IDR1_EVTQS, reg));
3293         smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3294                                              FIELD_GET(IDR1_PRIQS, reg));
3295
3296         /* SID/SSID sizes */
3297         smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3298         smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3299
3300         /*
3301          * If the SMMU supports fewer bits than would fill a single L2 stream
3302          * table, use a linear table instead.
3303          */
3304         if (smmu->sid_bits <= STRTAB_SPLIT)
3305                 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3306
3307         /* IDR3 */
3308         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3309         if (FIELD_GET(IDR3_RIL, reg))
3310                 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3311
3312         /* IDR5 */
3313         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3314
3315         /* Maximum number of outstanding stalls */
3316         smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3317
3318         /* Page sizes */
3319         if (reg & IDR5_GRAN64K)
3320                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3321         if (reg & IDR5_GRAN16K)
3322                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3323         if (reg & IDR5_GRAN4K)
3324                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3325
3326         /* Input address size */
3327         if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3328                 smmu->features |= ARM_SMMU_FEAT_VAX;
3329
3330         /* Output address size */
3331         switch (FIELD_GET(IDR5_OAS, reg)) {
3332         case IDR5_OAS_32_BIT:
3333                 smmu->oas = 32;
3334                 break;
3335         case IDR5_OAS_36_BIT:
3336                 smmu->oas = 36;
3337                 break;
3338         case IDR5_OAS_40_BIT:
3339                 smmu->oas = 40;
3340                 break;
3341         case IDR5_OAS_42_BIT:
3342                 smmu->oas = 42;
3343                 break;
3344         case IDR5_OAS_44_BIT:
3345                 smmu->oas = 44;
3346                 break;
3347         case IDR5_OAS_52_BIT:
3348                 smmu->oas = 52;
3349                 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3350                 break;
3351         default:
3352                 dev_info(smmu->dev,
3353                         "unknown output address size. Truncating to 48-bit\n");
3354                 fallthrough;
3355         case IDR5_OAS_48_BIT:
3356                 smmu->oas = 48;
3357         }
3358
3359         if (arm_smmu_ops.pgsize_bitmap == -1UL)
3360                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3361         else
3362                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3363
3364         /* Set the DMA mask for our table walker */
3365         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3366                 dev_warn(smmu->dev,
3367                          "failed to set DMA mask for table walker\n");
3368
3369         smmu->ias = max(smmu->ias, smmu->oas);
3370
3371         if (arm_smmu_sva_supported(smmu))
3372                 smmu->features |= ARM_SMMU_FEAT_SVA;
3373
3374         dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3375                  smmu->ias, smmu->oas, smmu->features);
3376         return 0;
3377 }
3378
3379 #ifdef CONFIG_ACPI
3380 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3381 {
3382         switch (model) {
3383         case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3384                 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3385                 break;
3386         case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3387                 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3388                 break;
3389         }
3390
3391         dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3392 }
3393
3394 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3395                                       struct arm_smmu_device *smmu)
3396 {
3397         struct acpi_iort_smmu_v3 *iort_smmu;
3398         struct device *dev = smmu->dev;
3399         struct acpi_iort_node *node;
3400
3401         node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3402
3403         /* Retrieve SMMUv3 specific data */
3404         iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3405
3406         acpi_smmu_get_options(iort_smmu->model, smmu);
3407
3408         if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3409                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3410
3411         return 0;
3412 }
3413 #else
3414 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3415                                              struct arm_smmu_device *smmu)
3416 {
3417         return -ENODEV;
3418 }
3419 #endif
3420
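     /*
      * For reference only, a minimal sketch of the devicetree node consumed
      * by the DT probe path below; the unit address, register size and
      * interrupt numbers are purely illustrative:
      *
      *     smmu@2b400000 {
      *             compatible = "arm,smmu-v3";
      *             reg = <0x0 0x2b400000 0x0 0x20000>;
      *             interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
      *                          <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
      *                          <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>;
      *             interrupt-names = "eventq", "gerror", "priq";
      *             #iommu-cells = <1>;
      *             dma-coherent;
      *     };
      */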
3421 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3422                                     struct arm_smmu_device *smmu)
3423 {
3424         struct device *dev = &pdev->dev;
3425         u32 cells;
3426         int ret = -EINVAL;
3427
3428         if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3429                 dev_err(dev, "missing #iommu-cells property\n");
3430         else if (cells != 1)
3431                 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3432         else
3433                 ret = 0;
3434
3435         parse_driver_options(smmu);
3436
3437         if (of_dma_is_coherent(dev->of_node))
3438                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3439
3440         return ret;
3441 }
3442
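     /*
      * Implementations with ARM_SMMU_OPT_PAGE0_REGS_ONLY (e.g. Cavium
      * CN99xx) expose their page 1 registers within page 0, so only a
      * single 64K page of MMIO space is usable; everything else gets the
      * architected two 64K pages.
      */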
3443 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3444 {
3445         if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3446                 return SZ_64K;
3447         else
3448                 return SZ_128K;
3449 }
3450
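     /*
      * Install @ops on every bus type we support (PCI, AMBA and platform),
      * or detach them again when @ops is NULL. If a later bus fails, roll
      * back the buses that were already switched over.
      */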
3451 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3452 {
3453         int err;
3454
3455 #ifdef CONFIG_PCI
3456         if (pci_bus_type.iommu_ops != ops) {
3457                 err = bus_set_iommu(&pci_bus_type, ops);
3458                 if (err)
3459                         return err;
3460         }
3461 #endif
3462 #ifdef CONFIG_ARM_AMBA
3463         if (amba_bustype.iommu_ops != ops) {
3464                 err = bus_set_iommu(&amba_bustype, ops);
3465                 if (err)
3466                         goto err_reset_pci_ops;
3467         }
3468 #endif
3469         if (platform_bus_type.iommu_ops != ops) {
3470                 err = bus_set_iommu(&platform_bus_type, ops);
3471                 if (err)
3472                         goto err_reset_amba_ops;
3473         }
3474
3475         return 0;
3476
3477 err_reset_amba_ops:
3478 #ifdef CONFIG_ARM_AMBA
3479         bus_set_iommu(&amba_bustype, NULL);
3480 #endif
3481 err_reset_pci_ops: __maybe_unused;
3482 #ifdef CONFIG_PCI
3483         bus_set_iommu(&pci_bus_type, NULL);
3484 #endif
3485         return err;
3486 }
3487
3488 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3489                                       resource_size_t size)
3490 {
3491         struct resource res = DEFINE_RES_MEM(start, size);
3492
3493         return devm_ioremap_resource(dev, &res);
3494 }
3495
3496 static int arm_smmu_device_probe(struct platform_device *pdev)
3497 {
3498         int irq, ret;
3499         struct resource *res;
3500         resource_size_t ioaddr;
3501         struct arm_smmu_device *smmu;
3502         struct device *dev = &pdev->dev;
3503         bool bypass;
3504
3505         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3506         if (!smmu) {
3507                 dev_err(dev, "failed to allocate arm_smmu_device\n");
3508                 return -ENOMEM;
3509         }
3510         smmu->dev = dev;
3511
3512         if (dev->of_node) {
3513                 ret = arm_smmu_device_dt_probe(pdev, smmu);
3514         } else {
3515                 ret = arm_smmu_device_acpi_probe(pdev, smmu);
3516                 if (ret == -ENODEV)
3517                         return ret;
3518         }
3519
3520         /* Set bypass mode according to firmware probing result */
3521         bypass = !!ret;
3522
3523         /* Base address */
3524         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
             if (!res)
                     return -EINVAL;
3525         if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3526                 dev_err(dev, "MMIO region too small (%pr)\n", res);
3527                 return -EINVAL;
3528         }
3529         ioaddr = res->start;
3530
3531         /*
3532          * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3533          * the PMCG registers which are reserved by the PMU driver.
3534          */
3535         smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3536         if (IS_ERR(smmu->base))
3537                 return PTR_ERR(smmu->base);
3538
3539         if (arm_smmu_resource_size(smmu) > SZ_64K) {
3540                 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3541                                                ARM_SMMU_REG_SZ);
3542                 if (IS_ERR(smmu->page1))
3543                         return PTR_ERR(smmu->page1);
3544         } else {
3545                 smmu->page1 = smmu->base;
3546         }
3547
3548         /* Interrupt lines */
3549
3550         irq = platform_get_irq_byname_optional(pdev, "combined");
3551         if (irq > 0)
3552                 smmu->combined_irq = irq;
3553         else {
3554                 irq = platform_get_irq_byname_optional(pdev, "eventq");
3555                 if (irq > 0)
3556                         smmu->evtq.q.irq = irq;
3557
3558                 irq = platform_get_irq_byname_optional(pdev, "priq");
3559                 if (irq > 0)
3560                         smmu->priq.q.irq = irq;
3561
3562                 irq = platform_get_irq_byname_optional(pdev, "gerror");
3563                 if (irq > 0)
3564                         smmu->gerr_irq = irq;
3565         }
3566         /* Probe the h/w */
3567         ret = arm_smmu_device_hw_probe(smmu);
3568         if (ret)
3569                 return ret;
3570
3571         /* Initialise in-memory data structures */
3572         ret = arm_smmu_init_structures(smmu);
3573         if (ret)
3574                 return ret;
3575
3576         /* Record our private device structure */
3577         platform_set_drvdata(pdev, smmu);
3578
3579         /* Reset the device */
3580         ret = arm_smmu_device_reset(smmu, bypass);
3581         if (ret)
3582                 return ret;
3583
3584         /* And we're up. Go go go! */
3585         ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3586                                      "smmu3.%pa", &ioaddr);
3587         if (ret)
3588                 return ret;
3589
3590         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
3591         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
3592
3593         ret = iommu_device_register(&smmu->iommu);
3594         if (ret) {
3595                 dev_err(dev, "Failed to register iommu\n");
3596                 return ret;
3597         }
3598
3599         return arm_smmu_set_bus_ops(&arm_smmu_ops);
3600 }
3601
3602 static int arm_smmu_device_remove(struct platform_device *pdev)
3603 {
3604         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3605
3606         arm_smmu_set_bus_ops(NULL);
3607         iommu_device_unregister(&smmu->iommu);
3608         iommu_device_sysfs_remove(&smmu->iommu);
3609         arm_smmu_device_disable(smmu);
3610
3611         return 0;
3612 }
3613
3614 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3615 {
3616         arm_smmu_device_remove(pdev);
3617 }
3618
3619 static const struct of_device_id arm_smmu_of_match[] = {
3620         { .compatible = "arm,smmu-v3", },
3621         { },
3622 };
3623 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3624
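     /*
      * Wait for any SVA MMU notifiers that are still pending release before
      * the module (and the notifier ops it provides) goes away.
      */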
3625 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3626 {
3627         arm_smmu_sva_notifier_synchronize();
3628         platform_driver_unregister(drv);
3629 }
3630
3631 static struct platform_driver arm_smmu_driver = {
3632         .driver = {
3633                 .name                   = "arm-smmu-v3",
3634                 .of_match_table         = arm_smmu_of_match,
3635                 .suppress_bind_attrs    = true,
3636         },
3637         .probe  = arm_smmu_device_probe,
3638         .remove = arm_smmu_device_remove,
3639         .shutdown = arm_smmu_device_shutdown,
3640 };
3641 module_driver(arm_smmu_driver, platform_driver_register,
3642               arm_smmu_driver_unregister);
3643
3644 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3645 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3646 MODULE_ALIAS("platform:arm-smmu-v3");
3647 MODULE_LICENSE("GPL v2");