1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_platform.h>
27 #include <linux/pci.h>
28 #include <linux/pci-ats.h>
29 #include <linux/platform_device.h>
30
31 #include <linux/amba/bus.h>
32
33 #include "arm-smmu-v3.h"
34 #include "../../iommu-sva-lib.h"
35
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44         "Disable MSI-based polling for CMD_SYNC completion.");
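
/*
 * Note on usage (a hint, not taken from this file): both parameters above
 * are read-only at runtime (perms 0444) and would normally be set on the
 * kernel command line, e.g. "arm_smmu_v3.disable_bypass=0" or
 * "arm_smmu_v3.disable_msipolling=1", assuming the usual "arm_smmu_v3"
 * module name.
 */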
45
46 enum arm_smmu_msi_index {
47         EVTQ_MSI_INDEX,
48         GERROR_MSI_INDEX,
49         PRIQ_MSI_INDEX,
50         ARM_SMMU_MAX_MSIS,
51 };
52
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54         [EVTQ_MSI_INDEX] = {
55                 ARM_SMMU_EVTQ_IRQ_CFG0,
56                 ARM_SMMU_EVTQ_IRQ_CFG1,
57                 ARM_SMMU_EVTQ_IRQ_CFG2,
58         },
59         [GERROR_MSI_INDEX] = {
60                 ARM_SMMU_GERROR_IRQ_CFG0,
61                 ARM_SMMU_GERROR_IRQ_CFG1,
62                 ARM_SMMU_GERROR_IRQ_CFG2,
63         },
64         [PRIQ_MSI_INDEX] = {
65                 ARM_SMMU_PRIQ_IRQ_CFG0,
66                 ARM_SMMU_PRIQ_IRQ_CFG1,
67                 ARM_SMMU_PRIQ_IRQ_CFG2,
68         },
69 };
70
71 struct arm_smmu_option_prop {
72         u32 opt;
73         const char *prop;
74 };
75
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78
79 /*
80  * Special value used by SVA when a process dies, to quiesce a CD without
81  * disabling it.
82  */
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
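/*
 * (quiet_cd is consumed by case (4) of arm_smmu_write_ctx_desc() below,
 * which ORs CTXDESC_CD_0_TCR_EPD0 into the live CD instead of clearing
 * its valid bit.)
 */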
84
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86         { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87         { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88         { 0, NULL},
89 };
90
91 static void parse_driver_options(struct arm_smmu_device *smmu)
92 {
93         int i = 0;
94
95         do {
96                 if (of_property_read_bool(smmu->dev->of_node,
97                                                 arm_smmu_options[i].prop)) {
98                         smmu->options |= arm_smmu_options[i].opt;
99                         dev_notice(smmu->dev, "option %s\n",
100                                 arm_smmu_options[i].prop);
101                 }
102         } while (arm_smmu_options[++i].opt);
103 }
104
105 /* Low-level queue manipulation functions */
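/*
 * A reminder of the prod/cons encoding used below (see the Q_* macros in
 * arm-smmu-v3.h): the low max_n_shift bits are the queue index (Q_IDX),
 * the next bit up is the wrap flag (Q_WRP), and bit 31 is the overflow
 * flag (Q_OVF). Comparing both the index and the wrap bit is what lets
 * queue_full() and queue_empty() tell a full queue from an empty one.
 */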
106 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
107 {
108         u32 space, prod, cons;
109
110         prod = Q_IDX(q, q->prod);
111         cons = Q_IDX(q, q->cons);
112
113         if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
114                 space = (1 << q->max_n_shift) - (prod - cons);
115         else
116                 space = cons - prod;
117
118         return space >= n;
119 }
120
121 static bool queue_full(struct arm_smmu_ll_queue *q)
122 {
123         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
124                Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
125 }
126
127 static bool queue_empty(struct arm_smmu_ll_queue *q)
128 {
129         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
130                Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
131 }
132
133 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
134 {
135         return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
136                 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
137                ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
138                 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
139 }
140
141 static void queue_sync_cons_out(struct arm_smmu_queue *q)
142 {
143         /*
144          * Ensure that all CPU accesses (reads and writes) to the queue
145          * are complete before we update the cons pointer.
146          */
147         __iomb();
148         writel_relaxed(q->llq.cons, q->cons_reg);
149 }
150
151 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
152 {
153         u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
154         q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
155 }
156
157 static int queue_sync_prod_in(struct arm_smmu_queue *q)
158 {
159         u32 prod;
160         int ret = 0;
161
162         /*
163          * We can't use the _relaxed() variant here, as we must prevent
164          * speculative reads of the queue before we have determined that
165          * prod has indeed moved.
166          */
167         prod = readl(q->prod_reg);
168
169         if (Q_OVF(prod) != Q_OVF(q->llq.prod))
170                 ret = -EOVERFLOW;
171
172         q->llq.prod = prod;
173         return ret;
174 }
175
176 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
177 {
178         u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
179         return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
180 }
181
182 static void queue_poll_init(struct arm_smmu_device *smmu,
183                             struct arm_smmu_queue_poll *qp)
184 {
185         qp->delay = 1;
186         qp->spin_cnt = 0;
187         qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
188         qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
189 }
190
191 static int queue_poll(struct arm_smmu_queue_poll *qp)
192 {
193         if (ktime_compare(ktime_get(), qp->timeout) > 0)
194                 return -ETIMEDOUT;
195
196         if (qp->wfe) {
197                 wfe();
198         } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
199                 cpu_relax();
200         } else {
201                 udelay(qp->delay);
202                 qp->delay *= 2;
203                 qp->spin_cnt = 0;
204         }
205
206         return 0;
207 }
208
209 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
210 {
211         int i;
212
213         for (i = 0; i < n_dwords; ++i)
214                 *dst++ = cpu_to_le64(*src++);
215 }
216
217 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
218 {
219         int i;
220
221         for (i = 0; i < n_dwords; ++i)
222                 *dst++ = le64_to_cpu(*src++);
223 }
224
225 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
226 {
227         if (queue_empty(&q->llq))
228                 return -EAGAIN;
229
230         queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
231         queue_inc_cons(&q->llq);
232         queue_sync_cons_out(q);
233         return 0;
234 }
235
236 /* High-level queue accessors */
237 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
238 {
239         memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
240         cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
241
242         switch (ent->opcode) {
243         case CMDQ_OP_TLBI_EL2_ALL:
244         case CMDQ_OP_TLBI_NSNH_ALL:
245                 break;
246         case CMDQ_OP_PREFETCH_CFG:
247                 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
248                 break;
249         case CMDQ_OP_CFGI_CD:
250                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
251                 fallthrough;
252         case CMDQ_OP_CFGI_STE:
253                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
254                 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
255                 break;
256         case CMDQ_OP_CFGI_CD_ALL:
257                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
258                 break;
259         case CMDQ_OP_CFGI_ALL:
260                 /* Cover the entire SID range */
261                 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
262                 break;
263         case CMDQ_OP_TLBI_NH_VA:
264                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
265                 fallthrough;
266         case CMDQ_OP_TLBI_EL2_VA:
267                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
268                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
269                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
270                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
271                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
272                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
273                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
274                 break;
275         case CMDQ_OP_TLBI_S2_IPA:
276                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
277                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
278                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
279                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
280                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
281                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
282                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
283                 break;
284         case CMDQ_OP_TLBI_NH_ASID:
285                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
286                 fallthrough;
287         case CMDQ_OP_TLBI_S12_VMALL:
288                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289                 break;
290         case CMDQ_OP_TLBI_EL2_ASID:
291                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
292                 break;
293         case CMDQ_OP_ATC_INV:
294                 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
295                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
296                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
297                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
298                 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
299                 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
300                 break;
301         case CMDQ_OP_PRI_RESP:
302                 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
303                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
304                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
305                 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
306                 switch (ent->pri.resp) {
307                 case PRI_RESP_DENY:
308                 case PRI_RESP_FAIL:
309                 case PRI_RESP_SUCC:
310                         break;
311                 default:
312                         return -EINVAL;
313                 }
314                 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
315                 break;
316         case CMDQ_OP_RESUME:
317                 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
318                 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
319                 cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
320                 break;
321         case CMDQ_OP_CMD_SYNC:
322                 if (ent->sync.msiaddr) {
323                         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
324                         cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
325                 } else {
326                         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
327                 }
328                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
329                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
330                 break;
331         default:
332                 return -ENOENT;
333         }
334
335         return 0;
336 }
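
/*
 * A minimal sketch of how arm_smmu_cmdq_build_cmd() is typically driven by
 * the helpers further down (this mirrors arm_smmu_tlb_inv_asid(); it is an
 * illustration, not an additional code path):
 *
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode    = CMDQ_OP_TLBI_NH_ASID,
 *		.tlbi.asid = asid,
 *	};
 *
 *	arm_smmu_cmdq_issue_cmd(smmu, &ent);	(build + insert the command)
 *	arm_smmu_cmdq_issue_sync(smmu);		(wait for the CMD_SYNC)
 */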
337
338 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
339                                          u32 prod)
340 {
341         struct arm_smmu_queue *q = &smmu->cmdq.q;
342         struct arm_smmu_cmdq_ent ent = {
343                 .opcode = CMDQ_OP_CMD_SYNC,
344         };
345
346         /*
347          * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
348          * payload, so the write will zero the entire command on that platform.
349          */
350         if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
351                 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
352                                    q->ent_dwords * 8;
353         }
354
355         arm_smmu_cmdq_build_cmd(cmd, &ent);
356 }
357
358 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
359 {
360         static const char * const cerror_str[] = {
361                 [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
362                 [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
363                 [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
364                 [CMDQ_ERR_CERROR_ATC_INV_IDX]   = "ATC invalidate timeout",
365         };
366
367         int i;
368         u64 cmd[CMDQ_ENT_DWORDS];
369         struct arm_smmu_queue *q = &smmu->cmdq.q;
370         u32 cons = readl_relaxed(q->cons_reg);
371         u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
372         struct arm_smmu_cmdq_ent cmd_sync = {
373                 .opcode = CMDQ_OP_CMD_SYNC,
374         };
375
376         dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
377                 idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
378
379         switch (idx) {
380         case CMDQ_ERR_CERROR_ABT_IDX:
381                 dev_err(smmu->dev, "retrying command fetch\n");
                    fallthrough;
382         case CMDQ_ERR_CERROR_NONE_IDX:
383                 return;
384         case CMDQ_ERR_CERROR_ATC_INV_IDX:
385                 /*
386                  * ATC Invalidation Completion timeout. CONS is still pointing
387                  * at the CMD_SYNC. Attempt to complete other pending commands
388                  * by repeating the CMD_SYNC, though we might well end up back
389                  * here since the ATC invalidation may still be pending.
390                  */
391                 return;
392         case CMDQ_ERR_CERROR_ILL_IDX:
393         default:
394                 break;
395         }
396
397         /*
398          * We may have concurrent producers, so we need to be careful
399          * not to touch any of the shadow cmdq state.
400          */
401         queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
402         dev_err(smmu->dev, "skipping command in error state:\n");
403         for (i = 0; i < ARRAY_SIZE(cmd); ++i)
404                 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
405
406         /* Convert the erroneous command into a CMD_SYNC */
407         if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
408                 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
409                 return;
410         }
411
412         queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
413 }
414
415 /*
416  * Command queue locking.
417  * This is a form of bastardised rwlock with the following major changes:
418  *
419  * - The only LOCK routines are exclusive_trylock() and shared_lock().
420  *   Neither have barrier semantics, and instead provide only a control
421  *   dependency.
422  *
423  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
424  *   fails if the caller appears to be the last lock holder (yes, this is
425  *   racy). All successful UNLOCK routines have RELEASE semantics.
426  */
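
/*
 * A sketch of how the insertion path further down uses the locking routines
 * that follow (this describes existing code, it is not a separate scheme):
 *
 *	CMD_SYNC waiter:
 *		arm_smmu_cmdq_shared_lock(cmdq);
 *		<poll for CMD_SYNC completion>
 *		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 *			<publish cmdq->q.llq.cons>
 *			arm_smmu_cmdq_shared_unlock(cmdq);
 *		}
 *
 *	cons shadow updater:
 *		if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
 *			cmdq->q.llq.cons = readl_relaxed(cmdq->q.cons_reg);
 *			arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
 *		}
 *
 * i.e. only the last CMD_SYNC waiter out, or a CPU that managed to take
 * the lock exclusively, ever updates the shadow cons value.
 */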
427 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
428 {
429         int val;
430
431         /*
432          * We can try to avoid the cmpxchg() loop by simply incrementing the
433          * lock counter. When held in exclusive state, the lock counter is set
434          * to INT_MIN so these increments won't hurt as the value will remain
435          * negative.
436          */
437         if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
438                 return;
439
440         do {
441                 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
442         } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
443 }
444
445 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
446 {
447         (void)atomic_dec_return_release(&cmdq->lock);
448 }
449
450 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
451 {
452         if (atomic_read(&cmdq->lock) == 1)
453                 return false;
454
455         arm_smmu_cmdq_shared_unlock(cmdq);
456         return true;
457 }
458
459 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)            \
460 ({                                                                      \
461         bool __ret;                                                     \
462         local_irq_save(flags);                                          \
463         __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);       \
464         if (!__ret)                                                     \
465                 local_irq_restore(flags);                               \
466         __ret;                                                          \
467 })
468
469 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)          \
470 ({                                                                      \
471         atomic_set_release(&cmdq->lock, 0);                             \
472         local_irq_restore(flags);                                       \
473 })
474
475
476 /*
477  * Command queue insertion.
478  * This is made fiddly by our attempts to achieve some sort of scalability
479  * since there is one queue shared amongst all of the CPUs in the system.  If
480  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
481  * then you'll *love* this monstrosity.
482  *
483  * The basic idea is to split the queue up into ranges of commands that are
484  * owned by a given CPU; the owner may not have written all of the commands
485  * itself, but is responsible for advancing the hardware prod pointer when
486  * the time comes. The algorithm is roughly:
487  *
488  *      1. Allocate some space in the queue. At this point we also discover
489  *         whether the head of the queue is currently owned by another CPU,
490  *         or whether we are the owner.
491  *
492  *      2. Write our commands into our allocated slots in the queue.
493  *
494  *      3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
495  *
496  *      4. If we are an owner:
497  *              a. Wait for the previous owner to finish.
498  *              b. Mark the queue head as unowned, which tells us the range
499  *                 that we are responsible for publishing.
500  *              c. Wait for all commands in our owned range to become valid.
501  *              d. Advance the hardware prod pointer.
502  *              e. Tell the next owner we've finished.
503  *
504  *      5. If we are inserting a CMD_SYNC (we may or may not have been an
505  *         owner), then we need to stick around until it has completed:
506  *              a. If we have MSIs, the SMMU can write back into the CMD_SYNC
507  *                 to clear the first 4 bytes.
508  *              b. Otherwise, we spin waiting for the hardware cons pointer to
509  *                 advance past our command.
510  *
511  * The devil is in the details, particularly the use of locking for handling
512  * SYNC completion and freeing up space in the queue before we think that it is
513  * full.
514  */
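
/*
 * A worked example for the valid-bitmap helper below (illustrative only,
 * assuming a 64-bit kernel): with max_n_shift == 8 the queue has 256
 * slots, so valid_map spans four longs. Marking slots [100, 140) touches
 * word 1 (bits 36..63, covering slots 100..127) and word 2 (bits 0..11,
 * covering slots 128..139), with one atomic XOR per masked word. Because
 * "valid" is defined as the inverse of the wrap bit, the same XOR pattern
 * works again after the producer wraps.
 */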
515 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
516                                                u32 sprod, u32 eprod, bool set)
517 {
518         u32 swidx, sbidx, ewidx, ebidx;
519         struct arm_smmu_ll_queue llq = {
520                 .max_n_shift    = cmdq->q.llq.max_n_shift,
521                 .prod           = sprod,
522         };
523
524         ewidx = BIT_WORD(Q_IDX(&llq, eprod));
525         ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
526
527         while (llq.prod != eprod) {
528                 unsigned long mask;
529                 atomic_long_t *ptr;
530                 u32 limit = BITS_PER_LONG;
531
532                 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
533                 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
534
535                 ptr = &cmdq->valid_map[swidx];
536
537                 if ((swidx == ewidx) && (sbidx < ebidx))
538                         limit = ebidx;
539
540                 mask = GENMASK(limit - 1, sbidx);
541
542                 /*
543                  * The valid bit is the inverse of the wrap bit. This means
544                  * that a zero-initialised queue is invalid and, after marking
545                  * all entries as valid, they become invalid again when we
546                  * wrap.
547                  */
548                 if (set) {
549                         atomic_long_xor(mask, ptr);
550                 } else { /* Poll */
551                         unsigned long valid;
552
553                         valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
554                         atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
555                 }
556
557                 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
558         }
559 }
560
561 /* Mark all entries in the range [sprod, eprod) as valid */
562 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
563                                         u32 sprod, u32 eprod)
564 {
565         __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
566 }
567
568 /* Wait for all entries in the range [sprod, eprod) to become valid */
569 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
570                                          u32 sprod, u32 eprod)
571 {
572         __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
573 }
574
575 /* Wait for the command queue to become non-full */
576 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
577                                              struct arm_smmu_ll_queue *llq)
578 {
579         unsigned long flags;
580         struct arm_smmu_queue_poll qp;
581         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
582         int ret = 0;
583
584         /*
585          * Try to update our copy of cons by grabbing exclusive cmdq access. If
586          * that fails, spin until somebody else updates it for us.
587          */
588         if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
589                 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
590                 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
591                 llq->val = READ_ONCE(cmdq->q.llq.val);
592                 return 0;
593         }
594
595         queue_poll_init(smmu, &qp);
596         do {
597                 llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
598                 if (!queue_full(llq))
599                         break;
600
601                 ret = queue_poll(&qp);
602         } while (!ret);
603
604         return ret;
605 }
606
607 /*
608  * Wait until the SMMU signals a CMD_SYNC completion MSI.
609  * Must be called with the cmdq lock held in some capacity.
610  */
611 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
612                                           struct arm_smmu_ll_queue *llq)
613 {
614         int ret = 0;
615         struct arm_smmu_queue_poll qp;
616         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
617         u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
618
619         queue_poll_init(smmu, &qp);
620
621         /*
622          * The MSI won't generate an event, since it's being written back
623          * into the command queue.
624          */
625         qp.wfe = false;
626         smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
627         llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
628         return ret;
629 }
630
631 /*
632  * Wait until the SMMU cons index passes llq->prod.
633  * Must be called with the cmdq lock held in some capacity.
634  */
635 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
636                                                struct arm_smmu_ll_queue *llq)
637 {
638         struct arm_smmu_queue_poll qp;
639         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
640         u32 prod = llq->prod;
641         int ret = 0;
642
643         queue_poll_init(smmu, &qp);
644         llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
645         do {
646                 if (queue_consumed(llq, prod))
647                         break;
648
649                 ret = queue_poll(&qp);
650
651                 /*
652                  * This needs to be a readl() so that our subsequent call
653                  * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
654                  *
655                  * Specifically, we need to ensure that we observe all
656                  * shared_lock()s by other CMD_SYNCs that share our owner,
657                  * so that a failing call to tryunlock() means that we're
658                  * the last one out and therefore we can safely advance
659                  * cmdq->q.llq.cons. Roughly speaking:
660                  *
661                  * CPU 0                CPU1                    CPU2 (us)
662                  *
663                  * if (sync)
664                  *      shared_lock();
665                  *
666                  * dma_wmb();
667                  * set_valid_map();
668                  *
669                  *                      if (owner) {
670                  *                              poll_valid_map();
671                  *                              <control dependency>
672                  *                              writel(prod_reg);
673                  *
674                  *                                              readl(cons_reg);
675                  *                                              tryunlock();
676                  *
677                  * Requires us to see CPU 0's shared_lock() acquisition.
678                  */
679                 llq->cons = readl(cmdq->q.cons_reg);
680         } while (!ret);
681
682         return ret;
683 }
684
685 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
686                                          struct arm_smmu_ll_queue *llq)
687 {
688         if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
689                 return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
690
691         return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
692 }
693
694 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
695                                         u32 prod, int n)
696 {
697         int i;
698         struct arm_smmu_ll_queue llq = {
699                 .max_n_shift    = cmdq->q.llq.max_n_shift,
700                 .prod           = prod,
701         };
702
703         for (i = 0; i < n; ++i) {
704                 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
705
706                 prod = queue_inc_prod_n(&llq, i);
707                 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
708         }
709 }
710
711 /*
712  * This is the actual insertion function, and provides the following
713  * ordering guarantees to callers:
714  *
715  * - There is a dma_wmb() before publishing any commands to the queue.
716  *   This can be relied upon to order prior writes to data structures
717  *   in memory (such as a CD or an STE) before the command.
718  *
719  * - On completion of a CMD_SYNC, there is a control dependency.
720  *   This can be relied upon to order subsequent writes to memory (e.g.
721  *   freeing an IOVA) after completion of the CMD_SYNC.
722  *
723  * - Command insertion is totally ordered, so if two CPUs each race to
724  *   insert their own list of commands then all of the commands from one
725  *   CPU will appear before any of the commands from the other CPU.
726  */
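
/*
 * For example (see arm_smmu_write_ctx_desc() and arm_smmu_sync_cd() below),
 * a caller can update a CD in memory and then push CFGI_CD + CMD_SYNC
 * through this function: the dma_wmb() at step 3 orders the CD writes
 * before the commands become valid, and completion of the CMD_SYNC orders
 * any follow-up work after the SMMU has observed the new CD.
 */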
727 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
728                                        u64 *cmds, int n, bool sync)
729 {
730         u64 cmd_sync[CMDQ_ENT_DWORDS];
731         u32 prod;
732         unsigned long flags;
733         bool owner;
734         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
735         struct arm_smmu_ll_queue llq = {
736                 .max_n_shift = cmdq->q.llq.max_n_shift,
737         }, head = llq;
738         int ret = 0;
739
740         /* 1. Allocate some space in the queue */
741         local_irq_save(flags);
742         llq.val = READ_ONCE(cmdq->q.llq.val);
743         do {
744                 u64 old;
745
746                 while (!queue_has_space(&llq, n + sync)) {
747                         local_irq_restore(flags);
748                         if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
749                                 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
750                         local_irq_save(flags);
751                 }
752
753                 head.cons = llq.cons;
754                 head.prod = queue_inc_prod_n(&llq, n + sync) |
755                                              CMDQ_PROD_OWNED_FLAG;
756
757                 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
758                 if (old == llq.val)
759                         break;
760
761                 llq.val = old;
762         } while (1);
763         owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
764         head.prod &= ~CMDQ_PROD_OWNED_FLAG;
765         llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
766
767         /*
768          * 2. Write our commands into the queue
769          * Dependency ordering from the cmpxchg() loop above.
770          */
771         arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
772         if (sync) {
773                 prod = queue_inc_prod_n(&llq, n);
774                 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
775                 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
776
777                 /*
778                  * In order to determine completion of our CMD_SYNC, we must
779                  * ensure that the queue can't wrap twice without us noticing.
780                  * We achieve that by taking the cmdq lock as shared before
781                  * marking our slot as valid.
782                  */
783                 arm_smmu_cmdq_shared_lock(cmdq);
784         }
785
786         /* 3. Mark our slots as valid, ensuring commands are visible first */
787         dma_wmb();
788         arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
789
790         /* 4. If we are the owner, take control of the SMMU hardware */
791         if (owner) {
792                 /* a. Wait for previous owner to finish */
793                 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
794
795                 /* b. Stop gathering work by clearing the owned flag */
796                 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
797                                                    &cmdq->q.llq.atomic.prod);
798                 prod &= ~CMDQ_PROD_OWNED_FLAG;
799
800                 /*
801                  * c. Wait for any gathered work to be written to the queue.
802                  * Note that we read our own entries so that we have the control
803                  * dependency required by (d).
804                  */
805                 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
806
807                 /*
808                  * d. Advance the hardware prod pointer
809                  * Control dependency ordering from the entries becoming valid.
810                  */
811                 writel_relaxed(prod, cmdq->q.prod_reg);
812
813                 /*
814                  * e. Tell the next owner we're done
815                  * Make sure we've updated the hardware first, so that we don't
816                  * race to update prod and potentially move it backwards.
817                  */
818                 atomic_set_release(&cmdq->owner_prod, prod);
819         }
820
821         /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
822         if (sync) {
823                 llq.prod = queue_inc_prod_n(&llq, n);
824                 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
825                 if (ret) {
826                         dev_err_ratelimited(smmu->dev,
827                                             "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
828                                             llq.prod,
829                                             readl_relaxed(cmdq->q.prod_reg),
830                                             readl_relaxed(cmdq->q.cons_reg));
831                 }
832
833                 /*
834                  * Try to unlock the cmdq lock. This will fail if we're the last
835                  * reader, in which case we can safely update cmdq->q.llq.cons
836                  */
837                 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
838                         WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
839                         arm_smmu_cmdq_shared_unlock(cmdq);
840                 }
841         }
842
843         local_irq_restore(flags);
844         return ret;
845 }
846
847 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
848                                    struct arm_smmu_cmdq_ent *ent)
849 {
850         u64 cmd[CMDQ_ENT_DWORDS];
851
852         if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
853                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
854                          ent->opcode);
855                 return -EINVAL;
856         }
857
858         return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
859 }
860
861 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
862 {
863         return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
864 }
865
866 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
867                                     struct arm_smmu_cmdq_batch *cmds,
868                                     struct arm_smmu_cmdq_ent *cmd)
869 {
870         if (cmds->num == CMDQ_BATCH_ENTRIES) {
871                 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
872                 cmds->num = 0;
873         }
874         arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
875         cmds->num++;
876 }
877
878 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
879                                       struct arm_smmu_cmdq_batch *cmds)
880 {
881         return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
882 }
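
/*
 * Typical batching pattern (this mirrors arm_smmu_sync_cd() below; shown
 * here purely as an illustration):
 *
 *	struct arm_smmu_cmdq_batch cmds = {};
 *	struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_CFGI_CD, ... };
 *
 *	for each stream ID of interest {
 *		cmd.cfgi.sid = sid;
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	}
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 *
 * arm_smmu_cmdq_batch_add() flushes automatically (without a CMD_SYNC)
 * once CMDQ_BATCH_ENTRIES commands have been gathered, and the final
 * submit appends a CMD_SYNC.
 */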
883
884 static int arm_smmu_page_response(struct device *dev,
885                                   struct iommu_fault_event *unused,
886                                   struct iommu_page_response *resp)
887 {
888         struct arm_smmu_cmdq_ent cmd = {0};
889         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
890         int sid = master->streams[0].id;
891
892         if (master->stall_enabled) {
893                 cmd.opcode              = CMDQ_OP_RESUME;
894                 cmd.resume.sid          = sid;
895                 cmd.resume.stag         = resp->grpid;
896                 switch (resp->code) {
897                 case IOMMU_PAGE_RESP_INVALID:
898                 case IOMMU_PAGE_RESP_FAILURE:
899                         cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
900                         break;
901                 case IOMMU_PAGE_RESP_SUCCESS:
902                         cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
903                         break;
904                 default:
905                         return -EINVAL;
906                 }
907         } else {
908                 return -ENODEV;
909         }
910
911         arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
912         /*
913          * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
914          * RESUME consumption guarantees that the stalled transaction will be
915          * terminated... at some point in the future. PRI_RESP is fire and
916          * forget.
917          */
918
919         return 0;
920 }
921
922 /* Context descriptor manipulation functions */
923 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
924 {
925         struct arm_smmu_cmdq_ent cmd = {
926                 .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
927                         CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
928                 .tlbi.asid = asid,
929         };
930
931         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
932         arm_smmu_cmdq_issue_sync(smmu);
933 }
934
935 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
936                              int ssid, bool leaf)
937 {
938         size_t i;
939         unsigned long flags;
940         struct arm_smmu_master *master;
941         struct arm_smmu_cmdq_batch cmds = {};
942         struct arm_smmu_device *smmu = smmu_domain->smmu;
943         struct arm_smmu_cmdq_ent cmd = {
944                 .opcode = CMDQ_OP_CFGI_CD,
945                 .cfgi   = {
946                         .ssid   = ssid,
947                         .leaf   = leaf,
948                 },
949         };
950
951         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
952         list_for_each_entry(master, &smmu_domain->devices, domain_head) {
953                 for (i = 0; i < master->num_streams; i++) {
954                         cmd.cfgi.sid = master->streams[i].id;
955                         arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
956                 }
957         }
958         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
959
960         arm_smmu_cmdq_batch_submit(smmu, &cmds);
961 }
962
963 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
964                                         struct arm_smmu_l1_ctx_desc *l1_desc)
965 {
966         size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
967
968         l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
969                                              &l1_desc->l2ptr_dma, GFP_KERNEL);
970         if (!l1_desc->l2ptr) {
971                 dev_warn(smmu->dev,
972                          "failed to allocate context descriptor table\n");
973                 return -ENOMEM;
974         }
975         return 0;
976 }
977
978 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
979                                       struct arm_smmu_l1_ctx_desc *l1_desc)
980 {
981         u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
982                   CTXDESC_L1_DESC_V;
983
984         /* See comment in arm_smmu_write_ctx_desc() */
985         WRITE_ONCE(*dst, cpu_to_le64(val));
986 }
987
988 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
989                                    u32 ssid)
990 {
991         __le64 *l1ptr;
992         unsigned int idx;
993         struct arm_smmu_l1_ctx_desc *l1_desc;
994         struct arm_smmu_device *smmu = smmu_domain->smmu;
995         struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
996
997         if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
998                 return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
999
1000         idx = ssid >> CTXDESC_SPLIT;
1001         l1_desc = &cdcfg->l1_desc[idx];
1002         if (!l1_desc->l2ptr) {
1003                 if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1004                         return NULL;
1005
1006                 l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1007                 arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1008                 /* An invalid L1CD can be cached */
1009                 arm_smmu_sync_cd(smmu_domain, ssid, false);
1010         }
1011         idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1012         return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1013 }
1014
1015 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1016                             struct arm_smmu_ctx_desc *cd)
1017 {
1018         /*
1019          * This function handles the following cases:
1020          *
1021          * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1022          * (2) Install a secondary CD, for SID+SSID traffic.
1023          * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1024          *     CD, then invalidate the old entry and mappings.
1025          * (4) Quiesce the context without clearing the valid bit. Disable
1026          *     translation, and ignore any translation fault.
1027          * (5) Remove a secondary CD.
1028          */
1029         u64 val;
1030         bool cd_live;
1031         __le64 *cdptr;
1032
1033         if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1034                 return -E2BIG;
1035
1036         cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1037         if (!cdptr)
1038                 return -ENOMEM;
1039
1040         val = le64_to_cpu(cdptr[0]);
1041         cd_live = !!(val & CTXDESC_CD_0_V);
1042
1043         if (!cd) { /* (5) */
1044                 val = 0;
1045         } else if (cd == &quiet_cd) { /* (4) */
1046                 val |= CTXDESC_CD_0_TCR_EPD0;
1047         } else if (cd_live) { /* (3) */
1048                 val &= ~CTXDESC_CD_0_ASID;
1049                 val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1050                 /*
1051                  * Until CD+TLB invalidation, both ASIDs may be used for tagging
1052                  * this substream's traffic
1053                  */
1054         } else { /* (1) and (2) */
1055                 cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1056                 cdptr[2] = 0;
1057                 cdptr[3] = cpu_to_le64(cd->mair);
1058
1059                 /*
1060                  * STE is live, and the SMMU might read dwords of this CD in any
1061                  * order. Ensure that it observes valid values before reading
1062                  * V=1.
1063                  */
1064                 arm_smmu_sync_cd(smmu_domain, ssid, true);
1065
1066                 val = cd->tcr |
1067 #ifdef __BIG_ENDIAN
1068                         CTXDESC_CD_0_ENDI |
1069 #endif
1070                         CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1071                         (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1072                         CTXDESC_CD_0_AA64 |
1073                         FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1074                         CTXDESC_CD_0_V;
1075
1076                 if (smmu_domain->stall_enabled)
1077                         val |= CTXDESC_CD_0_S;
1078         }
1079
1080         /*
1081          * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1082          * "Configuration structures and configuration invalidation completion"
1083          *
1084          *   The size of single-copy atomic reads made by the SMMU is
1085          *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1086          *   field within an aligned 64-bit span of a structure can be altered
1087          *   without first making the structure invalid.
1088          */
1089         WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1090         arm_smmu_sync_cd(smmu_domain, ssid, true);
1091         return 0;
1092 }
1093
1094 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1095 {
1096         int ret;
1097         size_t l1size;
1098         size_t max_contexts;
1099         struct arm_smmu_device *smmu = smmu_domain->smmu;
1100         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1101         struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1102
1103         max_contexts = 1 << cfg->s1cdmax;
1104
1105         if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1106             max_contexts <= CTXDESC_L2_ENTRIES) {
1107                 cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1108                 cdcfg->num_l1_ents = max_contexts;
1109
1110                 l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1111         } else {
1112                 cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1113                 cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1114                                                   CTXDESC_L2_ENTRIES);
1115
1116                 cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1117                                               sizeof(*cdcfg->l1_desc),
1118                                               GFP_KERNEL);
1119                 if (!cdcfg->l1_desc)
1120                         return -ENOMEM;
1121
1122                 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1123         }
1124
1125         cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1126                                            GFP_KERNEL);
1127         if (!cdcfg->cdtab) {
1128                 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1129                 ret = -ENOMEM;
1130                 goto err_free_l1;
1131         }
1132
1133         return 0;
1134
1135 err_free_l1:
1136         if (cdcfg->l1_desc) {
1137                 devm_kfree(smmu->dev, cdcfg->l1_desc);
1138                 cdcfg->l1_desc = NULL;
1139         }
1140         return ret;
1141 }
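
/*
 * Sizing example for the above (illustrative, assuming the usual CTXDESC_*
 * values: 64-byte CDs, 8-byte L1 descriptors, 1024-entry leaf tables):
 * s1cdmax == 16 gives 65536 contexts, hence a 2-level table with 64 L1
 * descriptors (512 bytes) and up to 64 lazily-allocated 64KB leaves. With
 * s1cdmax <= 10 this collapses to a single linear table of at most
 * 1024 * 64 bytes.
 */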
1142
1143 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1144 {
1145         int i;
1146         size_t size, l1size;
1147         struct arm_smmu_device *smmu = smmu_domain->smmu;
1148         struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1149
1150         if (cdcfg->l1_desc) {
1151                 size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1152
1153                 for (i = 0; i < cdcfg->num_l1_ents; i++) {
1154                         if (!cdcfg->l1_desc[i].l2ptr)
1155                                 continue;
1156
1157                         dmam_free_coherent(smmu->dev, size,
1158                                            cdcfg->l1_desc[i].l2ptr,
1159                                            cdcfg->l1_desc[i].l2ptr_dma);
1160                 }
1161                 devm_kfree(smmu->dev, cdcfg->l1_desc);
1162                 cdcfg->l1_desc = NULL;
1163
1164                 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1165         } else {
1166                 l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1167         }
1168
1169         dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1170         cdcfg->cdtab_dma = 0;
1171         cdcfg->cdtab = NULL;
1172 }
1173
1174 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1175 {
1176         bool free;
1177         struct arm_smmu_ctx_desc *old_cd;
1178
1179         if (!cd->asid)
1180                 return false;
1181
1182         free = refcount_dec_and_test(&cd->refs);
1183         if (free) {
1184                 old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1185                 WARN_ON(old_cd != cd);
1186         }
1187         return free;
1188 }
1189
1190 /* Stream table manipulation functions */
1191 static void
1192 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1193 {
1194         u64 val = 0;
1195
1196         val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1197         val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1198
1199         /* See comment in arm_smmu_write_ctx_desc() */
1200         WRITE_ONCE(*dst, cpu_to_le64(val));
1201 }
1202
1203 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1204 {
1205         struct arm_smmu_cmdq_ent cmd = {
1206                 .opcode = CMDQ_OP_CFGI_STE,
1207                 .cfgi   = {
1208                         .sid    = sid,
1209                         .leaf   = true,
1210                 },
1211         };
1212
1213         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1214         arm_smmu_cmdq_issue_sync(smmu);
1215 }
1216
1217 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1218                                       __le64 *dst)
1219 {
1220         /*
1221          * This is hideously complicated, but we only really care about
1222          * three cases at the moment:
1223          *
1224          * 1. Invalid (all zero) -> bypass/fault (init)
1225          * 2. Bypass/fault -> translation/bypass (attach)
1226          * 3. Translation/bypass -> bypass/fault (detach)
1227          *
1228          * Given that we can't update the STE atomically and the SMMU
1229          * doesn't read the thing in a defined order, that leaves us
1230          * with the following maintenance requirements:
1231          *
1232          * 1. Update Config, return (init time STEs aren't live)
1233          * 2. Write everything apart from dword 0, sync, write dword 0, sync
1234          * 3. Update Config, sync
1235          */
1236         u64 val = le64_to_cpu(dst[0]);
1237         bool ste_live = false;
1238         struct arm_smmu_device *smmu = NULL;
1239         struct arm_smmu_s1_cfg *s1_cfg = NULL;
1240         struct arm_smmu_s2_cfg *s2_cfg = NULL;
1241         struct arm_smmu_domain *smmu_domain = NULL;
1242         struct arm_smmu_cmdq_ent prefetch_cmd = {
1243                 .opcode         = CMDQ_OP_PREFETCH_CFG,
1244                 .prefetch       = {
1245                         .sid    = sid,
1246                 },
1247         };
1248
1249         if (master) {
1250                 smmu_domain = master->domain;
1251                 smmu = master->smmu;
1252         }
1253
1254         if (smmu_domain) {
1255                 switch (smmu_domain->stage) {
1256                 case ARM_SMMU_DOMAIN_S1:
1257                         s1_cfg = &smmu_domain->s1_cfg;
1258                         break;
1259                 case ARM_SMMU_DOMAIN_S2:
1260                 case ARM_SMMU_DOMAIN_NESTED:
1261                         s2_cfg = &smmu_domain->s2_cfg;
1262                         break;
1263                 default:
1264                         break;
1265                 }
1266         }
1267
1268         if (val & STRTAB_STE_0_V) {
1269                 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1270                 case STRTAB_STE_0_CFG_BYPASS:
1271                         break;
1272                 case STRTAB_STE_0_CFG_S1_TRANS:
1273                 case STRTAB_STE_0_CFG_S2_TRANS:
1274                         ste_live = true;
1275                         break;
1276                 case STRTAB_STE_0_CFG_ABORT:
1277                         BUG_ON(!disable_bypass);
1278                         break;
1279                 default:
1280                         BUG(); /* STE corruption */
1281                 }
1282         }
1283
1284         /* Nuke the existing STE_0 value, as we're going to rewrite it */
1285         val = STRTAB_STE_0_V;
1286
1287         /* Bypass/fault */
1288         if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1289                 if (!smmu_domain && disable_bypass)
1290                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1291                 else
1292                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1293
1294                 dst[0] = cpu_to_le64(val);
1295                 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1296                                                 STRTAB_STE_1_SHCFG_INCOMING));
1297                 dst[2] = 0; /* Nuke the VMID */
1298                 /*
1299                  * The SMMU can perform negative caching, so we must sync
1300                  * the STE regardless of whether the old value was live.
1301                  */
1302                 if (smmu)
1303                         arm_smmu_sync_ste_for_sid(smmu, sid);
1304                 return;
1305         }
1306
1307         if (s1_cfg) {
1308                 u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1309                         STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1310
1311                 BUG_ON(ste_live);
1312                 dst[1] = cpu_to_le64(
1313                          FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1314                          FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1315                          FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1316                          FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1317                          FIELD_PREP(STRTAB_STE_1_STRW, strw));
1318
1319                 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1320                     !master->stall_enabled)
1321                         dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1322
1323                 val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1324                         FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1325                         FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1326                         FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1327         }
1328
1329         if (s2_cfg) {
1330                 BUG_ON(ste_live);
1331                 dst[2] = cpu_to_le64(
1332                          FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1333                          FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1334 #ifdef __BIG_ENDIAN
1335                          STRTAB_STE_2_S2ENDI |
1336 #endif
1337                          STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1338                          STRTAB_STE_2_S2R);
1339
1340                 dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1341
1342                 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1343         }
1344
1345         if (master->ats_enabled)
1346                 dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1347                                                  STRTAB_STE_1_EATS_TRANS));
1348
1349         arm_smmu_sync_ste_for_sid(smmu, sid);
1350         /* See comment in arm_smmu_write_ctx_desc() */
1351         WRITE_ONCE(dst[0], cpu_to_le64(val));
1352         arm_smmu_sync_ste_for_sid(smmu, sid);
1353
1354         /* It's likely that we'll want to use the new STE soon */
1355         if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1356                 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1357 }
1358
1359 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1360 {
1361         unsigned int i;
1362
1363         for (i = 0; i < nent; ++i) {
1364                 arm_smmu_write_strtab_ent(NULL, -1, strtab);
1365                 strtab += STRTAB_STE_DWORDS;
1366         }
1367 }
1368
1369 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1370 {
1371         size_t size;
1372         void *strtab;
1373         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1374         struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1375
1376         if (desc->l2ptr)
1377                 return 0;
1378
1379         size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1380         strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1381
1382         desc->span = STRTAB_SPLIT + 1;
1383         desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1384                                           GFP_KERNEL);
1385         if (!desc->l2ptr) {
1386                 dev_err(smmu->dev,
1387                         "failed to allocate l2 stream table for SID %u\n",
1388                         sid);
1389                 return -ENOMEM;
1390         }
1391
1392         arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1393         arm_smmu_write_strtab_l1_desc(strtab, desc);
1394         return 0;
1395 }
1396
1397 static struct arm_smmu_master *
1398 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1399 {
1400         struct rb_node *node;
1401         struct arm_smmu_stream *stream;
1402
1403         lockdep_assert_held(&smmu->streams_mutex);
1404
1405         node = smmu->streams.rb_node;
1406         while (node) {
1407                 stream = rb_entry(node, struct arm_smmu_stream, node);
1408                 if (stream->id < sid)
1409                         node = node->rb_right;
1410                 else if (stream->id > sid)
1411                         node = node->rb_left;
1412                 else
1413                         return stream->master;
1414         }
1415
1416         return NULL;
1417 }
1418
1419 /* IRQ and event handlers */
1420 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1421 {
1422         int ret;
1423         u32 reason;
1424         u32 perm = 0;
1425         struct arm_smmu_master *master;
1426         bool ssid_valid = evt[0] & EVTQ_0_SSV;
1427         u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1428         struct iommu_fault_event fault_evt = { };
1429         struct iommu_fault *flt = &fault_evt.fault;
1430
1431         switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1432         case EVT_ID_TRANSLATION_FAULT:
1433                 reason = IOMMU_FAULT_REASON_PTE_FETCH;
1434                 break;
1435         case EVT_ID_ADDR_SIZE_FAULT:
1436                 reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1437                 break;
1438         case EVT_ID_ACCESS_FAULT:
1439                 reason = IOMMU_FAULT_REASON_ACCESS;
1440                 break;
1441         case EVT_ID_PERMISSION_FAULT:
1442                 reason = IOMMU_FAULT_REASON_PERMISSION;
1443                 break;
1444         default:
1445                 return -EOPNOTSUPP;
1446         }
1447
1448         /* Stage-2 is always pinned at the moment */
1449         if (evt[1] & EVTQ_1_S2)
1450                 return -EFAULT;
1451
1452         if (evt[1] & EVTQ_1_RnW)
1453                 perm |= IOMMU_FAULT_PERM_READ;
1454         else
1455                 perm |= IOMMU_FAULT_PERM_WRITE;
1456
1457         if (evt[1] & EVTQ_1_InD)
1458                 perm |= IOMMU_FAULT_PERM_EXEC;
1459
1460         if (evt[1] & EVTQ_1_PnU)
1461                 perm |= IOMMU_FAULT_PERM_PRIV;
1462
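        /*
         * Stalled transactions are reported as recoverable page requests so
         * that an I/O page fault handler can resolve and resume them;
         * anything else is reported as an unrecoverable DMA fault.
         */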
1463         if (evt[1] & EVTQ_1_STALL) {
1464                 flt->type = IOMMU_FAULT_PAGE_REQ;
1465                 flt->prm = (struct iommu_fault_page_request) {
1466                         .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1467                         .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1468                         .perm = perm,
1469                         .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1470                 };
1471
1472                 if (ssid_valid) {
1473                         flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1474                         flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1475                 }
1476         } else {
1477                 flt->type = IOMMU_FAULT_DMA_UNRECOV;
1478                 flt->event = (struct iommu_fault_unrecoverable) {
1479                         .reason = reason,
1480                         .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1481                         .perm = perm,
1482                         .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1483                 };
1484
1485                 if (ssid_valid) {
1486                         flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1487                         flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1488                 }
1489         }
1490
1491         mutex_lock(&smmu->streams_mutex);
1492         master = arm_smmu_find_master(smmu, sid);
1493         if (!master) {
1494                 ret = -EINVAL;
1495                 goto out_unlock;
1496         }
1497
1498         ret = iommu_report_device_fault(master->dev, &fault_evt);
1499         if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1500                 /* Nobody cared, abort the access */
1501                 struct iommu_page_response resp = {
1502                         .pasid          = flt->prm.pasid,
1503                         .grpid          = flt->prm.grpid,
1504                         .code           = IOMMU_PAGE_RESP_FAILURE,
1505                 };
1506                 arm_smmu_page_response(master->dev, &fault_evt, &resp);
1507         }
1508
1509 out_unlock:
1510         mutex_unlock(&smmu->streams_mutex);
1511         return ret;
1512 }
1513
1514 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1515 {
1516         int i, ret;
1517         struct arm_smmu_device *smmu = dev;
1518         struct arm_smmu_queue *q = &smmu->evtq.q;
1519         struct arm_smmu_ll_queue *llq = &q->llq;
1520         static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1521                                       DEFAULT_RATELIMIT_BURST);
1522         u64 evt[EVTQ_ENT_DWORDS];
1523
1524         do {
1525                 while (!queue_remove_raw(q, evt)) {
1526                         u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1527
1528                         ret = arm_smmu_handle_evt(smmu, evt);
1529                         if (!ret || !__ratelimit(&rs))
1530                                 continue;
1531
1532                         dev_info(smmu->dev, "event 0x%02x received:\n", id);
1533                         for (i = 0; i < ARRAY_SIZE(evt); ++i)
1534                                 dev_info(smmu->dev, "\t0x%016llx\n",
1535                                          (unsigned long long)evt[i]);
1536
1537                 }
1538
1539                 /*
1540                  * Not much we can do on overflow, so scream and pretend we're
1541                  * trying harder.
1542                  */
1543                 if (queue_sync_prod_in(q) == -EOVERFLOW)
1544                         dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1545         } while (!queue_empty(llq));
1546
1547         /* Sync our overflow flag, as we believe we're up to speed */
1548         llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1549                     Q_IDX(llq, llq->cons);
1550         return IRQ_HANDLED;
1551 }
1552
1553 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1554 {
1555         u32 sid, ssid;
1556         u16 grpid;
1557         bool ssv, last;
1558
1559         sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1560         ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1561         ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1562         last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1563         grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1564
1565         dev_info(smmu->dev, "unexpected PRI request received:\n");
1566         dev_info(smmu->dev,
1567                  "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1568                  sid, ssid, grpid, last ? "L" : "",
1569                  evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1570                  evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1571                  evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1572                  evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1573                  evt[1] & PRIQ_1_ADDR_MASK);
1574
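        /*
         * A PRI response is only expected once the last request in the group
         * has arrived; deny it, since nothing here handles PRI requests.
         */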
1575         if (last) {
1576                 struct arm_smmu_cmdq_ent cmd = {
1577                         .opcode                 = CMDQ_OP_PRI_RESP,
1578                         .substream_valid        = ssv,
1579                         .pri                    = {
1580                                 .sid    = sid,
1581                                 .ssid   = ssid,
1582                                 .grpid  = grpid,
1583                                 .resp   = PRI_RESP_DENY,
1584                         },
1585                 };
1586
1587                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1588         }
1589 }
1590
1591 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1592 {
1593         struct arm_smmu_device *smmu = dev;
1594         struct arm_smmu_queue *q = &smmu->priq.q;
1595         struct arm_smmu_ll_queue *llq = &q->llq;
1596         u64 evt[PRIQ_ENT_DWORDS];
1597
1598         do {
1599                 while (!queue_remove_raw(q, evt))
1600                         arm_smmu_handle_ppr(smmu, evt);
1601
1602                 if (queue_sync_prod_in(q) == -EOVERFLOW)
1603                         dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1604         } while (!queue_empty(llq));
1605
1606         /* Sync our overflow flag, as we believe we're up to speed */
1607         llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1608                       Q_IDX(llq, llq->cons);
1609         queue_sync_cons_out(q);
1610         return IRQ_HANDLED;
1611 }
1612
1613 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1614
1615 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1616 {
1617         u32 gerror, gerrorn, active;
1618         struct arm_smmu_device *smmu = dev;
1619
1620         gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1621         gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1622
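        /*
         * Bits that differ between GERROR and GERRORN indicate active,
         * unacknowledged errors; writing GERROR back to GERRORN below
         * acknowledges them.
         */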
1623         active = gerror ^ gerrorn;
1624         if (!(active & GERROR_ERR_MASK))
1625                 return IRQ_NONE; /* No errors pending */
1626
1627         dev_warn(smmu->dev,
1628                  "unexpected global error reported (0x%08x), this could be serious\n",
1629                  active);
1630
1631         if (active & GERROR_SFM_ERR) {
1632                 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1633                 arm_smmu_device_disable(smmu);
1634         }
1635
1636         if (active & GERROR_MSI_GERROR_ABT_ERR)
1637                 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1638
1639         if (active & GERROR_MSI_PRIQ_ABT_ERR)
1640                 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1641
1642         if (active & GERROR_MSI_EVTQ_ABT_ERR)
1643                 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1644
1645         if (active & GERROR_MSI_CMDQ_ABT_ERR)
1646                 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1647
1648         if (active & GERROR_PRIQ_ABT_ERR)
1649                 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1650
1651         if (active & GERROR_EVTQ_ABT_ERR)
1652                 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1653
1654         if (active & GERROR_CMDQ_ERR)
1655                 arm_smmu_cmdq_skip_err(smmu);
1656
1657         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1658         return IRQ_HANDLED;
1659 }
1660
1661 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1662 {
1663         struct arm_smmu_device *smmu = dev;
1664
1665         arm_smmu_evtq_thread(irq, dev);
1666         if (smmu->features & ARM_SMMU_FEAT_PRI)
1667                 arm_smmu_priq_thread(irq, dev);
1668
1669         return IRQ_HANDLED;
1670 }
1671
1672 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1673 {
1674         arm_smmu_gerror_handler(irq, dev);
1675         return IRQ_WAKE_THREAD;
1676 }
1677
1678 static void
1679 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1680                         struct arm_smmu_cmdq_ent *cmd)
1681 {
1682         size_t log2_span;
1683         size_t span_mask;
1684         /* ATC invalidates are always on 4096-byte pages */
1685         size_t inval_grain_shift = 12;
1686         unsigned long page_start, page_end;
1687
1688         /*
1689          * ATS and PASID:
1690          *
1691          * If substream_valid is clear, the PCIe TLP is sent without a PASID
1692          * prefix. In that case all ATC entries within the address range are
1693          * invalidated, including those that were requested with a PASID! There
1694          * is no way to invalidate only entries without PASID.
1695          *
1696          * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1697          * traffic), translation requests without PASID create ATC entries
1698          * without PASID, which must be invalidated with substream_valid clear.
1699          * This has the unpleasant side-effect of invalidating all PASID-tagged
1700          * ATC entries within the address range.
1701          */
1702         *cmd = (struct arm_smmu_cmdq_ent) {
1703                 .opcode                 = CMDQ_OP_ATC_INV,
1704                 .substream_valid        = !!ssid,
1705                 .atc.ssid               = ssid,
1706         };
1707
1708         if (!size) {
1709                 cmd->atc.size = ATC_INV_SIZE_ALL;
1710                 return;
1711         }
1712
1713         page_start      = iova >> inval_grain_shift;
1714         page_end        = (iova + size - 1) >> inval_grain_shift;
1715
1716         /*
1717          * In an ATS Invalidate Request, the address must be aligned on the
1718          * range size, which must be a power-of-two multiple of the page size. We
1719          * thus have to choose between grossly over-invalidating the region, or
1720          * splitting the invalidation into multiple commands. For simplicity
1721          * we'll go with the first solution, but should refine it in the future
1722          * if multiple commands are shown to be more efficient.
1723          *
1724          * Find the smallest power of two that covers the range. The most
1725          * significant differing bit between the start and end addresses,
1726          * fls(start ^ end), indicates the required span. For example:
1727          *
1728          * We want to invalidate pages [8; 11]. This is already the ideal range:
1729          *              x = 0b1000 ^ 0b1011 = 0b11
1730          *              span = 1 << fls(x) = 4
1731          *
1732          * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1733          *              x = 0b0111 ^ 0b1010 = 0b1101
1734          *              span = 1 << fls(x) = 16
1735          */
1736         log2_span       = fls_long(page_start ^ page_end);
1737         span_mask       = (1ULL << log2_span) - 1;
1738
1739         page_start      &= ~span_mask;
1740
1741         cmd->atc.addr   = page_start << inval_grain_shift;
1742         cmd->atc.size   = log2_span;
1743 }
1744
1745 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1746 {
1747         int i;
1748         struct arm_smmu_cmdq_ent cmd;
1749
1750         arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1751
1752         for (i = 0; i < master->num_streams; i++) {
1753                 cmd.atc.sid = master->streams[i].id;
1754                 arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1755         }
1756
1757         return arm_smmu_cmdq_issue_sync(master->smmu);
1758 }
1759
1760 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1761                             unsigned long iova, size_t size)
1762 {
1763         int i;
1764         unsigned long flags;
1765         struct arm_smmu_cmdq_ent cmd;
1766         struct arm_smmu_master *master;
1767         struct arm_smmu_cmdq_batch cmds = {};
1768
1769         if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1770                 return 0;
1771
1772         /*
1773          * Ensure that we've completed prior invalidation of the main TLBs
1774          * before we read 'nr_ats_masters' in case of a concurrent call to
1775          * arm_smmu_enable_ats():
1776          *
1777          *      // unmap()                      // arm_smmu_enable_ats()
1778          *      TLBI+SYNC                       atomic_inc(&nr_ats_masters);
1779          *      smp_mb();                       [...]
1780          *      atomic_read(&nr_ats_masters);   pci_enable_ats() // writel()
1781          *
1782          * Ensures that we always see the incremented 'nr_ats_masters' count if
1783          * ATS was enabled at the PCI device before completion of the TLBI.
1784          */
1785         smp_mb();
1786         if (!atomic_read(&smmu_domain->nr_ats_masters))
1787                 return 0;
1788
1789         arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1790
1791         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1792         list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1793                 if (!master->ats_enabled)
1794                         continue;
1795
1796                 for (i = 0; i < master->num_streams; i++) {
1797                         cmd.atc.sid = master->streams[i].id;
1798                         arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1799                 }
1800         }
1801         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1802
1803         return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1804 }
1805
1806 /* IO_PGTABLE API */
1807 static void arm_smmu_tlb_inv_context(void *cookie)
1808 {
1809         struct arm_smmu_domain *smmu_domain = cookie;
1810         struct arm_smmu_device *smmu = smmu_domain->smmu;
1811         struct arm_smmu_cmdq_ent cmd;
1812
1813         /*
1814          * NOTE: when io-pgtable is in non-strict mode, we may get here with
1815          * PTEs previously cleared by unmaps on the current CPU not yet visible
1816          * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1817          * insertion to guarantee those are observed before the TLBI. Do be
1818          * careful, 007.
1819          */
1820         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1821                 arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1822         } else {
1823                 cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1824                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1825                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1826                 arm_smmu_cmdq_issue_sync(smmu);
1827         }
1828         arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1829 }
1830
1831 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1832                                      unsigned long iova, size_t size,
1833                                      size_t granule,
1834                                      struct arm_smmu_domain *smmu_domain)
1835 {
1836         struct arm_smmu_device *smmu = smmu_domain->smmu;
1837         unsigned long end = iova + size, num_pages = 0, tg = 0;
1838         size_t inv_range = granule;
1839         struct arm_smmu_cmdq_batch cmds = {};
1840
1841         if (!size)
1842                 return;
1843
1844         if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1845                 /* Get the leaf page size */
1846                 tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1847
1848                 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1849                 /* Convert the leaf page size (log2 = 12, 14 or 16) to the TG encoding 1, 2 or 3 */
1850
1851                 /* Determine what level the granule is at */
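                /* e.g. a 2MB block with 4KB pages: 4 - (21 - 3) / (12 - 3) = 2 */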
1852                 cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1853
1854                 num_pages = size >> tg;
1855         }
1856
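        /*
         * Worked example for the range-invalidation path (tg = 12, i.e. 4KB
         * leaf pages): invalidating 33 pages gives num_pages = 0b100001. The
         * first iteration issues scale = 0, num = 1 (one page), the second
         * scale = 5, num = 1 (32 pages), covering the range in two commands.
         */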
1857         while (iova < end) {
1858                 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1859                         /*
1860                          * On each iteration of the loop, the range is 5 bits
1861                          * worth of the aligned size remaining.
1862                          * The range in pages is:
1863                          *
1864                          * range = (num_pages & (0x1f << __ffs(num_pages)))
1865                          */
1866                         unsigned long scale, num;
1867
1868                         /* Find the largest power of two that divides the remaining page count */
1869                         scale = __ffs(num_pages);
1870                         cmd->tlbi.scale = scale;
1871
1872                         /* Determine how many chunks of 2^scale size we have */
1873                         num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1874                         cmd->tlbi.num = num - 1;
1875
1876                         /* range is num * 2^scale * pgsize */
1877                         inv_range = num << (scale + tg);
1878
1879                         /* Clear out the lower order bits for the next iteration */
1880                         num_pages -= num << scale;
1881                 }
1882
1883                 cmd->tlbi.addr = iova;
1884                 arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1885                 iova += inv_range;
1886         }
1887         arm_smmu_cmdq_batch_submit(smmu, &cmds);
1888 }
1889
1890 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1891                                           size_t granule, bool leaf,
1892                                           struct arm_smmu_domain *smmu_domain)
1893 {
1894         struct arm_smmu_cmdq_ent cmd = {
1895                 .tlbi = {
1896                         .leaf   = leaf,
1897                 },
1898         };
1899
1900         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1901                 cmd.opcode      = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1902                                   CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1903                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1904         } else {
1905                 cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1906                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1907         }
1908         __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1909
1910         /*
1911          * Unfortunately, this can't be leaf-only since we may have
1912          * zapped an entire table.
1913          */
1914         arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1915 }
1916
1917 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1918                                  size_t granule, bool leaf,
1919                                  struct arm_smmu_domain *smmu_domain)
1920 {
1921         struct arm_smmu_cmdq_ent cmd = {
1922                 .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1923                           CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1924                 .tlbi = {
1925                         .asid   = asid,
1926                         .leaf   = leaf,
1927                 },
1928         };
1929
1930         __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1931 }
1932
1933 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1934                                          unsigned long iova, size_t granule,
1935                                          void *cookie)
1936 {
1937         struct arm_smmu_domain *smmu_domain = cookie;
1938         struct iommu_domain *domain = &smmu_domain->domain;
1939
1940         iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1941 }
1942
1943 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1944                                   size_t granule, void *cookie)
1945 {
1946         arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1947 }
1948
1949 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1950         .tlb_flush_all  = arm_smmu_tlb_inv_context,
1951         .tlb_flush_walk = arm_smmu_tlb_inv_walk,
1952         .tlb_add_page   = arm_smmu_tlb_inv_page_nosync,
1953 };
1954
1955 /* IOMMU API */
1956 static bool arm_smmu_capable(enum iommu_cap cap)
1957 {
1958         switch (cap) {
1959         case IOMMU_CAP_CACHE_COHERENCY:
1960                 return true;
1961         case IOMMU_CAP_NOEXEC:
1962                 return true;
1963         default:
1964                 return false;
1965         }
1966 }
1967
1968 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1969 {
1970         struct arm_smmu_domain *smmu_domain;
1971
1972         if (type != IOMMU_DOMAIN_UNMANAGED &&
1973             type != IOMMU_DOMAIN_DMA &&
1974             type != IOMMU_DOMAIN_IDENTITY)
1975                 return NULL;
1976
1977         /*
1978          * Allocate the domain and initialise some of its data structures.
1979          * We can't really do anything meaningful until we've added a
1980          * master.
1981          */
1982         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1983         if (!smmu_domain)
1984                 return NULL;
1985
1986         if (type == IOMMU_DOMAIN_DMA &&
1987             iommu_get_dma_cookie(&smmu_domain->domain)) {
1988                 kfree(smmu_domain);
1989                 return NULL;
1990         }
1991
1992         mutex_init(&smmu_domain->init_mutex);
1993         INIT_LIST_HEAD(&smmu_domain->devices);
1994         spin_lock_init(&smmu_domain->devices_lock);
1995         INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
1996
1997         return &smmu_domain->domain;
1998 }
1999
2000 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2001 {
2002         int idx, size = 1 << span;
2003
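        /*
         * find_first_zero_bit() and test_and_set_bit() are not atomic as a
         * pair, so retry if another CPU claimed the bit in between.
         */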
2004         do {
2005                 idx = find_first_zero_bit(map, size);
2006                 if (idx == size)
2007                         return -ENOSPC;
2008         } while (test_and_set_bit(idx, map));
2009
2010         return idx;
2011 }
2012
2013 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2014 {
2015         clear_bit(idx, map);
2016 }
2017
2018 static void arm_smmu_domain_free(struct iommu_domain *domain)
2019 {
2020         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2021         struct arm_smmu_device *smmu = smmu_domain->smmu;
2022
2023         iommu_put_dma_cookie(domain);
2024         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2025
2026         /* Free the CD and ASID, if we allocated them */
2027         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2028                 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2029
2030                 /* Prevent SVA from touching the CD while we're freeing it */
2031                 mutex_lock(&arm_smmu_asid_lock);
2032                 if (cfg->cdcfg.cdtab)
2033                         arm_smmu_free_cd_tables(smmu_domain);
2034                 arm_smmu_free_asid(&cfg->cd);
2035                 mutex_unlock(&arm_smmu_asid_lock);
2036         } else {
2037                 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2038                 if (cfg->vmid)
2039                         arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2040         }
2041
2042         kfree(smmu_domain);
2043 }
2044
2045 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2046                                        struct arm_smmu_master *master,
2047                                        struct io_pgtable_cfg *pgtbl_cfg)
2048 {
2049         int ret;
2050         u32 asid;
2051         struct arm_smmu_device *smmu = smmu_domain->smmu;
2052         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2053         typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2054
2055         refcount_set(&cfg->cd.refs, 1);
2056
2057         /* Prevent SVA from modifying the ASID until it is written to the CD */
2058         mutex_lock(&arm_smmu_asid_lock);
2059         ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2060                        XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2061         if (ret)
2062                 goto out_unlock;
2063
2064         cfg->s1cdmax = master->ssid_bits;
2065
2066         smmu_domain->stall_enabled = master->stall_enabled;
2067
2068         ret = arm_smmu_alloc_cd_tables(smmu_domain);
2069         if (ret)
2070                 goto out_free_asid;
2071
2072         cfg->cd.asid    = (u16)asid;
2073         cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2074         cfg->cd.tcr     = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2075                           FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2076                           FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2077                           FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2078                           FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2079                           FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2080                           CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2081         cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair;
2082
2083         /*
2084          * Note that this will end up calling arm_smmu_sync_cd() before
2085          * the master has been added to the devices list for this domain.
2086          * This isn't an issue because the STE hasn't been installed yet.
2087          */
2088         ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2089         if (ret)
2090                 goto out_free_cd_tables;
2091
2092         mutex_unlock(&arm_smmu_asid_lock);
2093         return 0;
2094
2095 out_free_cd_tables:
2096         arm_smmu_free_cd_tables(smmu_domain);
2097 out_free_asid:
2098         arm_smmu_free_asid(&cfg->cd);
2099 out_unlock:
2100         mutex_unlock(&arm_smmu_asid_lock);
2101         return ret;
2102 }
2103
2104 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2105                                        struct arm_smmu_master *master,
2106                                        struct io_pgtable_cfg *pgtbl_cfg)
2107 {
2108         int vmid;
2109         struct arm_smmu_device *smmu = smmu_domain->smmu;
2110         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2111         typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2112
2113         vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2114         if (vmid < 0)
2115                 return vmid;
2116
2117         vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2118         cfg->vmid       = (u16)vmid;
2119         cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2120         cfg->vtcr       = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2121                           FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2122                           FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2123                           FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2124                           FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2125                           FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2126                           FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2127         return 0;
2128 }
2129
2130 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2131                                     struct arm_smmu_master *master)
2132 {
2133         int ret;
2134         unsigned long ias, oas;
2135         enum io_pgtable_fmt fmt;
2136         struct io_pgtable_cfg pgtbl_cfg;
2137         struct io_pgtable_ops *pgtbl_ops;
2138         int (*finalise_stage_fn)(struct arm_smmu_domain *,
2139                                  struct arm_smmu_master *,
2140                                  struct io_pgtable_cfg *);
2141         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2142         struct arm_smmu_device *smmu = smmu_domain->smmu;
2143
2144         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2145                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2146                 return 0;
2147         }
2148
2149         /* Restrict the stage to what we can actually support */
2150         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2151                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2152         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2153                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2154
2155         switch (smmu_domain->stage) {
2156         case ARM_SMMU_DOMAIN_S1:
2157                 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2158                 ias = min_t(unsigned long, ias, VA_BITS);
2159                 oas = smmu->ias;
2160                 fmt = ARM_64_LPAE_S1;
2161                 finalise_stage_fn = arm_smmu_domain_finalise_s1;
2162                 break;
2163         case ARM_SMMU_DOMAIN_NESTED:
2164         case ARM_SMMU_DOMAIN_S2:
2165                 ias = smmu->ias;
2166                 oas = smmu->oas;
2167                 fmt = ARM_64_LPAE_S2;
2168                 finalise_stage_fn = arm_smmu_domain_finalise_s2;
2169                 break;
2170         default:
2171                 return -EINVAL;
2172         }
2173
2174         pgtbl_cfg = (struct io_pgtable_cfg) {
2175                 .pgsize_bitmap  = smmu->pgsize_bitmap,
2176                 .ias            = ias,
2177                 .oas            = oas,
2178                 .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2179                 .tlb            = &arm_smmu_flush_ops,
2180                 .iommu_dev      = smmu->dev,
2181         };
2182
2183         if (!iommu_get_dma_strict(domain))
2184                 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
2185
2186         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2187         if (!pgtbl_ops)
2188                 return -ENOMEM;
2189
2190         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2191         domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2192         domain->geometry.force_aperture = true;
2193
2194         ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2195         if (ret < 0) {
2196                 free_io_pgtable_ops(pgtbl_ops);
2197                 return ret;
2198         }
2199
2200         smmu_domain->pgtbl_ops = pgtbl_ops;
2201         return 0;
2202 }
2203
2204 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2205 {
2206         __le64 *step;
2207         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2208
2209         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2210                 struct arm_smmu_strtab_l1_desc *l1_desc;
2211                 int idx;
2212
2213                 /* Two-level walk */
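                /*
                 * For example, with STRTAB_SPLIT == 8, SID 0x1234 selects L1
                 * descriptor 0x12 and STE 0x34 within that L2 table.
                 */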
2214                 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2215                 l1_desc = &cfg->l1_desc[idx];
2216                 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2217                 step = &l1_desc->l2ptr[idx];
2218         } else {
2219                 /* Simple linear lookup */
2220                 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2221         }
2222
2223         return step;
2224 }
2225
2226 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2227 {
2228         int i, j;
2229         struct arm_smmu_device *smmu = master->smmu;
2230
2231         for (i = 0; i < master->num_streams; ++i) {
2232                 u32 sid = master->streams[i].id;
2233                 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2234
2235                 /* Bridged PCI devices may end up with duplicated IDs */
2236                 for (j = 0; j < i; j++)
2237                         if (master->streams[j].id == sid)
2238                                 break;
2239                 if (j < i)
2240                         continue;
2241
2242                 arm_smmu_write_strtab_ent(master, sid, step);
2243         }
2244 }
2245
2246 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2247 {
2248         struct device *dev = master->dev;
2249         struct arm_smmu_device *smmu = master->smmu;
2250         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2251
2252         if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2253                 return false;
2254
2255         if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2256                 return false;
2257
2258         return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2259 }
2260
2261 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2262 {
2263         size_t stu;
2264         struct pci_dev *pdev;
2265         struct arm_smmu_device *smmu = master->smmu;
2266         struct arm_smmu_domain *smmu_domain = master->domain;
2267
2268         /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2269         if (!master->ats_enabled)
2270                 return;
2271
2272         /* Smallest Translation Unit: log2 of the smallest supported granule */
2273         stu = __ffs(smmu->pgsize_bitmap);
2274         pdev = to_pci_dev(master->dev);
2275
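        /*
         * Bump nr_ats_masters before enabling ATS at the endpoint, pairing
         * with the smp_mb() in arm_smmu_atc_inv_domain() (see the comment
         * there for the full ordering requirements).
         */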
2276         atomic_inc(&smmu_domain->nr_ats_masters);
2277         arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2278         if (pci_enable_ats(pdev, stu))
2279                 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2280 }
2281
2282 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2283 {
2284         struct arm_smmu_domain *smmu_domain = master->domain;
2285
2286         if (!master->ats_enabled)
2287                 return;
2288
2289         pci_disable_ats(to_pci_dev(master->dev));
2290         /*
2291          * Ensure ATS is disabled at the endpoint before we issue the
2292          * ATC invalidation via the SMMU.
2293          */
2294         wmb();
2295         arm_smmu_atc_inv_master(master);
2296         atomic_dec(&smmu_domain->nr_ats_masters);
2297 }
2298
2299 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2300 {
2301         int ret;
2302         int features;
2303         int num_pasids;
2304         struct pci_dev *pdev;
2305
2306         if (!dev_is_pci(master->dev))
2307                 return -ENODEV;
2308
2309         pdev = to_pci_dev(master->dev);
2310
2311         features = pci_pasid_features(pdev);
2312         if (features < 0)
2313                 return features;
2314
2315         num_pasids = pci_max_pasids(pdev);
2316         if (num_pasids <= 0)
2317                 return num_pasids;
2318
2319         ret = pci_enable_pasid(pdev, features);
2320         if (ret) {
2321                 dev_err(&pdev->dev, "Failed to enable PASID\n");
2322                 return ret;
2323         }
2324
2325         master->ssid_bits = min_t(u8, ilog2(num_pasids),
2326                                   master->smmu->ssid_bits);
2327         return 0;
2328 }
2329
2330 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2331 {
2332         struct pci_dev *pdev;
2333
2334         if (!dev_is_pci(master->dev))
2335                 return;
2336
2337         pdev = to_pci_dev(master->dev);
2338
2339         if (!pdev->pasid_enabled)
2340                 return;
2341
2342         master->ssid_bits = 0;
2343         pci_disable_pasid(pdev);
2344 }
2345
2346 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2347 {
2348         unsigned long flags;
2349         struct arm_smmu_domain *smmu_domain = master->domain;
2350
2351         if (!smmu_domain)
2352                 return;
2353
2354         arm_smmu_disable_ats(master);
2355
2356         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2357         list_del(&master->domain_head);
2358         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2359
2360         master->domain = NULL;
2361         master->ats_enabled = false;
2362         arm_smmu_install_ste_for_dev(master);
2363 }
2364
2365 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2366 {
2367         int ret = 0;
2368         unsigned long flags;
2369         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2370         struct arm_smmu_device *smmu;
2371         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2372         struct arm_smmu_master *master;
2373
2374         if (!fwspec)
2375                 return -ENOENT;
2376
2377         master = dev_iommu_priv_get(dev);
2378         smmu = master->smmu;
2379
2380         /*
2381          * Checking that SVA is disabled ensures that this device isn't bound to
2382          * any mm, and can be safely detached from its old domain. Bonds cannot
2383          * be removed concurrently since we're holding the group mutex.
2384          */
2385         if (arm_smmu_master_sva_enabled(master)) {
2386                 dev_err(dev, "cannot attach - SVA enabled\n");
2387                 return -EBUSY;
2388         }
2389
2390         arm_smmu_detach_dev(master);
2391
2392         mutex_lock(&smmu_domain->init_mutex);
2393
2394         if (!smmu_domain->smmu) {
2395                 smmu_domain->smmu = smmu;
2396                 ret = arm_smmu_domain_finalise(domain, master);
2397                 if (ret) {
2398                         smmu_domain->smmu = NULL;
2399                         goto out_unlock;
2400                 }
2401         } else if (smmu_domain->smmu != smmu) {
2402                 dev_err(dev,
2403                         "cannot attach to SMMU %s (upstream of %s)\n",
2404                         dev_name(smmu_domain->smmu->dev),
2405                         dev_name(smmu->dev));
2406                 ret = -ENXIO;
2407                 goto out_unlock;
2408         } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2409                    master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2410                 dev_err(dev,
2411                         "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2412                         smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2413                 ret = -EINVAL;
2414                 goto out_unlock;
2415         } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2416                    smmu_domain->stall_enabled != master->stall_enabled) {
2417                 dev_err(dev, "cannot attach to stall-%s domain\n",
2418                         smmu_domain->stall_enabled ? "enabled" : "disabled");
2419                 ret = -EINVAL;
2420                 goto out_unlock;
2421         }
2422
2423         master->domain = smmu_domain;
2424
2425         if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2426                 master->ats_enabled = arm_smmu_ats_supported(master);
2427
2428         arm_smmu_install_ste_for_dev(master);
2429
2430         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2431         list_add(&master->domain_head, &smmu_domain->devices);
2432         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2433
2434         arm_smmu_enable_ats(master);
2435
2436 out_unlock:
2437         mutex_unlock(&smmu_domain->init_mutex);
2438         return ret;
2439 }
2440
2441 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2442                         phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2443 {
2444         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2445
2446         if (!ops)
2447                 return -ENODEV;
2448
2449         return ops->map(ops, iova, paddr, size, prot, gfp);
2450 }
2451
2452 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2453                              size_t size, struct iommu_iotlb_gather *gather)
2454 {
2455         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2456         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2457
2458         if (!ops)
2459                 return 0;
2460
2461         return ops->unmap(ops, iova, size, gather);
2462 }
2463
2464 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2465 {
2466         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2467
2468         if (smmu_domain->smmu)
2469                 arm_smmu_tlb_inv_context(smmu_domain);
2470 }
2471
2472 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2473                                 struct iommu_iotlb_gather *gather)
2474 {
2475         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2476
2477         if (!gather->pgsize)
2478                 return;
2479
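        /* gather->end is inclusive, hence the +1 when computing the size */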
2480         arm_smmu_tlb_inv_range_domain(gather->start,
2481                                       gather->end - gather->start + 1,
2482                                       gather->pgsize, true, smmu_domain);
2483 }
2484
2485 static phys_addr_t
2486 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2487 {
2488         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2489
2490         if (domain->type == IOMMU_DOMAIN_IDENTITY)
2491                 return iova;
2492
2493         if (!ops)
2494                 return 0;
2495
2496         return ops->iova_to_phys(ops, iova);
2497 }
2498
2499 static struct platform_driver arm_smmu_driver;
2500
2501 static
2502 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2503 {
2504         struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2505                                                           fwnode);
2506         put_device(dev);
2507         return dev ? dev_get_drvdata(dev) : NULL;
2508 }
2509
2510 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2511 {
2512         unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2513
2514         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2515                 limit *= 1UL << STRTAB_SPLIT;
2516
2517         return sid < limit;
2518 }
2519
2520 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2521                                   struct arm_smmu_master *master)
2522 {
2523         int i;
2524         int ret = 0;
2525         struct arm_smmu_stream *new_stream, *cur_stream;
2526         struct rb_node **new_node, *parent_node = NULL;
2527         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2528
2529         master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2530                                   GFP_KERNEL);
2531         if (!master->streams)
2532                 return -ENOMEM;
2533         master->num_streams = fwspec->num_ids;
2534
2535         mutex_lock(&smmu->streams_mutex);
2536         for (i = 0; i < fwspec->num_ids; i++) {
2537                 u32 sid = fwspec->ids[i];
2538
2539                 new_stream = &master->streams[i];
2540                 new_stream->id = sid;
2541                 new_stream->master = master;
2542
2543                 /*
2544                  * Check that the SID is within range of the SMMU and our stream table
2545                  */
2546                 if (!arm_smmu_sid_in_range(smmu, sid)) {
2547                         ret = -ERANGE;
2548                         break;
2549                 }
2550
2551                 /* Ensure l2 strtab is initialised */
2552                 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2553                         ret = arm_smmu_init_l2_strtab(smmu, sid);
2554                         if (ret)
2555                                 break;
2556                 }
2557
2558                 /* Insert into SID tree */
2559                 new_node = &(smmu->streams.rb_node);
2560                 while (*new_node) {
2561                         cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2562                                               node);
2563                         parent_node = *new_node;
2564                         if (cur_stream->id > new_stream->id) {
2565                                 new_node = &((*new_node)->rb_left);
2566                         } else if (cur_stream->id < new_stream->id) {
2567                                 new_node = &((*new_node)->rb_right);
2568                         } else {
2569                                 dev_warn(master->dev,
2570                                          "stream %u already in tree\n",
2571                                          cur_stream->id);
2572                                 ret = -EINVAL;
2573                                 break;
2574                         }
2575                 }
2576                 if (ret)
2577                         break;
2578
2579                 rb_link_node(&new_stream->node, parent_node, new_node);
2580                 rb_insert_color(&new_stream->node, &smmu->streams);
2581         }
2582
2583         if (ret) {
2584                 for (i--; i >= 0; i--)
2585                         rb_erase(&master->streams[i].node, &smmu->streams);
2586                 kfree(master->streams);
2587         }
2588         mutex_unlock(&smmu->streams_mutex);
2589
2590         return ret;
2591 }
2592
2593 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2594 {
2595         int i;
2596         struct arm_smmu_device *smmu = master->smmu;
2597         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2598
2599         if (!smmu || !master->streams)
2600                 return;
2601
2602         mutex_lock(&smmu->streams_mutex);
2603         for (i = 0; i < fwspec->num_ids; i++)
2604                 rb_erase(&master->streams[i].node, &smmu->streams);
2605         mutex_unlock(&smmu->streams_mutex);
2606
2607         kfree(master->streams);
2608 }
2609
2610 static struct iommu_ops arm_smmu_ops;
2611
2612 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2613 {
2614         int ret;
2615         struct arm_smmu_device *smmu;
2616         struct arm_smmu_master *master;
2617         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2618
2619         if (!fwspec || fwspec->ops != &arm_smmu_ops)
2620                 return ERR_PTR(-ENODEV);
2621
2622         if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2623                 return ERR_PTR(-EBUSY);
2624
2625         smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2626         if (!smmu)
2627                 return ERR_PTR(-ENODEV);
2628
2629         master = kzalloc(sizeof(*master), GFP_KERNEL);
2630         if (!master)
2631                 return ERR_PTR(-ENOMEM);
2632
2633         master->dev = dev;
2634         master->smmu = smmu;
2635         INIT_LIST_HEAD(&master->bonds);
2636         dev_iommu_priv_set(dev, master);
2637
2638         ret = arm_smmu_insert_master(smmu, master);
2639         if (ret)
2640                 goto err_free_master;
2641
2642         device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2643         master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2644
2645         /*
2646          * Note that PASID must be enabled before, and disabled after ATS:
2647          * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2648          *
2649          *   Behavior is undefined if this bit is Set and the value of the PASID
2650          *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2651          *   are changed.
2652          */
2653         arm_smmu_enable_pasid(master);
2654
2655         if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2656                 master->ssid_bits = min_t(u8, master->ssid_bits,
2657                                           CTXDESC_LINEAR_CDMAX);
2658
2659         if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2660              device_property_read_bool(dev, "dma-can-stall")) ||
2661             smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2662                 master->stall_enabled = true;
2663
2664         return &smmu->iommu;
2665
2666 err_free_master:
2667         kfree(master);
2668         dev_iommu_priv_set(dev, NULL);
2669         return ERR_PTR(ret);
2670 }
2671
2672 static void arm_smmu_release_device(struct device *dev)
2673 {
2674         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2675         struct arm_smmu_master *master;
2676
2677         if (!fwspec || fwspec->ops != &arm_smmu_ops)
2678                 return;
2679
2680         master = dev_iommu_priv_get(dev);
2681         if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2682                 iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2683         arm_smmu_detach_dev(master);
2684         arm_smmu_disable_pasid(master);
2685         arm_smmu_remove_master(master);
2686         kfree(master);
2687         iommu_fwspec_free(dev);
2688 }
2689
2690 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2691 {
2692         struct iommu_group *group;
2693
2694         /*
2695          * We don't support devices sharing stream IDs other than PCI RID
2696          * aliases, since the necessary ID-to-device lookup becomes rather
2697          * impractical given a potential sparse 32-bit stream ID space.
2698          */
2699         if (dev_is_pci(dev))
2700                 group = pci_device_group(dev);
2701         else
2702                 group = generic_device_group(dev);
2703
2704         return group;
2705 }
2706
2707 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2708 {
2709         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2710         int ret = 0;
2711
2712         mutex_lock(&smmu_domain->init_mutex);
2713         if (smmu_domain->smmu)
2714                 ret = -EPERM;
2715         else
2716                 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2717         mutex_unlock(&smmu_domain->init_mutex);
2718
2719         return ret;
2720 }
2721
2722 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2723 {
2724         return iommu_fwspec_add_ids(dev, args->args, 1);
2725 }
2726
2727 static void arm_smmu_get_resv_regions(struct device *dev,
2728                                       struct list_head *head)
2729 {
2730         struct iommu_resv_region *region;
2731         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2732
2733         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2734                                          prot, IOMMU_RESV_SW_MSI);
2735         if (!region)
2736                 return;
2737
2738         list_add_tail(&region->list, head);
2739
2740         iommu_dma_get_resv_regions(dev, head);
2741 }
2742
2743 static bool arm_smmu_dev_has_feature(struct device *dev,
2744                                      enum iommu_dev_features feat)
2745 {
2746         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2747
2748         if (!master)
2749                 return false;
2750
2751         switch (feat) {
2752         case IOMMU_DEV_FEAT_IOPF:
2753                 return arm_smmu_master_iopf_supported(master);
2754         case IOMMU_DEV_FEAT_SVA:
2755                 return arm_smmu_master_sva_supported(master);
2756         default:
2757                 return false;
2758         }
2759 }
2760
2761 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2762                                          enum iommu_dev_features feat)
2763 {
2764         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2765
2766         if (!master)
2767                 return false;
2768
2769         switch (feat) {
2770         case IOMMU_DEV_FEAT_IOPF:
2771                 return master->iopf_enabled;
2772         case IOMMU_DEV_FEAT_SVA:
2773                 return arm_smmu_master_sva_enabled(master);
2774         default:
2775                 return false;
2776         }
2777 }
2778
2779 static int arm_smmu_dev_enable_feature(struct device *dev,
2780                                        enum iommu_dev_features feat)
2781 {
2782         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2783
2784         if (!arm_smmu_dev_has_feature(dev, feat))
2785                 return -ENODEV;
2786
2787         if (arm_smmu_dev_feature_enabled(dev, feat))
2788                 return -EBUSY;
2789
2790         switch (feat) {
2791         case IOMMU_DEV_FEAT_IOPF:
2792                 master->iopf_enabled = true;
2793                 return 0;
2794         case IOMMU_DEV_FEAT_SVA:
2795                 return arm_smmu_master_enable_sva(master);
2796         default:
2797                 return -EINVAL;
2798         }
2799 }
2800
2801 static int arm_smmu_dev_disable_feature(struct device *dev,
2802                                         enum iommu_dev_features feat)
2803 {
2804         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2805
2806         if (!arm_smmu_dev_feature_enabled(dev, feat))
2807                 return -EINVAL;
2808
2809         switch (feat) {
2810         case IOMMU_DEV_FEAT_IOPF:
2811                 if (master->sva_enabled)
2812                         return -EBUSY;
2813                 master->iopf_enabled = false;
2814                 return 0;
2815         case IOMMU_DEV_FEAT_SVA:
2816                 return arm_smmu_master_disable_sva(master);
2817         default:
2818                 return -EINVAL;
2819         }
2820 }
2821
2822 static struct iommu_ops arm_smmu_ops = {
2823         .capable                = arm_smmu_capable,
2824         .domain_alloc           = arm_smmu_domain_alloc,
2825         .domain_free            = arm_smmu_domain_free,
2826         .attach_dev             = arm_smmu_attach_dev,
2827         .map                    = arm_smmu_map,
2828         .unmap                  = arm_smmu_unmap,
2829         .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
2830         .iotlb_sync             = arm_smmu_iotlb_sync,
2831         .iova_to_phys           = arm_smmu_iova_to_phys,
2832         .probe_device           = arm_smmu_probe_device,
2833         .release_device         = arm_smmu_release_device,
2834         .device_group           = arm_smmu_device_group,
2835         .enable_nesting         = arm_smmu_enable_nesting,
2836         .of_xlate               = arm_smmu_of_xlate,
2837         .get_resv_regions       = arm_smmu_get_resv_regions,
2838         .put_resv_regions       = generic_iommu_put_resv_regions,
2839         .dev_has_feat           = arm_smmu_dev_has_feature,
2840         .dev_feat_enabled       = arm_smmu_dev_feature_enabled,
2841         .dev_enable_feat        = arm_smmu_dev_enable_feature,
2842         .dev_disable_feat       = arm_smmu_dev_disable_feature,
2843         .sva_bind               = arm_smmu_sva_bind,
2844         .sva_unbind             = arm_smmu_sva_unbind,
2845         .sva_get_pasid          = arm_smmu_sva_get_pasid,
2846         .page_response          = arm_smmu_page_response,
2847         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
2848         .owner                  = THIS_MODULE,
2849 };
2850
2851 /* Probing and initialisation functions */
2852 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2853                                    struct arm_smmu_queue *q,
2854                                    void __iomem *page,
2855                                    unsigned long prod_off,
2856                                    unsigned long cons_off,
2857                                    size_t dwords, const char *name)
2858 {
2859         size_t qsz;
2860
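             /*
              * Try to allocate the queue at the size the hardware advertises.
              * If the contiguous DMA allocation fails, keep halving the number
              * of entries, giving up once the queue would be smaller than a
              * page. Each entry is "dwords" 64-bit words, so for example 1024
              * 16-byte commands need a 16KiB allocation.
              */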
2861         do {
2862                 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2863                 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2864                                               GFP_KERNEL);
2865                 if (q->base || qsz < PAGE_SIZE)
2866                         break;
2867
2868                 q->llq.max_n_shift--;
2869         } while (1);
2870
2871         if (!q->base) {
2872                 dev_err(smmu->dev,
2873                         "failed to allocate queue (0x%zx bytes) for %s\n",
2874                         qsz, name);
2875                 return -ENOMEM;
2876         }
2877
2878         if (!WARN_ON(q->base_dma & (qsz - 1))) {
2879                 dev_info(smmu->dev, "allocated %u entries for %s\n",
2880                          1 << q->llq.max_n_shift, name);
2881         }
2882
2883         q->prod_reg     = page + prod_off;
2884         q->cons_reg     = page + cons_off;
2885         q->ent_dwords   = dwords;
2886
2887         q->q_base  = Q_BASE_RWA;
2888         q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2889         q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2890
2891         q->llq.prod = q->llq.cons = 0;
2892         return 0;
2893 }
2894
2895 static void arm_smmu_cmdq_free_bitmap(void *data)
2896 {
2897         unsigned long *bitmap = data;
2898         bitmap_free(bitmap);
2899 }
2900
2901 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2902 {
2903         int ret = 0;
2904         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2905         unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2906         atomic_long_t *bitmap;
2907
2908         atomic_set(&cmdq->owner_prod, 0);
2909         atomic_set(&cmdq->lock, 0);
2910
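             /*
              * The valid bitmap has one bit per command-queue entry and tracks
              * which slots have been published by their producers, allowing a
              * CMD_SYNC owner to wait for concurrent writers to finish before
              * advancing the hardware producer pointer.
              */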
2911         bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2912         if (!bitmap) {
2913                 dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2914                 ret = -ENOMEM;
2915         } else {
2916                 cmdq->valid_map = bitmap;
2917                 devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2918         }
2919
2920         return ret;
2921 }
2922
2923 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2924 {
2925         int ret;
2926
2927         /* cmdq */
2928         ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2929                                       ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2930                                       CMDQ_ENT_DWORDS, "cmdq");
2931         if (ret)
2932                 return ret;
2933
2934         ret = arm_smmu_cmdq_init(smmu);
2935         if (ret)
2936                 return ret;
2937
2938         /* evtq */
2939         ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2940                                       ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2941                                       EVTQ_ENT_DWORDS, "evtq");
2942         if (ret)
2943                 return ret;
2944
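             /*
              * Stall-mode faults are reported through the event queue, so an
              * I/O page fault queue is only needed when both stalls and SVA
              * are supported.
              */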
2945         if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2946             (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2947                 smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2948                 if (!smmu->evtq.iopf)
2949                         return -ENOMEM;
2950         }
2951
2952         /* priq */
2953         if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2954                 return 0;
2955
2956         return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2957                                        ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2958                                        PRIQ_ENT_DWORDS, "priq");
2959 }
2960
2961 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2962 {
2963         unsigned int i;
2964         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2965         size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2966         void *strtab = smmu->strtab_cfg.strtab;
2967
2968         cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2969         if (!cfg->l1_desc)
2970                 return -ENOMEM;
2971
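             /*
              * All level-1 descriptors start out invalid (span == 0); level-2
              * tables are only allocated once a master using the corresponding
              * StreamID range shows up.
              */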
2972         for (i = 0; i < cfg->num_l1_ents; ++i) {
2973                 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2974                 strtab += STRTAB_L1_DESC_DWORDS << 3;
2975         }
2976
2977         return 0;
2978 }
2979
2980 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2981 {
2982         void *strtab;
2983         u64 reg;
2984         u32 size, l1size;
2985         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2986
2987         /* Calculate the L1 size, capped to the SIDSIZE. */
2988         size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2989         size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2990         cfg->num_l1_ents = 1 << size;
2991
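             /*
              * Each level-1 descriptor points to a level-2 table covering
              * 2^STRTAB_SPLIT StreamIDs, so the table as a whole spans
              * (num_l1_ents << STRTAB_SPLIT) SIDs.
              */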
2992         size += STRTAB_SPLIT;
2993         if (size < smmu->sid_bits)
2994                 dev_warn(smmu->dev,
2995                          "2-level strtab only covers %u/%u bits of SID\n",
2996                          size, smmu->sid_bits);
2997
2998         l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2999         strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3000                                      GFP_KERNEL);
3001         if (!strtab) {
3002                 dev_err(smmu->dev,
3003                         "failed to allocate l1 stream table (%u bytes)\n",
3004                         l1size);
3005                 return -ENOMEM;
3006         }
3007         cfg->strtab = strtab;
3008
3009         /* Configure strtab_base_cfg for 2 levels */
3010         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3011         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3012         reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3013         cfg->strtab_base_cfg = reg;
3014
3015         return arm_smmu_init_l1_strtab(smmu);
3016 }
3017
3018 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3019 {
3020         void *strtab;
3021         u64 reg;
3022         u32 size;
3023         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3024
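             /*
              * A linear stream table needs one STE (STRTAB_STE_DWORDS 64-bit
              * words) for every possible StreamID, so it is only used when
              * the SID space is small or 2-level tables are unsupported.
              */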
3025         size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3026         strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3027                                      GFP_KERNEL);
3028         if (!strtab) {
3029                 dev_err(smmu->dev,
3030                         "failed to allocate linear stream table (%u bytes)\n",
3031                         size);
3032                 return -ENOMEM;
3033         }
3034         cfg->strtab = strtab;
3035         cfg->num_l1_ents = 1 << smmu->sid_bits;
3036
3037         /* Configure strtab_base_cfg for a linear table covering all SIDs */
3038         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3039         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3040         cfg->strtab_base_cfg = reg;
3041
3042         arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3043         return 0;
3044 }
3045
3046 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3047 {
3048         u64 reg;
3049         int ret;
3050
3051         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3052                 ret = arm_smmu_init_strtab_2lvl(smmu);
3053         else
3054                 ret = arm_smmu_init_strtab_linear(smmu);
3055
3056         if (ret)
3057                 return ret;
3058
3059         /* Set the strtab base address */
3060         reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3061         reg |= STRTAB_BASE_RA;
3062         smmu->strtab_cfg.strtab_base = reg;
3063
3064         /* Allocate the first VMID for stage-2 bypass STEs */
3065         set_bit(0, smmu->vmid_map);
3066         return 0;
3067 }
3068
3069 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3070 {
3071         int ret;
3072
3073         mutex_init(&smmu->streams_mutex);
3074         smmu->streams = RB_ROOT;
3075
3076         ret = arm_smmu_init_queues(smmu);
3077         if (ret)
3078                 return ret;
3079
3080         return arm_smmu_init_strtab(smmu);
3081 }
3082
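/*
 * Write a control register and poll its companion ACK register until the
 * hardware reflects the new value, so that subsequent configuration only
 * relies on state the SMMU has actually accepted.
 */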
3083 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3084                                    unsigned int reg_off, unsigned int ack_off)
3085 {
3086         u32 reg;
3087
3088         writel_relaxed(val, smmu->base + reg_off);
3089         return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3090                                           1, ARM_SMMU_POLL_TIMEOUT_US);
3091 }
3092
3093 /* GBPA is "special": it has no ACK register, so poll its self-clearing UPDATE bit instead */
3094 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3095 {
3096         int ret;
3097         u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3098
3099         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3100                                          1, ARM_SMMU_POLL_TIMEOUT_US);
3101         if (ret)
3102                 return ret;
3103
3104         reg &= ~clr;
3105         reg |= set;
3106         writel_relaxed(reg | GBPA_UPDATE, gbpa);
3107         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3108                                          1, ARM_SMMU_POLL_TIMEOUT_US);
3109
3110         if (ret)
3111                 dev_err(smmu->dev, "GBPA not responding to update\n");
3112         return ret;
3113 }
3114
3115 static void arm_smmu_free_msis(void *data)
3116 {
3117         struct device *dev = data;
3118         platform_msi_domain_free_irqs(dev);
3119 }
3120
3121 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3122 {
3123         phys_addr_t doorbell;
3124         struct device *dev = msi_desc_to_dev(desc);
3125         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3126         phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3127
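             /*
              * Program the doorbell address, payload and memory attributes
              * into the SMMU's IRQ_CFG registers: the SMMU itself performs
              * the MSI write when the corresponding event is signalled.
              */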
3128         doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3129         doorbell &= MSI_CFG0_ADDR_MASK;
3130
3131         writeq_relaxed(doorbell, smmu->base + cfg[0]);
3132         writel_relaxed(msg->data, smmu->base + cfg[1]);
3133         writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3134 }
3135
3136 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3137 {
3138         struct msi_desc *desc;
3139         int ret, nvec = ARM_SMMU_MAX_MSIS;
3140         struct device *dev = smmu->dev;
3141
3142         /* Clear the MSI address regs */
3143         writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3144         writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3145
3146         if (smmu->features & ARM_SMMU_FEAT_PRI)
3147                 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3148         else
3149                 nvec--;
3150
3151         if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3152                 return;
3153
3154         if (!dev->msi_domain) {
3155                 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3156                 return;
3157         }
3158
3159         /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3160         ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3161         if (ret) {
3162                 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3163                 return;
3164         }
3165
3166         for_each_msi_entry(desc, dev) {
3167                 switch (desc->platform.msi_index) {
3168                 case EVTQ_MSI_INDEX:
3169                         smmu->evtq.q.irq = desc->irq;
3170                         break;
3171                 case GERROR_MSI_INDEX:
3172                         smmu->gerr_irq = desc->irq;
3173                         break;
3174                 case PRIQ_MSI_INDEX:
3175                         smmu->priq.q.irq = desc->irq;
3176                         break;
3177                 default:        /* Unknown */
3178                         continue;
3179                 }
3180         }
3181
3182         /* Add callback to free MSIs on teardown */
3183         devm_add_action(dev, arm_smmu_free_msis, dev);
3184 }
3185
3186 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3187 {
3188         int irq, ret;
3189
3190         arm_smmu_setup_msis(smmu);
3191
3192         /* Request interrupt lines */
3193         irq = smmu->evtq.q.irq;
3194         if (irq) {
3195                 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3196                                                 arm_smmu_evtq_thread,
3197                                                 IRQF_ONESHOT,
3198                                                 "arm-smmu-v3-evtq", smmu);
3199                 if (ret < 0)
3200                         dev_warn(smmu->dev, "failed to enable evtq irq\n");
3201         } else {
3202                 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3203         }
3204
3205         irq = smmu->gerr_irq;
3206         if (irq) {
3207                 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3208                                        0, "arm-smmu-v3-gerror", smmu);
3209                 if (ret < 0)
3210                         dev_warn(smmu->dev, "failed to enable gerror irq\n");
3211         } else {
3212                 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3213         }
3214
3215         if (smmu->features & ARM_SMMU_FEAT_PRI) {
3216                 irq = smmu->priq.q.irq;
3217                 if (irq) {
3218                         ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3219                                                         arm_smmu_priq_thread,
3220                                                         IRQF_ONESHOT,
3221                                                         "arm-smmu-v3-priq",
3222                                                         smmu);
3223                         if (ret < 0)
3224                                 dev_warn(smmu->dev,
3225                                          "failed to enable priq irq\n");
3226                 } else {
3227                         dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3228                 }
3229         }
3230 }
3231
3232 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3233 {
3234         int ret, irq;
3235         u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3236
3237         /* Disable IRQs first */
3238         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3239                                       ARM_SMMU_IRQ_CTRLACK);
3240         if (ret) {
3241                 dev_err(smmu->dev, "failed to disable irqs\n");
3242                 return ret;
3243         }
3244
3245         irq = smmu->combined_irq;
3246         if (irq) {
3247                 /*
3248                  * Cavium ThunderX2 implementation doesn't support unique irq
3249                  * lines. Use a single irq line for all the SMMUv3 interrupts.
3250                  */
3251                 ret = devm_request_threaded_irq(smmu->dev, irq,
3252                                         arm_smmu_combined_irq_handler,
3253                                         arm_smmu_combined_irq_thread,
3254                                         IRQF_ONESHOT,
3255                                         "arm-smmu-v3-combined-irq", smmu);
3256                 if (ret < 0)
3257                         dev_warn(smmu->dev, "failed to enable combined irq\n");
3258         } else
3259                 arm_smmu_setup_unique_irqs(smmu);
3260
3261         if (smmu->features & ARM_SMMU_FEAT_PRI)
3262                 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3263
3264         /* Enable interrupt generation on the SMMU */
3265         ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3266                                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3267         if (ret)
3268                 dev_warn(smmu->dev, "failed to enable irqs\n");
3269
3270         return 0;
3271 }
3272
3273 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3274 {
3275         int ret;
3276
3277         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3278         if (ret)
3279                 dev_err(smmu->dev, "failed to clear cr0\n");
3280
3281         return ret;
3282 }
3283
3284 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3285 {
3286         int ret;
3287         u32 reg, enables;
3288         struct arm_smmu_cmdq_ent cmd;
3289
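             /*
              * Bring the SMMU up in a fixed order: disable it, program the
              * table and queue attributes and base registers, enable the
              * command queue and invalidate any cached configuration and TLB
              * state, then enable the remaining queues, interrupts and
              * finally translation (or bypass).
              */
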
3290         /* Clear CR0 and sync (disables SMMU and queue processing) */
3291         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3292         if (reg & CR0_SMMUEN) {
3293                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3294                 WARN_ON(is_kdump_kernel() && !disable_bypass);
3295                 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3296         }
3297
3298         ret = arm_smmu_device_disable(smmu);
3299         if (ret)
3300                 return ret;
3301
3302         /* CR1 (table and queue memory attributes) */
3303         reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3304               FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3305               FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3306               FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3307               FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3308               FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3309         writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3310
3311         /* CR2 (PTM, RECINVSID and, where supported, E2H) */
3312         reg = CR2_PTM | CR2_RECINVSID;
3313
3314         if (smmu->features & ARM_SMMU_FEAT_E2H)
3315                 reg |= CR2_E2H;
3316
3317         writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3318
3319         /* Stream table */
3320         writeq_relaxed(smmu->strtab_cfg.strtab_base,
3321                        smmu->base + ARM_SMMU_STRTAB_BASE);
3322         writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3323                        smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3324
3325         /* Command queue */
3326         writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3327         writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3328         writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3329
3330         enables = CR0_CMDQEN;
3331         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3332                                       ARM_SMMU_CR0ACK);
3333         if (ret) {
3334                 dev_err(smmu->dev, "failed to enable command queue\n");
3335                 return ret;
3336         }
3337
3338         /* Invalidate any cached configuration */
3339         cmd.opcode = CMDQ_OP_CFGI_ALL;
3340         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3341         arm_smmu_cmdq_issue_sync(smmu);
3342
3343         /* Invalidate any stale TLB entries */
3344         if (smmu->features & ARM_SMMU_FEAT_HYP) {
3345                 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3346                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3347         }
3348
3349         cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3350         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3351         arm_smmu_cmdq_issue_sync(smmu);
3352
3353         /* Event queue */
3354         writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3355         writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3356         writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3357
3358         enables |= CR0_EVTQEN;
3359         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3360                                       ARM_SMMU_CR0ACK);
3361         if (ret) {
3362                 dev_err(smmu->dev, "failed to enable event queue\n");
3363                 return ret;
3364         }
3365
3366         /* PRI queue */
3367         if (smmu->features & ARM_SMMU_FEAT_PRI) {
3368                 writeq_relaxed(smmu->priq.q.q_base,
3369                                smmu->base + ARM_SMMU_PRIQ_BASE);
3370                 writel_relaxed(smmu->priq.q.llq.prod,
3371                                smmu->page1 + ARM_SMMU_PRIQ_PROD);
3372                 writel_relaxed(smmu->priq.q.llq.cons,
3373                                smmu->page1 + ARM_SMMU_PRIQ_CONS);
3374
3375                 enables |= CR0_PRIQEN;
3376                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3377                                               ARM_SMMU_CR0ACK);
3378                 if (ret) {
3379                         dev_err(smmu->dev, "failed to enable PRI queue\n");
3380                         return ret;
3381                 }
3382         }
3383
3384         if (smmu->features & ARM_SMMU_FEAT_ATS) {
3385                 enables |= CR0_ATSCHK;
3386                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3387                                               ARM_SMMU_CR0ACK);
3388                 if (ret) {
3389                         dev_err(smmu->dev, "failed to enable ATS check\n");
3390                         return ret;
3391                 }
3392         }
3393
3394         ret = arm_smmu_setup_irqs(smmu);
3395         if (ret) {
3396                 dev_err(smmu->dev, "failed to setup irqs\n");
3397                 return ret;
3398         }
3399
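             /*
              * A kdump kernel leaves the event and PRI queues disabled: faults
              * from DMA still in flight from the crashed kernel would
              * otherwise fill queues that the crash kernel does not service.
              */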
3400         if (is_kdump_kernel())
3401                 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3402
3403         /* Enable the SMMU interface, or ensure bypass */
3404         if (!bypass || disable_bypass) {
3405                 enables |= CR0_SMMUEN;
3406         } else {
3407                 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3408                 if (ret)
3409                         return ret;
3410         }
3411         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3412                                       ARM_SMMU_CR0ACK);
3413         if (ret) {
3414                 dev_err(smmu->dev, "failed to enable SMMU interface\n");
3415                 return ret;
3416         }
3417
3418         return 0;
3419 }
3420
3421 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3422 {
3423         u32 reg;
3424         bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3425
3426         /* IDR0 */
3427         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3428
3429         /* 2-level structures */
3430         if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3431                 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3432
3433         if (reg & IDR0_CD2L)
3434                 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3435
3436         /*
3437          * Translation table endianness.
3438          * We currently require the same endianness as the CPU, but this
3439          * could be changed later by adding a new IO_PGTABLE_QUIRK.
3440          */
3441         switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3442         case IDR0_TTENDIAN_MIXED:
3443                 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3444                 break;
3445 #ifdef __BIG_ENDIAN
3446         case IDR0_TTENDIAN_BE:
3447                 smmu->features |= ARM_SMMU_FEAT_TT_BE;
3448                 break;
3449 #else
3450         case IDR0_TTENDIAN_LE:
3451                 smmu->features |= ARM_SMMU_FEAT_TT_LE;
3452                 break;
3453 #endif
3454         default:
3455                 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3456                 return -ENXIO;
3457         }
3458
3459         /* Boolean feature flags */
3460         if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3461                 smmu->features |= ARM_SMMU_FEAT_PRI;
3462
3463         if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3464                 smmu->features |= ARM_SMMU_FEAT_ATS;
3465
3466         if (reg & IDR0_SEV)
3467                 smmu->features |= ARM_SMMU_FEAT_SEV;
3468
3469         if (reg & IDR0_MSI) {
3470                 smmu->features |= ARM_SMMU_FEAT_MSI;
3471                 if (coherent && !disable_msipolling)
3472                         smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3473         }
3474
3475         if (reg & IDR0_HYP) {
3476                 smmu->features |= ARM_SMMU_FEAT_HYP;
3477                 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3478                         smmu->features |= ARM_SMMU_FEAT_E2H;
3479         }
3480
3481         /*
3482          * The coherency feature as set by FW is used in preference to the ID
3483          * register, but warn on mismatch.
3484          */
3485         if (!!(reg & IDR0_COHACC) != coherent)
3486                 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3487                          coherent ? "true" : "false");
3488
3489         switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3490         case IDR0_STALL_MODEL_FORCE:
3491                 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3492                 fallthrough;
3493         case IDR0_STALL_MODEL_STALL:
3494                 smmu->features |= ARM_SMMU_FEAT_STALLS;
3495         }
3496
3497         if (reg & IDR0_S1P)
3498                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3499
3500         if (reg & IDR0_S2P)
3501                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3502
3503         if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3504                 dev_err(smmu->dev, "no translation support!\n");
3505                 return -ENXIO;
3506         }
3507
3508         /* We only support the AArch64 table format at present */
3509         switch (FIELD_GET(IDR0_TTF, reg)) {
3510         case IDR0_TTF_AARCH32_64:
3511                 smmu->ias = 40;
3512                 fallthrough;
3513         case IDR0_TTF_AARCH64:
3514                 break;
3515         default:
3516                 dev_err(smmu->dev, "AArch64 table format not supported!\n");
3517                 return -ENXIO;
3518         }
3519
3520         /* ASID/VMID sizes */
3521         smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3522         smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3523
3524         /* IDR1 */
3525         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3526         if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3527                 dev_err(smmu->dev, "embedded implementation not supported\n");
3528                 return -ENXIO;
3529         }
3530
3531         /* Queue sizes, capped to ensure natural alignment */
3532         smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3533                                              FIELD_GET(IDR1_CMDQS, reg));
3534         if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3535                 /*
3536                  * We don't support splitting up batches, so one batch of
3537                  * commands plus an extra sync needs to fit inside the command
3538                  * queue. There's also no way we can handle the weird alignment
3539                  * restrictions on the base pointer for a unit-length queue.
3540                  */
3541                 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3542                         CMDQ_BATCH_ENTRIES);
3543                 return -ENXIO;
3544         }
3545
3546         smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3547                                              FIELD_GET(IDR1_EVTQS, reg));
3548         smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3549                                              FIELD_GET(IDR1_PRIQS, reg));
3550
3551         /* SID/SSID sizes */
3552         smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3553         smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3554
3555         /*
3556          * If the SMMU supports fewer bits than would fill a single L2 stream
3557          * table, use a linear table instead.
3558          */
3559         if (smmu->sid_bits <= STRTAB_SPLIT)
3560                 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3561
3562         /* IDR3 */
3563         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3564         if (FIELD_GET(IDR3_RIL, reg))
3565                 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3566
3567         /* IDR5 */
3568         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3569
3570         /* Maximum number of outstanding stalls */
3571         smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3572
3573         /* Page sizes */
3574         if (reg & IDR5_GRAN64K)
3575                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3576         if (reg & IDR5_GRAN16K)
3577                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3578         if (reg & IDR5_GRAN4K)
3579                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3580
3581         /* Input address size */
3582         if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3583                 smmu->features |= ARM_SMMU_FEAT_VAX;
3584
3585         /* Output address size */
3586         switch (FIELD_GET(IDR5_OAS, reg)) {
3587         case IDR5_OAS_32_BIT:
3588                 smmu->oas = 32;
3589                 break;
3590         case IDR5_OAS_36_BIT:
3591                 smmu->oas = 36;
3592                 break;
3593         case IDR5_OAS_40_BIT:
3594                 smmu->oas = 40;
3595                 break;
3596         case IDR5_OAS_42_BIT:
3597                 smmu->oas = 42;
3598                 break;
3599         case IDR5_OAS_44_BIT:
3600                 smmu->oas = 44;
3601                 break;
3602         case IDR5_OAS_52_BIT:
3603                 smmu->oas = 52;
3604                 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3605                 break;
3606         default:
3607                 dev_info(smmu->dev,
3608                         "unknown output address size. Truncating to 48-bit\n");
3609                 fallthrough;
3610         case IDR5_OAS_48_BIT:
3611                 smmu->oas = 48;
3612         }
3613
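             /*
              * arm_smmu_ops is shared by every SMMUv3 instance, so accumulate
              * the page sizes supported across all probed devices.
              */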
3614         if (arm_smmu_ops.pgsize_bitmap == -1UL)
3615                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3616         else
3617                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3618
3619         /* Set the DMA mask for our table walker */
3620         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3621                 dev_warn(smmu->dev,
3622                          "failed to set DMA mask for table walker\n");
3623
3624         smmu->ias = max(smmu->ias, smmu->oas);
3625
3626         if (arm_smmu_sva_supported(smmu))
3627                 smmu->features |= ARM_SMMU_FEAT_SVA;
3628
3629         dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3630                  smmu->ias, smmu->oas, smmu->features);
3631         return 0;
3632 }
3633
3634 #ifdef CONFIG_ACPI
3635 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3636 {
3637         switch (model) {
3638         case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3639                 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3640                 break;
3641         case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3642                 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3643                 break;
3644         }
3645
3646         dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3647 }
3648
3649 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3650                                       struct arm_smmu_device *smmu)
3651 {
3652         struct acpi_iort_smmu_v3 *iort_smmu;
3653         struct device *dev = smmu->dev;
3654         struct acpi_iort_node *node;
3655
3656         node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3657
3658         /* Retrieve SMMUv3 specific data */
3659         iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3660
3661         acpi_smmu_get_options(iort_smmu->model, smmu);
3662
3663         if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3664                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3665
3666         return 0;
3667 }
3668 #else
3669 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3670                                              struct arm_smmu_device *smmu)
3671 {
3672         return -ENODEV;
3673 }
3674 #endif
3675
3676 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3677                                     struct arm_smmu_device *smmu)
3678 {
3679         struct device *dev = &pdev->dev;
3680         u32 cells;
3681         int ret = -EINVAL;
3682
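             /*
              * The arm,smmu-v3 binding encodes one StreamID per "iommus"
              * entry, hence exactly one #iommu-cells argument is required.
              */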
3683         if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3684                 dev_err(dev, "missing #iommu-cells property\n");
3685         else if (cells != 1)
3686                 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3687         else
3688                 ret = 0;
3689
3690         parse_driver_options(smmu);
3691
3692         if (of_dma_is_coherent(dev->of_node))
3693                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3694
3695         return ret;
3696 }
3697
3698 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3699 {
3700         if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3701                 return SZ_64K;
3702         else
3703                 return SZ_128K;
3704 }
3705
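/*
 * Install (or remove, when ops is NULL) the SMMUv3 IOMMU ops on every bus
 * type that can host masters behind this SMMU, unwinding on failure.
 */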
3706 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3707 {
3708         int err;
3709
3710 #ifdef CONFIG_PCI
3711         if (pci_bus_type.iommu_ops != ops) {
3712                 err = bus_set_iommu(&pci_bus_type, ops);
3713                 if (err)
3714                         return err;
3715         }
3716 #endif
3717 #ifdef CONFIG_ARM_AMBA
3718         if (amba_bustype.iommu_ops != ops) {
3719                 err = bus_set_iommu(&amba_bustype, ops);
3720                 if (err)
3721                         goto err_reset_pci_ops;
3722         }
3723 #endif
3724         if (platform_bus_type.iommu_ops != ops) {
3725                 err = bus_set_iommu(&platform_bus_type, ops);
3726                 if (err)
3727                         goto err_reset_amba_ops;
3728         }
3729
3730         return 0;
3731
3732 err_reset_amba_ops:
3733 #ifdef CONFIG_ARM_AMBA
3734         bus_set_iommu(&amba_bustype, NULL);
3735 #endif
3736 err_reset_pci_ops: __maybe_unused;
3737 #ifdef CONFIG_PCI
3738         bus_set_iommu(&pci_bus_type, NULL);
3739 #endif
3740         return err;
3741 }
3742
3743 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3744                                       resource_size_t size)
3745 {
3746         struct resource res = DEFINE_RES_MEM(start, size);
3747
3748         return devm_ioremap_resource(dev, &res);
3749 }
3750
3751 static int arm_smmu_device_probe(struct platform_device *pdev)
3752 {
3753         int irq, ret;
3754         struct resource *res;
3755         resource_size_t ioaddr;
3756         struct arm_smmu_device *smmu;
3757         struct device *dev = &pdev->dev;
3758         bool bypass;
3759
3760         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3761         if (!smmu)
3762                 return -ENOMEM;
3763         smmu->dev = dev;
3764
3765         if (dev->of_node) {
3766                 ret = arm_smmu_device_dt_probe(pdev, smmu);
3767         } else {
3768                 ret = arm_smmu_device_acpi_probe(pdev, smmu);
3769                 if (ret == -ENODEV)
3770                         return ret;
3771         }
3772
3773         /* Set bypass mode according to firmware probing result */
3774         bypass = !!ret;
3775
3776         /* Base address */
3777         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
             if (!res)
                     return -EINVAL;
3778         if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3779                 dev_err(dev, "MMIO region too small (%pr)\n", res);
3780                 return -EINVAL;
3781         }
3782         ioaddr = res->start;
3783
3784         /*
3785          * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3786          * the PMCG registers which are reserved by the PMU driver.
3787          */
3788         smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3789         if (IS_ERR(smmu->base))
3790                 return PTR_ERR(smmu->base);
3791
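             /*
              * Page 1 of the register space holds the event and PRI queue
              * PROD/CONS registers. Implementations with the PAGE0_REGS_ONLY
              * quirk alias them onto page 0, so reuse the first mapping there.
              */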
3792         if (arm_smmu_resource_size(smmu) > SZ_64K) {
3793                 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3794                                                ARM_SMMU_REG_SZ);
3795                 if (IS_ERR(smmu->page1))
3796                         return PTR_ERR(smmu->page1);
3797         } else {
3798                 smmu->page1 = smmu->base;
3799         }
3800
3801         /* Interrupt lines */
3802
3803         irq = platform_get_irq_byname_optional(pdev, "combined");
3804         if (irq > 0)
3805                 smmu->combined_irq = irq;
3806         else {
3807                 irq = platform_get_irq_byname_optional(pdev, "eventq");
3808                 if (irq > 0)
3809                         smmu->evtq.q.irq = irq;
3810
3811                 irq = platform_get_irq_byname_optional(pdev, "priq");
3812                 if (irq > 0)
3813                         smmu->priq.q.irq = irq;
3814
3815                 irq = platform_get_irq_byname_optional(pdev, "gerror");
3816                 if (irq > 0)
3817                         smmu->gerr_irq = irq;
3818         }
3819         /* Probe the h/w */
3820         ret = arm_smmu_device_hw_probe(smmu);
3821         if (ret)
3822                 return ret;
3823
3824         /* Initialise in-memory data structures */
3825         ret = arm_smmu_init_structures(smmu);
3826         if (ret)
3827                 return ret;
3828
3829         /* Record our private device structure */
3830         platform_set_drvdata(pdev, smmu);
3831
3832         /* Reset the device */
3833         ret = arm_smmu_device_reset(smmu, bypass);
3834         if (ret)
3835                 return ret;
3836
3837         /* And we're up. Go go go! */
3838         ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3839                                      "smmu3.%pa", &ioaddr);
3840         if (ret)
3841                 return ret;
3842
3843         ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3844         if (ret) {
3845                 dev_err(dev, "Failed to register iommu\n");
3846                 goto err_sysfs_remove;
3847         }
3848
3849         ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
3850         if (ret)
3851                 goto err_unregister_device;
3852
3853         return 0;
3854
3855 err_unregister_device:
3856         iommu_device_unregister(&smmu->iommu);
3857 err_sysfs_remove:
3858         iommu_device_sysfs_remove(&smmu->iommu);
3859         return ret;
3860 }
3861
3862 static int arm_smmu_device_remove(struct platform_device *pdev)
3863 {
3864         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3865
3866         arm_smmu_set_bus_ops(NULL);
3867         iommu_device_unregister(&smmu->iommu);
3868         iommu_device_sysfs_remove(&smmu->iommu);
3869         arm_smmu_device_disable(smmu);
3870         iopf_queue_free(smmu->evtq.iopf);
3871
3872         return 0;
3873 }
3874
3875 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3876 {
3877         arm_smmu_device_remove(pdev);
3878 }
3879
3880 static const struct of_device_id arm_smmu_of_match[] = {
3881         { .compatible = "arm,smmu-v3", },
3882         { },
3883 };
3884 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3885
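/*
 * Wait for any SVA MMU notifier callbacks still in flight before the module
 * text can go away, then unregister the platform driver.
 */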
3886 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3887 {
3888         arm_smmu_sva_notifier_synchronize();
3889         platform_driver_unregister(drv);
3890 }
3891
3892 static struct platform_driver arm_smmu_driver = {
3893         .driver = {
3894                 .name                   = "arm-smmu-v3",
3895                 .of_match_table         = arm_smmu_of_match,
3896                 .suppress_bind_attrs    = true,
3897         },
3898         .probe  = arm_smmu_device_probe,
3899         .remove = arm_smmu_device_remove,
3900         .shutdown = arm_smmu_device_shutdown,
3901 };
3902 module_driver(arm_smmu_driver, platform_driver_register,
3903               arm_smmu_driver_unregister);
3904
3905 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3906 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3907 MODULE_ALIAS("platform:arm-smmu-v3");
3908 MODULE_LICENSE("GPL v2");