iommu/arm-smmu-v3: Remove unnecessary oom message
linux-2.6-microblaze.git: drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_iommu.h>
27 #include <linux/of_platform.h>
28 #include <linux/pci.h>
29 #include <linux/pci-ats.h>
30 #include <linux/platform_device.h>
31
32 #include <linux/amba/bus.h>
33
34 #include "arm-smmu-v3.h"
35 #include "../../iommu-sva-lib.h"
36
37 static bool disable_bypass = true;
38 module_param(disable_bypass, bool, 0444);
39 MODULE_PARM_DESC(disable_bypass,
40         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
41
42 static bool disable_msipolling;
43 module_param(disable_msipolling, bool, 0444);
44 MODULE_PARM_DESC(disable_msipolling,
45         "Disable MSI-based polling for CMD_SYNC completion.");
46
47 enum arm_smmu_msi_index {
48         EVTQ_MSI_INDEX,
49         GERROR_MSI_INDEX,
50         PRIQ_MSI_INDEX,
51         ARM_SMMU_MAX_MSIS,
52 };
53
54 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
55         [EVTQ_MSI_INDEX] = {
56                 ARM_SMMU_EVTQ_IRQ_CFG0,
57                 ARM_SMMU_EVTQ_IRQ_CFG1,
58                 ARM_SMMU_EVTQ_IRQ_CFG2,
59         },
60         [GERROR_MSI_INDEX] = {
61                 ARM_SMMU_GERROR_IRQ_CFG0,
62                 ARM_SMMU_GERROR_IRQ_CFG1,
63                 ARM_SMMU_GERROR_IRQ_CFG2,
64         },
65         [PRIQ_MSI_INDEX] = {
66                 ARM_SMMU_PRIQ_IRQ_CFG0,
67                 ARM_SMMU_PRIQ_IRQ_CFG1,
68                 ARM_SMMU_PRIQ_IRQ_CFG2,
69         },
70 };
71
72 struct arm_smmu_option_prop {
73         u32 opt;
74         const char *prop;
75 };
76
77 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
78 DEFINE_MUTEX(arm_smmu_asid_lock);
79
80 /*
81  * Special value used by SVA when a process dies, to quiesce a CD without
82  * disabling it.
83  */
84 struct arm_smmu_ctx_desc quiet_cd = { 0 };
85
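/*
 * Each entry below pairs an ARM_SMMU_OPT_* flag with the device-tree property
 * that enables it, matched by parse_driver_options(). An affected platform
 * would carry the property on its SMMU node, e.g. (illustrative fragment,
 * unit address made up):
 *
 *	smmu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		...
 *		hisilicon,broken-prefetch-cmd;
 *	};
 */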
86 static struct arm_smmu_option_prop arm_smmu_options[] = {
87         { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
88         { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
89         { 0, NULL},
90 };
91
92 static void parse_driver_options(struct arm_smmu_device *smmu)
93 {
94         int i = 0;
95
96         do {
97                 if (of_property_read_bool(smmu->dev->of_node,
98                                                 arm_smmu_options[i].prop)) {
99                         smmu->options |= arm_smmu_options[i].opt;
100                         dev_notice(smmu->dev, "option %s\n",
101                                 arm_smmu_options[i].prop);
102                 }
103         } while (arm_smmu_options[++i].opt);
104 }
105
106 /* Low-level queue manipulation functions */
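/*
 * prod and cons each encode an index (Q_IDX()), a wrap bit (Q_WRP()) and an
 * overflow flag (Q_OVF()). For example, with illustrative values and
 * max_n_shift == 3 (an 8-slot queue): if both wrap bits match and
 * Q_IDX(prod) == 6, Q_IDX(cons) == 2, then four slots are in use and
 * queue_has_space() computes 8 - (6 - 2) == 4 free slots; if the wrap bits
 * differ, prod has wrapped past cons and only cons - prod slots remain.
 */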
107 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
108 {
109         u32 space, prod, cons;
110
111         prod = Q_IDX(q, q->prod);
112         cons = Q_IDX(q, q->cons);
113
114         if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
115                 space = (1 << q->max_n_shift) - (prod - cons);
116         else
117                 space = cons - prod;
118
119         return space >= n;
120 }
121
122 static bool queue_full(struct arm_smmu_ll_queue *q)
123 {
124         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
125                Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
126 }
127
128 static bool queue_empty(struct arm_smmu_ll_queue *q)
129 {
130         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
131                Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
132 }
133
134 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
135 {
136         return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
137                 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
138                ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
139                 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
140 }
141
142 static void queue_sync_cons_out(struct arm_smmu_queue *q)
143 {
144         /*
145          * Ensure that all CPU accesses (reads and writes) to the queue
146          * are complete before we update the cons pointer.
147          */
148         __iomb();
149         writel_relaxed(q->llq.cons, q->cons_reg);
150 }
151
152 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
153 {
154         u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
155         q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
156 }
157
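/*
 * Re-read the hardware prod pointer of a queue that the SMMU produces into
 * (the event and PRI queues). A change in the overflow flag relative to our
 * shadow copy means the SMMU wrapped the queue before we could drain it and
 * entries have been discarded, hence -EOVERFLOW.
 */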
158 static int queue_sync_prod_in(struct arm_smmu_queue *q)
159 {
160         u32 prod;
161         int ret = 0;
162
163         /*
164          * We can't use the _relaxed() variant here, as we must prevent
165          * speculative reads of the queue before we have determined that
166          * prod has indeed moved.
167          */
168         prod = readl(q->prod_reg);
169
170         if (Q_OVF(prod) != Q_OVF(q->llq.prod))
171                 ret = -EOVERFLOW;
172
173         q->llq.prod = prod;
174         return ret;
175 }
176
177 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
178 {
179         u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
180         return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
181 }
182
183 static void queue_poll_init(struct arm_smmu_device *smmu,
184                             struct arm_smmu_queue_poll *qp)
185 {
186         qp->delay = 1;
187         qp->spin_cnt = 0;
188         qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
189         qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
190 }
191
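/*
 * One step of a bounded wait: use WFE when the SMMU can issue SEV on queue
 * updates, otherwise spin with cpu_relax() for ARM_SMMU_POLL_SPIN_COUNT
 * iterations before backing off to udelay() with an exponentially increasing
 * delay, and give up once ARM_SMMU_POLL_TIMEOUT_US has elapsed.
 */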
192 static int queue_poll(struct arm_smmu_queue_poll *qp)
193 {
194         if (ktime_compare(ktime_get(), qp->timeout) > 0)
195                 return -ETIMEDOUT;
196
197         if (qp->wfe) {
198                 wfe();
199         } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
200                 cpu_relax();
201         } else {
202                 udelay(qp->delay);
203                 qp->delay *= 2;
204                 qp->spin_cnt = 0;
205         }
206
207         return 0;
208 }
209
210 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
211 {
212         int i;
213
214         for (i = 0; i < n_dwords; ++i)
215                 *dst++ = cpu_to_le64(*src++);
216 }
217
218 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
219 {
220         int i;
221
222         for (i = 0; i < n_dwords; ++i)
223                 *dst++ = le64_to_cpu(*src++);
224 }
225
226 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
227 {
228         if (queue_empty(&q->llq))
229                 return -EAGAIN;
230
231         queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
232         queue_inc_cons(&q->llq);
233         queue_sync_cons_out(q);
234         return 0;
235 }
236
237 /* High-level queue accessors */
238 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
239 {
240         memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
241         cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
242
243         switch (ent->opcode) {
244         case CMDQ_OP_TLBI_EL2_ALL:
245         case CMDQ_OP_TLBI_NSNH_ALL:
246                 break;
247         case CMDQ_OP_PREFETCH_CFG:
248                 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
249                 break;
250         case CMDQ_OP_CFGI_CD:
251                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
252                 fallthrough;
253         case CMDQ_OP_CFGI_STE:
254                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
255                 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
256                 break;
257         case CMDQ_OP_CFGI_CD_ALL:
258                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
259                 break;
260         case CMDQ_OP_CFGI_ALL:
261                 /* Cover the entire SID range */
262                 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
263                 break;
264         case CMDQ_OP_TLBI_NH_VA:
265                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
266                 fallthrough;
267         case CMDQ_OP_TLBI_EL2_VA:
268                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
269                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
270                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
271                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
272                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
273                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
274                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
275                 break;
276         case CMDQ_OP_TLBI_S2_IPA:
277                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
278                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
279                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
280                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
281                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
282                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
283                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
284                 break;
285         case CMDQ_OP_TLBI_NH_ASID:
286                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
287                 fallthrough;
288         case CMDQ_OP_TLBI_S12_VMALL:
289                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
290                 break;
291         case CMDQ_OP_TLBI_EL2_ASID:
292                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
293                 break;
294         case CMDQ_OP_ATC_INV:
295                 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
296                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
297                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
298                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
299                 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
300                 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
301                 break;
302         case CMDQ_OP_PRI_RESP:
303                 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
304                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
305                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
306                 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
307                 switch (ent->pri.resp) {
308                 case PRI_RESP_DENY:
309                 case PRI_RESP_FAIL:
310                 case PRI_RESP_SUCC:
311                         break;
312                 default:
313                         return -EINVAL;
314                 }
315                 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
316                 break;
317         case CMDQ_OP_RESUME:
318                 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
319                 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
320                 cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
321                 break;
322         case CMDQ_OP_CMD_SYNC:
323                 if (ent->sync.msiaddr) {
324                         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
325                         cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
326                 } else {
327                         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
328                 }
329                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
330                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
331                 break;
332         default:
333                 return -ENOENT;
334         }
335
336         return 0;
337 }
338
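/*
 * Build a CMD_SYNC for the command queue slot at index 'prod'. With MSI
 * polling, the completion MSI is targeted at the CMD_SYNC entry itself
 * (base_dma + index * entry size), so completion shows up as the first word
 * of the command being cleared, which is what __arm_smmu_cmdq_poll_until_msi()
 * polls for. Without MSIs the SMMU issues SEV instead and the cons pointer
 * is polled.
 */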
339 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
340                                          u32 prod)
341 {
342         struct arm_smmu_queue *q = &smmu->cmdq.q;
343         struct arm_smmu_cmdq_ent ent = {
344                 .opcode = CMDQ_OP_CMD_SYNC,
345         };
346
347         /*
348          * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
349          * payload, so the write will zero the entire command on that platform.
350          */
351         if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
352                 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
353                                    q->ent_dwords * 8;
354         }
355
356         arm_smmu_cmdq_build_cmd(cmd, &ent);
357 }
358
359 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
360 {
361         static const char * const cerror_str[] = {
362                 [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
363                 [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
364                 [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
365                 [CMDQ_ERR_CERROR_ATC_INV_IDX]   = "ATC invalidate timeout",
366         };
367
368         int i;
369         u64 cmd[CMDQ_ENT_DWORDS];
370         struct arm_smmu_queue *q = &smmu->cmdq.q;
371         u32 cons = readl_relaxed(q->cons_reg);
372         u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
373         struct arm_smmu_cmdq_ent cmd_sync = {
374                 .opcode = CMDQ_OP_CMD_SYNC,
375         };
376
377         dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
378                 idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
379
380         switch (idx) {
381         case CMDQ_ERR_CERROR_ABT_IDX:
382                 dev_err(smmu->dev, "retrying command fetch\n");
                    fallthrough;
383         case CMDQ_ERR_CERROR_NONE_IDX:
384                 return;
385         case CMDQ_ERR_CERROR_ATC_INV_IDX:
386                 /*
387                  * ATC Invalidation Completion timeout. CONS is still pointing
388                  * at the CMD_SYNC. Attempt to complete other pending commands
389                  * by repeating the CMD_SYNC, though we might well end up back
390                  * here since the ATC invalidation may still be pending.
391                  */
392                 return;
393         case CMDQ_ERR_CERROR_ILL_IDX:
394         default:
395                 break;
396         }
397
398         /*
399          * We may have concurrent producers, so we need to be careful
400          * not to touch any of the shadow cmdq state.
401          */
402         queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
403         dev_err(smmu->dev, "skipping command in error state:\n");
404         for (i = 0; i < ARRAY_SIZE(cmd); ++i)
405                 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
406
407         /* Convert the erroneous command into a CMD_SYNC */
408         if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
409                 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
410                 return;
411         }
412
413         queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
414 }
415
416 /*
417  * Command queue locking.
418  * This is a form of bastardised rwlock with the following major changes:
419  *
420  * - The only LOCK routines are exclusive_trylock() and shared_lock().
421  *   Neither have barrier semantics, and instead provide only a control
422  *   dependency.
423  *
424  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
425  *   fails if the caller appears to be the last lock holder (yes, this is
426  *   racy). All successful UNLOCK routines have RELEASE semantics.
427  */
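/*
 * Concretely, the lock is one atomic counter: 0 means unlocked, a positive
 * value counts the shared holders, and a negative value (INT_MIN plus any
 * shared increments that raced with it) marks the exclusive holder, which
 * resets the counter to 0 on unlock. This lets shared_lock() be a plain
 * increment in the common case, falling back to the cmpxchg() loop only
 * while the counter is negative.
 */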
428 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
429 {
430         int val;
431
432         /*
433          * We can try to avoid the cmpxchg() loop by simply incrementing the
434          * lock counter. When held in exclusive state, the lock counter is set
435          * to INT_MIN so these increments won't hurt as the value will remain
436          * negative.
437          */
438         if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
439                 return;
440
441         do {
442                 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
443         } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
444 }
445
446 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
447 {
448         (void)atomic_dec_return_release(&cmdq->lock);
449 }
450
451 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
452 {
453         if (atomic_read(&cmdq->lock) == 1)
454                 return false;
455
456         arm_smmu_cmdq_shared_unlock(cmdq);
457         return true;
458 }
459
460 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)            \
461 ({                                                                      \
462         bool __ret;                                                     \
463         local_irq_save(flags);                                          \
464         __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);       \
465         if (!__ret)                                                     \
466                 local_irq_restore(flags);                               \
467         __ret;                                                          \
468 })
469
470 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)          \
471 ({                                                                      \
472         atomic_set_release(&cmdq->lock, 0);                             \
473         local_irq_restore(flags);                                       \
474 })
475
476
477 /*
478  * Command queue insertion.
479  * This is made fiddly by our attempts to achieve some sort of scalability
480  * since there is one queue shared amongst all of the CPUs in the system.  If
481  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
482  * then you'll *love* this monstrosity.
483  *
484  * The basic idea is to split the queue up into ranges of commands that are
485  * owned by a given CPU; the owner may not have written all of the commands
486  * itself, but is responsible for advancing the hardware prod pointer when
487  * the time comes. The algorithm is roughly:
488  *
489  *      1. Allocate some space in the queue. At this point we also discover
490  *         whether the head of the queue is currently owned by another CPU,
491  *         or whether we are the owner.
492  *
493  *      2. Write our commands into our allocated slots in the queue.
494  *
495  *      3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
496  *
497  *      4. If we are an owner:
498  *              a. Wait for the previous owner to finish.
499  *              b. Mark the queue head as unowned, which tells us the range
500  *                 that we are responsible for publishing.
501  *              c. Wait for all commands in our owned range to become valid.
502  *              d. Advance the hardware prod pointer.
503  *              e. Tell the next owner we've finished.
504  *
505  *      5. If we are inserting a CMD_SYNC (we may or may not have been an
506  *         owner), then we need to stick around until it has completed:
507  *              a. If we have MSIs, the SMMU can write back into the CMD_SYNC
508  *                 to clear the first 4 bytes.
509  *              b. Otherwise, we spin waiting for the hardware cons pointer to
510  *                 advance past our command.
511  *
512  * The devil is in the details, particularly the use of locking for handling
513  * SYNC completion and freeing up space in the queue before we think that it is
514  * full.
515  */
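/*
 * An illustrative trace of the ownership dance: CPU0 allocates slots 0-3 and
 * sees the OWNED flag clear, so it becomes owner and sets the flag; CPU1 then
 * allocates slots 4-5, sees the flag set, and only writes and validates its
 * own entries. After waiting for the previous owner, CPU0 clears the flag
 * (capturing prod == 6 as the end of its range), waits for slots 0-5 to
 * become valid, writes prod = 6 to the hardware and releases owner_prod so
 * that whichever CPU next allocated with the flag clear can take over.
 */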
516 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
517                                                u32 sprod, u32 eprod, bool set)
518 {
519         u32 swidx, sbidx, ewidx, ebidx;
520         struct arm_smmu_ll_queue llq = {
521                 .max_n_shift    = cmdq->q.llq.max_n_shift,
522                 .prod           = sprod,
523         };
524
525         ewidx = BIT_WORD(Q_IDX(&llq, eprod));
526         ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
527
528         while (llq.prod != eprod) {
529                 unsigned long mask;
530                 atomic_long_t *ptr;
531                 u32 limit = BITS_PER_LONG;
532
533                 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
534                 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
535
536                 ptr = &cmdq->valid_map[swidx];
537
538                 if ((swidx == ewidx) && (sbidx < ebidx))
539                         limit = ebidx;
540
541                 mask = GENMASK(limit - 1, sbidx);
542
543                 /*
544                  * The valid bit is the inverse of the wrap bit. This means
545                  * that a zero-initialised queue is invalid and, after marking
546                  * all entries as valid, they become invalid again when we
547                  * wrap.
548                  */
549                 if (set) {
550                         atomic_long_xor(mask, ptr);
551                 } else { /* Poll */
552                         unsigned long valid;
553
554                         valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
555                         atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
556                 }
557
558                 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
559         }
560 }
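/*
 * Because the valid bit is defined as the inverse of the wrap bit, setting
 * can be a plain XOR of the mask: on a pass with wrap == 0 the bits go
 * 0 -> 1, and on the following wrapped pass the same XOR takes them 1 -> 0,
 * which is again the inverse of the new wrap value. The polling side derives
 * the expected pattern from Q_WRP() for the same reason.
 */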
561
562 /* Mark all entries in the range [sprod, eprod) as valid */
563 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
564                                         u32 sprod, u32 eprod)
565 {
566         __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
567 }
568
569 /* Wait for all entries in the range [sprod, eprod) to become valid */
570 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
571                                          u32 sprod, u32 eprod)
572 {
573         __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
574 }
575
576 /* Wait for the command queue to become non-full */
577 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
578                                              struct arm_smmu_ll_queue *llq)
579 {
580         unsigned long flags;
581         struct arm_smmu_queue_poll qp;
582         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
583         int ret = 0;
584
585         /*
586          * Try to update our copy of cons by grabbing exclusive cmdq access. If
587          * that fails, spin until somebody else updates it for us.
588          */
589         if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
590                 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
591                 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
592                 llq->val = READ_ONCE(cmdq->q.llq.val);
593                 return 0;
594         }
595
596         queue_poll_init(smmu, &qp);
597         do {
598                 llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
599                 if (!queue_full(llq))
600                         break;
601
602                 ret = queue_poll(&qp);
603         } while (!ret);
604
605         return ret;
606 }
607
608 /*
609  * Wait until the SMMU signals a CMD_SYNC completion MSI.
610  * Must be called with the cmdq lock held in some capacity.
611  */
612 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
613                                           struct arm_smmu_ll_queue *llq)
614 {
615         int ret = 0;
616         struct arm_smmu_queue_poll qp;
617         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
618         u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
619
620         queue_poll_init(smmu, &qp);
621
622         /*
623          * The MSI won't generate an event, since it's being written back
624          * into the command queue.
625          */
626         qp.wfe = false;
627         smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
628         llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
629         return ret;
630 }
631
632 /*
633  * Wait until the SMMU cons index passes llq->prod.
634  * Must be called with the cmdq lock held in some capacity.
635  */
636 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
637                                                struct arm_smmu_ll_queue *llq)
638 {
639         struct arm_smmu_queue_poll qp;
640         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
641         u32 prod = llq->prod;
642         int ret = 0;
643
644         queue_poll_init(smmu, &qp);
645         llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
646         do {
647                 if (queue_consumed(llq, prod))
648                         break;
649
650                 ret = queue_poll(&qp);
651
652                 /*
653                  * This needs to be a readl() so that our subsequent call
654                  * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
655                  *
656                  * Specifically, we need to ensure that we observe all
657                  * shared_lock()s by other CMD_SYNCs that share our owner,
658                  * so that a failing call to tryunlock() means that we're
659                  * the last one out and therefore we can safely advance
660                  * cmdq->q.llq.cons. Roughly speaking:
661                  *
662                  * CPU 0                CPU1                    CPU2 (us)
663                  *
664                  * if (sync)
665                  *      shared_lock();
666                  *
667                  * dma_wmb();
668                  * set_valid_map();
669                  *
670                  *                      if (owner) {
671                  *                              poll_valid_map();
672                  *                              <control dependency>
673                  *                              writel(prod_reg);
674                  *
675                  *                                              readl(cons_reg);
676                  *                                              tryunlock();
677                  *
678                  * Requires us to see CPU 0's shared_lock() acquisition.
679                  */
680                 llq->cons = readl(cmdq->q.cons_reg);
681         } while (!ret);
682
683         return ret;
684 }
685
686 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
687                                          struct arm_smmu_ll_queue *llq)
688 {
689         if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
690                 return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
691
692         return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
693 }
694
695 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
696                                         u32 prod, int n)
697 {
698         int i;
699         struct arm_smmu_ll_queue llq = {
700                 .max_n_shift    = cmdq->q.llq.max_n_shift,
701                 .prod           = prod,
702         };
703
704         for (i = 0; i < n; ++i) {
705                 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
706
707                 prod = queue_inc_prod_n(&llq, i);
708                 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
709         }
710 }
711
712 /*
713  * This is the actual insertion function, and provides the following
714  * ordering guarantees to callers:
715  *
716  * - There is a dma_wmb() before publishing any commands to the queue.
717  *   This can be relied upon to order prior writes to data structures
718  *   in memory (such as a CD or an STE) before the command.
719  *
720  * - On completion of a CMD_SYNC, there is a control dependency.
721  *   This can be relied upon to order subsequent writes to memory (e.g.
722  *   freeing an IOVA) after completion of the CMD_SYNC.
723  *
724  * - Command insertion is totally ordered, so if two CPUs each race to
725  *   insert their own list of commands then all of the commands from one
726  *   CPU will appear before any of the commands from the other CPU.
727  */
728 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
729                                        u64 *cmds, int n, bool sync)
730 {
731         u64 cmd_sync[CMDQ_ENT_DWORDS];
732         u32 prod;
733         unsigned long flags;
734         bool owner;
735         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
736         struct arm_smmu_ll_queue llq = {
737                 .max_n_shift = cmdq->q.llq.max_n_shift,
738         }, head = llq;
739         int ret = 0;
740
741         /* 1. Allocate some space in the queue */
742         local_irq_save(flags);
743         llq.val = READ_ONCE(cmdq->q.llq.val);
744         do {
745                 u64 old;
746
747                 while (!queue_has_space(&llq, n + sync)) {
748                         local_irq_restore(flags);
749                         if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
750                                 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
751                         local_irq_save(flags);
752                 }
753
754                 head.cons = llq.cons;
755                 head.prod = queue_inc_prod_n(&llq, n + sync) |
756                                              CMDQ_PROD_OWNED_FLAG;
757
758                 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
759                 if (old == llq.val)
760                         break;
761
762                 llq.val = old;
763         } while (1);
764         owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
765         head.prod &= ~CMDQ_PROD_OWNED_FLAG;
766         llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
767
768         /*
769          * 2. Write our commands into the queue
770          * Dependency ordering from the cmpxchg() loop above.
771          */
772         arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
773         if (sync) {
774                 prod = queue_inc_prod_n(&llq, n);
775                 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
776                 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
777
778                 /*
779                  * In order to determine completion of our CMD_SYNC, we must
780                  * ensure that the queue can't wrap twice without us noticing.
781                  * We achieve that by taking the cmdq lock as shared before
782                  * marking our slot as valid.
783                  */
784                 arm_smmu_cmdq_shared_lock(cmdq);
785         }
786
787         /* 3. Mark our slots as valid, ensuring commands are visible first */
788         dma_wmb();
789         arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
790
791         /* 4. If we are the owner, take control of the SMMU hardware */
792         if (owner) {
793                 /* a. Wait for previous owner to finish */
794                 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
795
796                 /* b. Stop gathering work by clearing the owned flag */
797                 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
798                                                    &cmdq->q.llq.atomic.prod);
799                 prod &= ~CMDQ_PROD_OWNED_FLAG;
800
801                 /*
802                  * c. Wait for any gathered work to be written to the queue.
803                  * Note that we read our own entries so that we have the control
804                  * dependency required by (d).
805                  */
806                 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
807
808                 /*
809                  * d. Advance the hardware prod pointer
810                  * Control dependency ordering from the entries becoming valid.
811                  */
812                 writel_relaxed(prod, cmdq->q.prod_reg);
813
814                 /*
815                  * e. Tell the next owner we're done
816                  * Make sure we've updated the hardware first, so that we don't
817                  * race to update prod and potentially move it backwards.
818                  */
819                 atomic_set_release(&cmdq->owner_prod, prod);
820         }
821
822         /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
823         if (sync) {
824                 llq.prod = queue_inc_prod_n(&llq, n);
825                 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
826                 if (ret) {
827                         dev_err_ratelimited(smmu->dev,
828                                             "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
829                                             llq.prod,
830                                             readl_relaxed(cmdq->q.prod_reg),
831                                             readl_relaxed(cmdq->q.cons_reg));
832                 }
833
834                 /*
835                  * Try to unlock the cmdq lock. This will fail if we're the last
836                  * reader, in which case we can safely update cmdq->q.llq.cons
837                  */
838                 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
839                         WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
840                         arm_smmu_cmdq_shared_unlock(cmdq);
841                 }
842         }
843
844         local_irq_restore(flags);
845         return ret;
846 }
847
848 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
849                                    struct arm_smmu_cmdq_ent *ent)
850 {
851         u64 cmd[CMDQ_ENT_DWORDS];
852
853         if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
854                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
855                          ent->opcode);
856                 return -EINVAL;
857         }
858
859         return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
860 }
861
862 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
863 {
864         return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
865 }
866
867 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
868                                     struct arm_smmu_cmdq_batch *cmds,
869                                     struct arm_smmu_cmdq_ent *cmd)
870 {
871         if (cmds->num == CMDQ_BATCH_ENTRIES) {
872                 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
873                 cmds->num = 0;
874         }
875         arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
876         cmds->num++;
877 }
878
879 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
880                                       struct arm_smmu_cmdq_batch *cmds)
881 {
882         return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
883 }
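/*
 * Typical batching pattern, as used by arm_smmu_sync_cd() below: start from
 * a zero-initialised struct arm_smmu_cmdq_batch, add each command with
 * arm_smmu_cmdq_batch_add() (the batch is flushed automatically once it
 * holds CMDQ_BATCH_ENTRIES commands) and finish with
 * arm_smmu_cmdq_batch_submit(), which issues the remainder together with a
 * CMD_SYNC.
 */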
884
885 static int arm_smmu_page_response(struct device *dev,
886                                   struct iommu_fault_event *unused,
887                                   struct iommu_page_response *resp)
888 {
889         struct arm_smmu_cmdq_ent cmd = {0};
890         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
891         int sid = master->streams[0].id;
892
893         if (master->stall_enabled) {
894                 cmd.opcode              = CMDQ_OP_RESUME;
895                 cmd.resume.sid          = sid;
896                 cmd.resume.stag         = resp->grpid;
897                 switch (resp->code) {
898                 case IOMMU_PAGE_RESP_INVALID:
899                 case IOMMU_PAGE_RESP_FAILURE:
900                         cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
901                         break;
902                 case IOMMU_PAGE_RESP_SUCCESS:
903                         cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
904                         break;
905                 default:
906                         return -EINVAL;
907                 }
908         } else {
909                 return -ENODEV;
910         }
911
912         arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
913         /*
914          * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
915          * RESUME consumption guarantees that the stalled transaction will be
916          * terminated... at some point in the future. PRI_RESP is fire and
917          * forget.
918          */
919
920         return 0;
921 }
922
923 /* Context descriptor manipulation functions */
924 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
925 {
926         struct arm_smmu_cmdq_ent cmd = {
927                 .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
928                         CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
929                 .tlbi.asid = asid,
930         };
931
932         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
933         arm_smmu_cmdq_issue_sync(smmu);
934 }
935
936 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
937                              int ssid, bool leaf)
938 {
939         size_t i;
940         unsigned long flags;
941         struct arm_smmu_master *master;
942         struct arm_smmu_cmdq_batch cmds = {};
943         struct arm_smmu_device *smmu = smmu_domain->smmu;
944         struct arm_smmu_cmdq_ent cmd = {
945                 .opcode = CMDQ_OP_CFGI_CD,
946                 .cfgi   = {
947                         .ssid   = ssid,
948                         .leaf   = leaf,
949                 },
950         };
951
952         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
953         list_for_each_entry(master, &smmu_domain->devices, domain_head) {
954                 for (i = 0; i < master->num_streams; i++) {
955                         cmd.cfgi.sid = master->streams[i].id;
956                         arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
957                 }
958         }
959         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
960
961         arm_smmu_cmdq_batch_submit(smmu, &cmds);
962 }
963
964 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
965                                         struct arm_smmu_l1_ctx_desc *l1_desc)
966 {
967         size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
968
969         l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
970                                              &l1_desc->l2ptr_dma, GFP_KERNEL);
971         if (!l1_desc->l2ptr) {
972                 dev_warn(smmu->dev,
973                          "failed to allocate context descriptor table\n");
974                 return -ENOMEM;
975         }
976         return 0;
977 }
978
979 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
980                                       struct arm_smmu_l1_ctx_desc *l1_desc)
981 {
982         u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
983                   CTXDESC_L1_DESC_V;
984
985         /* See comment in arm_smmu_write_ctx_desc() */
986         WRITE_ONCE(*dst, cpu_to_le64(val));
987 }
988
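/*
 * Return a pointer to the CD for 'ssid'. A linear table is indexed directly
 * by ssid; with the two-level format, ssid >> CTXDESC_SPLIT selects the L1
 * descriptor, whose leaf table of CTXDESC_L2_ENTRIES CDs is allocated lazily
 * on first use, and ssid & (CTXDESC_L2_ENTRIES - 1) selects the CD within
 * that leaf. The freshly written L1 descriptor is synced because an invalid
 * L1CD may have been cached by the SMMU.
 */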
989 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
990                                    u32 ssid)
991 {
992         __le64 *l1ptr;
993         unsigned int idx;
994         struct arm_smmu_l1_ctx_desc *l1_desc;
995         struct arm_smmu_device *smmu = smmu_domain->smmu;
996         struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
997
998         if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
999                 return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1000
1001         idx = ssid >> CTXDESC_SPLIT;
1002         l1_desc = &cdcfg->l1_desc[idx];
1003         if (!l1_desc->l2ptr) {
1004                 if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1005                         return NULL;
1006
1007                 l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1008                 arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1009                 /* An invalid L1CD can be cached */
1010                 arm_smmu_sync_cd(smmu_domain, ssid, false);
1011         }
1012         idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1013         return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1014 }
1015
1016 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1017                             struct arm_smmu_ctx_desc *cd)
1018 {
1019         /*
1020          * This function handles the following cases:
1021          *
1022          * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1023          * (2) Install a secondary CD, for SID+SSID traffic.
1024          * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1025          *     CD, then invalidate the old entry and mappings.
1026          * (4) Quiesce the context without clearing the valid bit. Disable
1027          *     translation, and ignore any translation fault.
1028          * (5) Remove a secondary CD.
1029          */
1030         u64 val;
1031         bool cd_live;
1032         __le64 *cdptr;
1033
1034         if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1035                 return -E2BIG;
1036
1037         cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1038         if (!cdptr)
1039                 return -ENOMEM;
1040
1041         val = le64_to_cpu(cdptr[0]);
1042         cd_live = !!(val & CTXDESC_CD_0_V);
1043
1044         if (!cd) { /* (5) */
1045                 val = 0;
1046         } else if (cd == &quiet_cd) { /* (4) */
1047                 val |= CTXDESC_CD_0_TCR_EPD0;
1048         } else if (cd_live) { /* (3) */
1049                 val &= ~CTXDESC_CD_0_ASID;
1050                 val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1051                 /*
1052                  * Until CD+TLB invalidation, both ASIDs may be used for tagging
1053                  * this substream's traffic
1054                  */
1055         } else { /* (1) and (2) */
1056                 cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1057                 cdptr[2] = 0;
1058                 cdptr[3] = cpu_to_le64(cd->mair);
1059
1060                 /*
1061                  * STE is live, and the SMMU might read dwords of this CD in any
1062                  * order. Ensure that it observes valid values before reading
1063                  * V=1.
1064                  */
1065                 arm_smmu_sync_cd(smmu_domain, ssid, true);
1066
1067                 val = cd->tcr |
1068 #ifdef __BIG_ENDIAN
1069                         CTXDESC_CD_0_ENDI |
1070 #endif
1071                         CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1072                         (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1073                         CTXDESC_CD_0_AA64 |
1074                         FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1075                         CTXDESC_CD_0_V;
1076
1077                 if (smmu_domain->stall_enabled)
1078                         val |= CTXDESC_CD_0_S;
1079         }
1080
1081         /*
1082          * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1083          * "Configuration structures and configuration invalidation completion"
1084          *
1085          *   The size of single-copy atomic reads made by the SMMU is
1086          *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1087          *   field within an aligned 64-bit span of a structure can be altered
1088          *   without first making the structure invalid.
1089          */
1090         WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1091         arm_smmu_sync_cd(smmu_domain, ssid, true);
1092         return 0;
1093 }
1094
1095 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1096 {
1097         int ret;
1098         size_t l1size;
1099         size_t max_contexts;
1100         struct arm_smmu_device *smmu = smmu_domain->smmu;
1101         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1102         struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1103
1104         max_contexts = 1 << cfg->s1cdmax;
1105
1106         if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1107             max_contexts <= CTXDESC_L2_ENTRIES) {
1108                 cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1109                 cdcfg->num_l1_ents = max_contexts;
1110
1111                 l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1112         } else {
1113                 cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1114                 cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1115                                                   CTXDESC_L2_ENTRIES);
1116
1117                 cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1118                                               sizeof(*cdcfg->l1_desc),
1119                                               GFP_KERNEL);
1120                 if (!cdcfg->l1_desc)
1121                         return -ENOMEM;
1122
1123                 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1124         }
1125
1126         cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1127                                            GFP_KERNEL);
1128         if (!cdcfg->cdtab) {
1129                 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1130                 ret = -ENOMEM;
1131                 goto err_free_l1;
1132         }
1133
1134         return 0;
1135
1136 err_free_l1:
1137         if (cdcfg->l1_desc) {
1138                 devm_kfree(smmu->dev, cdcfg->l1_desc);
1139                 cdcfg->l1_desc = NULL;
1140         }
1141         return ret;
1142 }
1143
1144 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1145 {
1146         int i;
1147         size_t size, l1size;
1148         struct arm_smmu_device *smmu = smmu_domain->smmu;
1149         struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1150
1151         if (cdcfg->l1_desc) {
1152                 size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1153
1154                 for (i = 0; i < cdcfg->num_l1_ents; i++) {
1155                         if (!cdcfg->l1_desc[i].l2ptr)
1156                                 continue;
1157
1158                         dmam_free_coherent(smmu->dev, size,
1159                                            cdcfg->l1_desc[i].l2ptr,
1160                                            cdcfg->l1_desc[i].l2ptr_dma);
1161                 }
1162                 devm_kfree(smmu->dev, cdcfg->l1_desc);
1163                 cdcfg->l1_desc = NULL;
1164
1165                 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1166         } else {
1167                 l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1168         }
1169
1170         dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1171         cdcfg->cdtab_dma = 0;
1172         cdcfg->cdtab = NULL;
1173 }
1174
1175 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1176 {
1177         bool free;
1178         struct arm_smmu_ctx_desc *old_cd;
1179
1180         if (!cd->asid)
1181                 return false;
1182
1183         free = refcount_dec_and_test(&cd->refs);
1184         if (free) {
1185                 old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1186                 WARN_ON(old_cd != cd);
1187         }
1188         return free;
1189 }
1190
1191 /* Stream table manipulation functions */
1192 static void
1193 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1194 {
1195         u64 val = 0;
1196
1197         val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1198         val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1199
1200         /* See comment in arm_smmu_write_ctx_desc() */
1201         WRITE_ONCE(*dst, cpu_to_le64(val));
1202 }
1203
1204 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1205 {
1206         struct arm_smmu_cmdq_ent cmd = {
1207                 .opcode = CMDQ_OP_CFGI_STE,
1208                 .cfgi   = {
1209                         .sid    = sid,
1210                         .leaf   = true,
1211                 },
1212         };
1213
1214         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1215         arm_smmu_cmdq_issue_sync(smmu);
1216 }
1217
1218 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1219                                       __le64 *dst)
1220 {
1221         /*
1222          * This is hideously complicated, but we only really care about
1223          * three cases at the moment:
1224          *
1225          * 1. Invalid (all zero) -> bypass/fault (init)
1226          * 2. Bypass/fault -> translation/bypass (attach)
1227          * 3. Translation/bypass -> bypass/fault (detach)
1228          *
1229          * Given that we can't update the STE atomically and the SMMU
1230          * doesn't read the thing in a defined order, that leaves us
1231          * with the following maintenance requirements:
1232          *
1233          * 1. Update Config, return (init time STEs aren't live)
1234          * 2. Write everything apart from dword 0, sync, write dword 0, sync
1235          * 3. Update Config, sync
1236          */
1237         u64 val = le64_to_cpu(dst[0]);
1238         bool ste_live = false;
1239         struct arm_smmu_device *smmu = NULL;
1240         struct arm_smmu_s1_cfg *s1_cfg = NULL;
1241         struct arm_smmu_s2_cfg *s2_cfg = NULL;
1242         struct arm_smmu_domain *smmu_domain = NULL;
1243         struct arm_smmu_cmdq_ent prefetch_cmd = {
1244                 .opcode         = CMDQ_OP_PREFETCH_CFG,
1245                 .prefetch       = {
1246                         .sid    = sid,
1247                 },
1248         };
1249
1250         if (master) {
1251                 smmu_domain = master->domain;
1252                 smmu = master->smmu;
1253         }
1254
1255         if (smmu_domain) {
1256                 switch (smmu_domain->stage) {
1257                 case ARM_SMMU_DOMAIN_S1:
1258                         s1_cfg = &smmu_domain->s1_cfg;
1259                         break;
1260                 case ARM_SMMU_DOMAIN_S2:
1261                 case ARM_SMMU_DOMAIN_NESTED:
1262                         s2_cfg = &smmu_domain->s2_cfg;
1263                         break;
1264                 default:
1265                         break;
1266                 }
1267         }
1268
1269         if (val & STRTAB_STE_0_V) {
1270                 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1271                 case STRTAB_STE_0_CFG_BYPASS:
1272                         break;
1273                 case STRTAB_STE_0_CFG_S1_TRANS:
1274                 case STRTAB_STE_0_CFG_S2_TRANS:
1275                         ste_live = true;
1276                         break;
1277                 case STRTAB_STE_0_CFG_ABORT:
1278                         BUG_ON(!disable_bypass);
1279                         break;
1280                 default:
1281                         BUG(); /* STE corruption */
1282                 }
1283         }
1284
1285         /* Nuke the existing STE_0 value, as we're going to rewrite it */
1286         val = STRTAB_STE_0_V;
1287
1288         /* Bypass/fault */
1289         if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1290                 if (!smmu_domain && disable_bypass)
1291                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1292                 else
1293                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1294
1295                 dst[0] = cpu_to_le64(val);
1296                 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1297                                                 STRTAB_STE_1_SHCFG_INCOMING));
1298                 dst[2] = 0; /* Nuke the VMID */
1299                 /*
1300                  * The SMMU can perform negative caching, so we must sync
1301                  * the STE regardless of whether the old value was live.
1302                  */
1303                 if (smmu)
1304                         arm_smmu_sync_ste_for_sid(smmu, sid);
1305                 return;
1306         }
1307
1308         if (s1_cfg) {
1309                 u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1310                         STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1311
1312                 BUG_ON(ste_live);
1313                 dst[1] = cpu_to_le64(
1314                          FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1315                          FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1316                          FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1317                          FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1318                          FIELD_PREP(STRTAB_STE_1_STRW, strw));
1319
1320                 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1321                     !master->stall_enabled)
1322                         dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1323
1324                 val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1325                         FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1326                         FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1327                         FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1328         }
1329
1330         if (s2_cfg) {
1331                 BUG_ON(ste_live);
1332                 dst[2] = cpu_to_le64(
1333                          FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1334                          FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1335 #ifdef __BIG_ENDIAN
1336                          STRTAB_STE_2_S2ENDI |
1337 #endif
1338                          STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1339                          STRTAB_STE_2_S2R);
1340
1341                 dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1342
1343                 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1344         }
1345
1346         if (master->ats_enabled)
1347                 dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1348                                                  STRTAB_STE_1_EATS_TRANS));
1349
1350         arm_smmu_sync_ste_for_sid(smmu, sid);
1351         /* See comment in arm_smmu_write_ctx_desc() */
1352         WRITE_ONCE(dst[0], cpu_to_le64(val));
1353         arm_smmu_sync_ste_for_sid(smmu, sid);
1354
1355         /* It's likely that we'll want to use the new STE soon */
1356         if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1357                 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1358 }
1359
1360 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1361 {
1362         unsigned int i;
1363
1364         for (i = 0; i < nent; ++i) {
1365                 arm_smmu_write_strtab_ent(NULL, -1, strtab);
1366                 strtab += STRTAB_STE_DWORDS;
1367         }
1368 }
1369
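/*
 * Lazily allocate the second-level stream table covering 'sid'. Each leaf
 * holds 1 << STRTAB_SPLIT STEs of STRTAB_STE_DWORDS 64-bit words (hence the
 * size calculation below), and span = STRTAB_SPLIT + 1 encodes that SID
 * range in the L1 descriptor. The new STEs start out in bypass or abort
 * state depending on disable_bypass.
 */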
1370 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1371 {
1372         size_t size;
1373         void *strtab;
1374         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1375         struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1376
1377         if (desc->l2ptr)
1378                 return 0;
1379
1380         size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1381         strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1382
1383         desc->span = STRTAB_SPLIT + 1;
1384         desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1385                                           GFP_KERNEL);
1386         if (!desc->l2ptr) {
1387                 dev_err(smmu->dev,
1388                         "failed to allocate l2 stream table for SID %u\n",
1389                         sid);
1390                 return -ENOMEM;
1391         }
1392
1393         arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1394         arm_smmu_write_strtab_l1_desc(strtab, desc);
1395         return 0;
1396 }
1397
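/*
 * Worked example (illustration only, not part of the driver): the size
 * computed above is (1 << STRTAB_SPLIT) STEs of STRTAB_STE_DWORDS 64-bit
 * words each, with the final "+ 3" converting dwords to bytes. Assuming
 * the usual STRTAB_SPLIT == 8 and STRTAB_STE_DWORDS == 8:
 *
 *	size = 1 << (8 + ilog2(8) + 3) = 1 << 14 = 16 KiB
 *
 * so each level-2 table covers 256 stream IDs and occupies 16 KiB of
 * DMA-coherent memory.
 */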
1398 static struct arm_smmu_master *
1399 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1400 {
1401         struct rb_node *node;
1402         struct arm_smmu_stream *stream;
1403
1404         lockdep_assert_held(&smmu->streams_mutex);
1405
1406         node = smmu->streams.rb_node;
1407         while (node) {
1408                 stream = rb_entry(node, struct arm_smmu_stream, node);
1409                 if (stream->id < sid)
1410                         node = node->rb_right;
1411                 else if (stream->id > sid)
1412                         node = node->rb_left;
1413                 else
1414                         return stream->master;
1415         }
1416
1417         return NULL;
1418 }
1419
1420 /* IRQ and event handlers */
1421 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1422 {
1423         int ret;
1424         u32 reason;
1425         u32 perm = 0;
1426         struct arm_smmu_master *master;
1427         bool ssid_valid = evt[0] & EVTQ_0_SSV;
1428         u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1429         struct iommu_fault_event fault_evt = { };
1430         struct iommu_fault *flt = &fault_evt.fault;
1431
1432         switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1433         case EVT_ID_TRANSLATION_FAULT:
1434                 reason = IOMMU_FAULT_REASON_PTE_FETCH;
1435                 break;
1436         case EVT_ID_ADDR_SIZE_FAULT:
1437                 reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1438                 break;
1439         case EVT_ID_ACCESS_FAULT:
1440                 reason = IOMMU_FAULT_REASON_ACCESS;
1441                 break;
1442         case EVT_ID_PERMISSION_FAULT:
1443                 reason = IOMMU_FAULT_REASON_PERMISSION;
1444                 break;
1445         default:
1446                 return -EOPNOTSUPP;
1447         }
1448
1449         /* Stage-2 is always pinned at the moment */
1450         if (evt[1] & EVTQ_1_S2)
1451                 return -EFAULT;
1452
1453         if (evt[1] & EVTQ_1_RnW)
1454                 perm |= IOMMU_FAULT_PERM_READ;
1455         else
1456                 perm |= IOMMU_FAULT_PERM_WRITE;
1457
1458         if (evt[1] & EVTQ_1_InD)
1459                 perm |= IOMMU_FAULT_PERM_EXEC;
1460
1461         if (evt[1] & EVTQ_1_PnU)
1462                 perm |= IOMMU_FAULT_PERM_PRIV;
1463
1464         if (evt[1] & EVTQ_1_STALL) {
1465                 flt->type = IOMMU_FAULT_PAGE_REQ;
1466                 flt->prm = (struct iommu_fault_page_request) {
1467                         .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1468                         .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1469                         .perm = perm,
1470                         .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1471                 };
1472
1473                 if (ssid_valid) {
1474                         flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1475                         flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1476                 }
1477         } else {
1478                 flt->type = IOMMU_FAULT_DMA_UNRECOV;
1479                 flt->event = (struct iommu_fault_unrecoverable) {
1480                         .reason = reason,
1481                         .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1482                         .perm = perm,
1483                         .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1484                 };
1485
1486                 if (ssid_valid) {
1487                         flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1488                         flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1489                 }
1490         }
1491
1492         mutex_lock(&smmu->streams_mutex);
1493         master = arm_smmu_find_master(smmu, sid);
1494         if (!master) {
1495                 ret = -EINVAL;
1496                 goto out_unlock;
1497         }
1498
1499         ret = iommu_report_device_fault(master->dev, &fault_evt);
1500         if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1501                 /* Nobody cared, abort the access */
1502                 struct iommu_page_response resp = {
1503                         .pasid          = flt->prm.pasid,
1504                         .grpid          = flt->prm.grpid,
1505                         .code           = IOMMU_PAGE_RESP_FAILURE,
1506                 };
1507                 arm_smmu_page_response(master->dev, &fault_evt, &resp);
1508         }
1509
1510 out_unlock:
1511         mutex_unlock(&smmu->streams_mutex);
1512         return ret;
1513 }
1514
1515 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1516 {
1517         int i, ret;
1518         struct arm_smmu_device *smmu = dev;
1519         struct arm_smmu_queue *q = &smmu->evtq.q;
1520         struct arm_smmu_ll_queue *llq = &q->llq;
1521         static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1522                                       DEFAULT_RATELIMIT_BURST);
1523         u64 evt[EVTQ_ENT_DWORDS];
1524
1525         do {
1526                 while (!queue_remove_raw(q, evt)) {
1527                         u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1528
1529                         ret = arm_smmu_handle_evt(smmu, evt);
1530                         if (!ret || !__ratelimit(&rs))
1531                                 continue;
1532
1533                         dev_info(smmu->dev, "event 0x%02x received:\n", id);
1534                         for (i = 0; i < ARRAY_SIZE(evt); ++i)
1535                                 dev_info(smmu->dev, "\t0x%016llx\n",
1536                                          (unsigned long long)evt[i]);
1537
1538                 }
1539
1540                 /*
1541                  * Not much we can do on overflow, so scream and pretend we're
1542                  * trying harder.
1543                  */
1544                 if (queue_sync_prod_in(q) == -EOVERFLOW)
1545                         dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1546         } while (!queue_empty(llq));
1547
1548         /* Sync our overflow flag, as we believe we're up to speed */
1549         llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1550                     Q_IDX(llq, llq->cons);
1551         return IRQ_HANDLED;
1552 }
1553
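/*
 * Illustrative breakdown (not part of the driver), assuming the usual
 * queue pointer encoding: a prod/cons value packs an index in the low
 * max_n_shift bits, a wrap bit just above it, and an overflow flag in
 * bit 31. For example, with max_n_shift == 8, cons == 0x1f0 and
 * prod == 0x80000005, the assignment above yields:
 *
 *	Q_OVF(prod) | Q_WRP(llq, cons) | Q_IDX(llq, cons)
 *	  = 0x80000000 | 0x100 | 0xf0 = 0x800001f0
 *
 * i.e. the consumer keeps its read position but acknowledges the
 * producer's overflow flag.
 */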
1554 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1555 {
1556         u32 sid, ssid;
1557         u16 grpid;
1558         bool ssv, last;
1559
1560         sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1561         ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1562         ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1563         last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1564         grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1565
1566         dev_info(smmu->dev, "unexpected PRI request received:\n");
1567         dev_info(smmu->dev,
1568                  "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1569                  sid, ssid, grpid, last ? "L" : "",
1570                  evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1571                  evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1572                  evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1573                  evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1574                  evt[1] & PRIQ_1_ADDR_MASK);
1575
1576         if (last) {
1577                 struct arm_smmu_cmdq_ent cmd = {
1578                         .opcode                 = CMDQ_OP_PRI_RESP,
1579                         .substream_valid        = ssv,
1580                         .pri                    = {
1581                                 .sid    = sid,
1582                                 .ssid   = ssid,
1583                                 .grpid  = grpid,
1584                                 .resp   = PRI_RESP_DENY,
1585                         },
1586                 };
1587
1588                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1589         }
1590 }
1591
1592 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1593 {
1594         struct arm_smmu_device *smmu = dev;
1595         struct arm_smmu_queue *q = &smmu->priq.q;
1596         struct arm_smmu_ll_queue *llq = &q->llq;
1597         u64 evt[PRIQ_ENT_DWORDS];
1598
1599         do {
1600                 while (!queue_remove_raw(q, evt))
1601                         arm_smmu_handle_ppr(smmu, evt);
1602
1603                 if (queue_sync_prod_in(q) == -EOVERFLOW)
1604                         dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1605         } while (!queue_empty(llq));
1606
1607         /* Sync our overflow flag, as we believe we're up to speed */
1608         llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1609                       Q_IDX(llq, llq->cons);
1610         queue_sync_cons_out(q);
1611         return IRQ_HANDLED;
1612 }
1613
1614 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1615
1616 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1617 {
1618         u32 gerror, gerrorn, active;
1619         struct arm_smmu_device *smmu = dev;
1620
1621         gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1622         gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1623
1624         active = gerror ^ gerrorn;
1625         if (!(active & GERROR_ERR_MASK))
1626                 return IRQ_NONE; /* No errors pending */
1627
1628         dev_warn(smmu->dev,
1629                  "unexpected global error reported (0x%08x), this could be serious\n",
1630                  active);
1631
1632         if (active & GERROR_SFM_ERR) {
1633                 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1634                 arm_smmu_device_disable(smmu);
1635         }
1636
1637         if (active & GERROR_MSI_GERROR_ABT_ERR)
1638                 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1639
1640         if (active & GERROR_MSI_PRIQ_ABT_ERR)
1641                 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1642
1643         if (active & GERROR_MSI_EVTQ_ABT_ERR)
1644                 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1645
1646         if (active & GERROR_MSI_CMDQ_ABT_ERR)
1647                 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1648
1649         if (active & GERROR_PRIQ_ABT_ERR)
1650                 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1651
1652         if (active & GERROR_EVTQ_ABT_ERR)
1653                 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1654
1655         if (active & GERROR_CMDQ_ERR)
1656                 arm_smmu_cmdq_skip_err(smmu);
1657
1658         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1659         return IRQ_HANDLED;
1660 }
1661
1662 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1663 {
1664         struct arm_smmu_device *smmu = dev;
1665
1666         arm_smmu_evtq_thread(irq, dev);
1667         if (smmu->features & ARM_SMMU_FEAT_PRI)
1668                 arm_smmu_priq_thread(irq, dev);
1669
1670         return IRQ_HANDLED;
1671 }
1672
1673 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1674 {
1675         arm_smmu_gerror_handler(irq, dev);
1676         return IRQ_WAKE_THREAD;
1677 }
1678
1679 static void
1680 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1681                         struct arm_smmu_cmdq_ent *cmd)
1682 {
1683         size_t log2_span;
1684         size_t span_mask;
1685         /* ATC invalidates are always on 4096-byte pages */
1686         size_t inval_grain_shift = 12;
1687         unsigned long page_start, page_end;
1688
1689         /*
1690          * ATS and PASID:
1691          *
1692          * If substream_valid is clear, the PCIe TLP is sent without a PASID
1693          * prefix. In that case all ATC entries within the address range are
1694          * invalidated, including those that were requested with a PASID! There
1695          * is no way to invalidate only entries without PASID.
1696          *
1697          * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1698          * traffic), translation requests without PASID create ATC entries
1699          * without PASID, which must be invalidated with substream_valid clear.
1700          * This has the unpleasant side-effect of invalidating all PASID-tagged
1701          * ATC entries within the address range.
1702          */
1703         *cmd = (struct arm_smmu_cmdq_ent) {
1704                 .opcode                 = CMDQ_OP_ATC_INV,
1705                 .substream_valid        = !!ssid,
1706                 .atc.ssid               = ssid,
1707         };
1708
1709         if (!size) {
1710                 cmd->atc.size = ATC_INV_SIZE_ALL;
1711                 return;
1712         }
1713
1714         page_start      = iova >> inval_grain_shift;
1715         page_end        = (iova + size - 1) >> inval_grain_shift;
1716
1717         /*
1718          * In an ATS Invalidate Request, the address must be aligned on the
1719          * range size, which must be a power of two number of page sizes. We
1720          * thus have to choose between grossly over-invalidating the region, or
1721          * splitting the invalidation into multiple commands. For simplicity
1722          * we'll go with the first solution, but should refine it in the future
1723          * if multiple commands are shown to be more efficient.
1724          *
1725          * Find the smallest power of two that covers the range. The most
1726          * significant differing bit between the start and end addresses,
1727          * fls(start ^ end), indicates the required span. For example:
1728          *
1729          * We want to invalidate pages [8; 11]. This is already the ideal range:
1730          *              x = 0b1000 ^ 0b1011 = 0b11
1731          *              span = 1 << fls(x) = 4
1732          *
1733          * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1734          *              x = 0b0111 ^ 0b1010 = 0b1101
1735          *              span = 1 << fls(x) = 16
1736          */
1737         log2_span       = fls_long(page_start ^ page_end);
1738         span_mask       = (1ULL << log2_span) - 1;
1739
1740         page_start      &= ~span_mask;
1741
1742         cmd->atc.addr   = page_start << inval_grain_shift;
1743         cmd->atc.size   = log2_span;
1744 }
1745
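/*
 * Minimal sketch (illustration only, not part of the driver) repeating the
 * span maths of arm_smmu_atc_inv_to_cmd() on plain integers, so the worked
 * examples in the comment above can be checked in isolation. The helper
 * name atc_inv_span_pages_example() is an assumption of this sketch.
 */
static inline unsigned long atc_inv_span_pages_example(unsigned long iova,
							size_t size)
{
	const unsigned int grain = 12;	/* 4K ATC invalidation granule */
	unsigned long start = iova >> grain;
	unsigned long end = (iova + size - 1) >> grain;

	/*
	 * pages [8;11]: 0b1000 ^ 0b1011 = 0b0011, fls = 2, span = 4 pages
	 * pages [7;10]: 0b0111 ^ 0b1010 = 0b1101, fls = 4, span = 16 pages
	 */
	return 1UL << fls_long(start ^ end);
}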
1746 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1747 {
1748         int i;
1749         struct arm_smmu_cmdq_ent cmd;
1750
1751         arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1752
1753         for (i = 0; i < master->num_streams; i++) {
1754                 cmd.atc.sid = master->streams[i].id;
1755                 arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1756         }
1757
1758         return arm_smmu_cmdq_issue_sync(master->smmu);
1759 }
1760
1761 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1762                             unsigned long iova, size_t size)
1763 {
1764         int i;
1765         unsigned long flags;
1766         struct arm_smmu_cmdq_ent cmd;
1767         struct arm_smmu_master *master;
1768         struct arm_smmu_cmdq_batch cmds = {};
1769
1770         if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1771                 return 0;
1772
1773         /*
1774          * Ensure that we've completed prior invalidation of the main TLBs
1775          * before we read 'nr_ats_masters' in case of a concurrent call to
1776          * arm_smmu_enable_ats():
1777          *
1778          *      // unmap()                      // arm_smmu_enable_ats()
1779          *      TLBI+SYNC                       atomic_inc(&nr_ats_masters);
1780          *      smp_mb();                       [...]
1781          *      atomic_read(&nr_ats_masters);   pci_enable_ats() // writel()
1782          *
1783          * Ensures that we always see the incremented 'nr_ats_masters' count if
1784          * ATS was enabled at the PCI device before completion of the TLBI.
1785          */
1786         smp_mb();
1787         if (!atomic_read(&smmu_domain->nr_ats_masters))
1788                 return 0;
1789
1790         arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1791
1792         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1793         list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1794                 if (!master->ats_enabled)
1795                         continue;
1796
1797                 for (i = 0; i < master->num_streams; i++) {
1798                         cmd.atc.sid = master->streams[i].id;
1799                         arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1800                 }
1801         }
1802         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1803
1804         return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1805 }
1806
1807 /* IO_PGTABLE API */
1808 static void arm_smmu_tlb_inv_context(void *cookie)
1809 {
1810         struct arm_smmu_domain *smmu_domain = cookie;
1811         struct arm_smmu_device *smmu = smmu_domain->smmu;
1812         struct arm_smmu_cmdq_ent cmd;
1813
1814         /*
1815          * NOTE: when io-pgtable is in non-strict mode, we may get here with
1816          * PTEs previously cleared by unmaps on the current CPU not yet visible
1817          * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1818          * insertion to guarantee those are observed before the TLBI. Do be
1819          * careful, 007.
1820          */
1821         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1822                 arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1823         } else {
1824                 cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1825                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1826                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1827                 arm_smmu_cmdq_issue_sync(smmu);
1828         }
1829         arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1830 }
1831
1832 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1833                                      unsigned long iova, size_t size,
1834                                      size_t granule,
1835                                      struct arm_smmu_domain *smmu_domain)
1836 {
1837         struct arm_smmu_device *smmu = smmu_domain->smmu;
1838         unsigned long end = iova + size, num_pages = 0, tg = 0;
1839         size_t inv_range = granule;
1840         struct arm_smmu_cmdq_batch cmds = {};
1841
1842         if (!size)
1843                 return;
1844
1845         if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1846                 /* Get the leaf page size */
1847                 tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1848
1849                 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1850                 cmd->tlbi.tg = (tg - 10) / 2;
1851
1852                 /* Determine what level the granule is at */
1853                 cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1854
1855                 num_pages = size >> tg;
1856         }
1857
1858         while (iova < end) {
1859                 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1860                         /*
1861                          * On each iteration of the loop, the range is 5 bits
1862                          * worth of the aligned size remaining.
1863                          * The range in pages is:
1864                          *
1865                          * range = (num_pages & (0x1f << __ffs(num_pages)))
1866                          */
1867                         unsigned long scale, num;
1868
1869                         /* Determine the power of 2 multiple number of pages */
1870                         scale = __ffs(num_pages);
1871                         cmd->tlbi.scale = scale;
1872
1873                         /* Determine how many chunks of 2^scale size we have */
1874                         num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1875                         cmd->tlbi.num = num - 1;
1876
1877                         /* range is num * 2^scale * pgsize */
1878                         inv_range = num << (scale + tg);
1879
1880                         /* Clear out the lower order bits for the next iteration */
1881                         num_pages -= num << scale;
1882                 }
1883
1884                 cmd->tlbi.addr = iova;
1885                 arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1886                 iova += inv_range;
1887         }
1888         arm_smmu_cmdq_batch_submit(smmu, &cmds);
1889 }
1890
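/*
 * Worked example (illustration only, not part of the driver), assuming 4K
 * leaf pages (tg == 12) and a 5-bit NUM field (CMDQ_TLBI_RANGE_NUM_MAX ==
 * 0x1f): invalidating 560 pages takes two range commands.
 *
 *	num_pages = 560 = 0b10_0011_0000
 *	1st pass: scale = 4, num = (560 >> 4) & 0x1f = 3
 *		  -> one command covering 3 * 2^4 = 48 pages
 *		  num_pages -= 48, leaving 512
 *	2nd pass: scale = 9, num = (512 >> 9) & 0x1f = 1
 *		  -> one command covering 1 * 2^9 = 512 pages
 *
 * Without ARM_SMMU_FEAT_RANGE_INV the same region is instead invalidated
 * one command per granule-sized block.
 */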
1891 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1892                                           size_t granule, bool leaf,
1893                                           struct arm_smmu_domain *smmu_domain)
1894 {
1895         struct arm_smmu_cmdq_ent cmd = {
1896                 .tlbi = {
1897                         .leaf   = leaf,
1898                 },
1899         };
1900
1901         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1902                 cmd.opcode      = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1903                                   CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1904                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1905         } else {
1906                 cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1907                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1908         }
1909         __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1910
1911         /*
1912          * Unfortunately, this can't be leaf-only since we may have
1913          * zapped an entire table.
1914          */
1915         arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1916 }
1917
1918 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1919                                  size_t granule, bool leaf,
1920                                  struct arm_smmu_domain *smmu_domain)
1921 {
1922         struct arm_smmu_cmdq_ent cmd = {
1923                 .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1924                           CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1925                 .tlbi = {
1926                         .asid   = asid,
1927                         .leaf   = leaf,
1928                 },
1929         };
1930
1931         __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1932 }
1933
1934 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1935                                          unsigned long iova, size_t granule,
1936                                          void *cookie)
1937 {
1938         struct arm_smmu_domain *smmu_domain = cookie;
1939         struct iommu_domain *domain = &smmu_domain->domain;
1940
1941         iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1942 }
1943
1944 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1945                                   size_t granule, void *cookie)
1946 {
1947         arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1948 }
1949
1950 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1951         .tlb_flush_all  = arm_smmu_tlb_inv_context,
1952         .tlb_flush_walk = arm_smmu_tlb_inv_walk,
1953         .tlb_add_page   = arm_smmu_tlb_inv_page_nosync,
1954 };
1955
1956 /* IOMMU API */
1957 static bool arm_smmu_capable(enum iommu_cap cap)
1958 {
1959         switch (cap) {
1960         case IOMMU_CAP_CACHE_COHERENCY:
1961                 return true;
1962         case IOMMU_CAP_NOEXEC:
1963                 return true;
1964         default:
1965                 return false;
1966         }
1967 }
1968
1969 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1970 {
1971         struct arm_smmu_domain *smmu_domain;
1972
1973         if (type != IOMMU_DOMAIN_UNMANAGED &&
1974             type != IOMMU_DOMAIN_DMA &&
1975             type != IOMMU_DOMAIN_IDENTITY)
1976                 return NULL;
1977
1978         /*
1979          * Allocate the domain and initialise some of its data structures.
1980          * We can't really do anything meaningful until we've added a
1981          * master.
1982          */
1983         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1984         if (!smmu_domain)
1985                 return NULL;
1986
1987         if (type == IOMMU_DOMAIN_DMA &&
1988             iommu_get_dma_cookie(&smmu_domain->domain)) {
1989                 kfree(smmu_domain);
1990                 return NULL;
1991         }
1992
1993         mutex_init(&smmu_domain->init_mutex);
1994         INIT_LIST_HEAD(&smmu_domain->devices);
1995         spin_lock_init(&smmu_domain->devices_lock);
1996         INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
1997
1998         return &smmu_domain->domain;
1999 }
2000
2001 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2002 {
2003         int idx, size = 1 << span;
2004
2005         do {
2006                 idx = find_first_zero_bit(map, size);
2007                 if (idx == size)
2008                         return -ENOSPC;
2009         } while (test_and_set_bit(idx, map));
2010
2011         return idx;
2012 }
2013
2014 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2015 {
2016         clear_bit(idx, map);
2017 }
2018
2019 static void arm_smmu_domain_free(struct iommu_domain *domain)
2020 {
2021         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2022         struct arm_smmu_device *smmu = smmu_domain->smmu;
2023
2024         iommu_put_dma_cookie(domain);
2025         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2026
2027         /* Free the CD and ASID, if we allocated them */
2028         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2029                 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2030
2031                 /* Prevent SVA from touching the CD while we're freeing it */
2032                 mutex_lock(&arm_smmu_asid_lock);
2033                 if (cfg->cdcfg.cdtab)
2034                         arm_smmu_free_cd_tables(smmu_domain);
2035                 arm_smmu_free_asid(&cfg->cd);
2036                 mutex_unlock(&arm_smmu_asid_lock);
2037         } else {
2038                 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2039                 if (cfg->vmid)
2040                         arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2041         }
2042
2043         kfree(smmu_domain);
2044 }
2045
2046 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2047                                        struct arm_smmu_master *master,
2048                                        struct io_pgtable_cfg *pgtbl_cfg)
2049 {
2050         int ret;
2051         u32 asid;
2052         struct arm_smmu_device *smmu = smmu_domain->smmu;
2053         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2054         typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2055
2056         refcount_set(&cfg->cd.refs, 1);
2057
2058         /* Prevent SVA from modifying the ASID until it is written to the CD */
2059         mutex_lock(&arm_smmu_asid_lock);
2060         ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2061                        XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2062         if (ret)
2063                 goto out_unlock;
2064
2065         cfg->s1cdmax = master->ssid_bits;
2066
2067         smmu_domain->stall_enabled = master->stall_enabled;
2068
2069         ret = arm_smmu_alloc_cd_tables(smmu_domain);
2070         if (ret)
2071                 goto out_free_asid;
2072
2073         cfg->cd.asid    = (u16)asid;
2074         cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2075         cfg->cd.tcr     = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2076                           FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2077                           FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2078                           FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2079                           FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2080                           FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2081                           CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2082         cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair;
2083
2084         /*
2085          * Note that this will end up calling arm_smmu_sync_cd() before
2086          * the master has been added to the devices list for this domain.
2087          * This isn't an issue because the STE hasn't been installed yet.
2088          */
2089         ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2090         if (ret)
2091                 goto out_free_cd_tables;
2092
2093         mutex_unlock(&arm_smmu_asid_lock);
2094         return 0;
2095
2096 out_free_cd_tables:
2097         arm_smmu_free_cd_tables(smmu_domain);
2098 out_free_asid:
2099         arm_smmu_free_asid(&cfg->cd);
2100 out_unlock:
2101         mutex_unlock(&arm_smmu_asid_lock);
2102         return ret;
2103 }
2104
2105 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2106                                        struct arm_smmu_master *master,
2107                                        struct io_pgtable_cfg *pgtbl_cfg)
2108 {
2109         int vmid;
2110         struct arm_smmu_device *smmu = smmu_domain->smmu;
2111         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2112         typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2113
2114         vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2115         if (vmid < 0)
2116                 return vmid;
2117
2118         vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2119         cfg->vmid       = (u16)vmid;
2120         cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2121         cfg->vtcr       = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2122                           FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2123                           FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2124                           FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2125                           FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2126                           FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2127                           FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2128         return 0;
2129 }
2130
2131 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2132                                     struct arm_smmu_master *master)
2133 {
2134         int ret;
2135         unsigned long ias, oas;
2136         enum io_pgtable_fmt fmt;
2137         struct io_pgtable_cfg pgtbl_cfg;
2138         struct io_pgtable_ops *pgtbl_ops;
2139         int (*finalise_stage_fn)(struct arm_smmu_domain *,
2140                                  struct arm_smmu_master *,
2141                                  struct io_pgtable_cfg *);
2142         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2143         struct arm_smmu_device *smmu = smmu_domain->smmu;
2144
2145         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2146                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2147                 return 0;
2148         }
2149
2150         /* Restrict the stage to what we can actually support */
2151         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2152                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2153         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2154                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2155
2156         switch (smmu_domain->stage) {
2157         case ARM_SMMU_DOMAIN_S1:
2158                 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2159                 ias = min_t(unsigned long, ias, VA_BITS);
2160                 oas = smmu->ias;
2161                 fmt = ARM_64_LPAE_S1;
2162                 finalise_stage_fn = arm_smmu_domain_finalise_s1;
2163                 break;
2164         case ARM_SMMU_DOMAIN_NESTED:
2165         case ARM_SMMU_DOMAIN_S2:
2166                 ias = smmu->ias;
2167                 oas = smmu->oas;
2168                 fmt = ARM_64_LPAE_S2;
2169                 finalise_stage_fn = arm_smmu_domain_finalise_s2;
2170                 break;
2171         default:
2172                 return -EINVAL;
2173         }
2174
2175         pgtbl_cfg = (struct io_pgtable_cfg) {
2176                 .pgsize_bitmap  = smmu->pgsize_bitmap,
2177                 .ias            = ias,
2178                 .oas            = oas,
2179                 .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2180                 .tlb            = &arm_smmu_flush_ops,
2181                 .iommu_dev      = smmu->dev,
2182         };
2183
2184         if (!iommu_get_dma_strict(domain))
2185                 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
2186
2187         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2188         if (!pgtbl_ops)
2189                 return -ENOMEM;
2190
2191         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2192         domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2193         domain->geometry.force_aperture = true;
2194
2195         ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2196         if (ret < 0) {
2197                 free_io_pgtable_ops(pgtbl_ops);
2198                 return ret;
2199         }
2200
2201         smmu_domain->pgtbl_ops = pgtbl_ops;
2202         return 0;
2203 }
2204
2205 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2206 {
2207         __le64 *step;
2208         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2209
2210         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2211                 struct arm_smmu_strtab_l1_desc *l1_desc;
2212                 int idx;
2213
2214                 /* Two-level walk */
2215                 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2216                 l1_desc = &cfg->l1_desc[idx];
2217                 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2218                 step = &l1_desc->l2ptr[idx];
2219         } else {
2220                 /* Simple linear lookup */
2221                 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2222         }
2223
2224         return step;
2225 }
2226
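/*
 * Worked example (illustration only, not part of the driver), assuming the
 * usual STRTAB_SPLIT == 8, STRTAB_L1_DESC_DWORDS == 1 and
 * STRTAB_STE_DWORDS == 8: for sid == 0x1234 the level-1 descriptor index
 * is 0x1234 >> 8 = 0x12, and the STE sits at dword offset
 * (0x1234 & 0xff) * 8 = 0x1a0 (byte offset 0xd00) within that level-2
 * table.
 */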
2227 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2228 {
2229         int i, j;
2230         struct arm_smmu_device *smmu = master->smmu;
2231
2232         for (i = 0; i < master->num_streams; ++i) {
2233                 u32 sid = master->streams[i].id;
2234                 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2235
2236                 /* Bridged PCI devices may end up with duplicated IDs */
2237                 for (j = 0; j < i; j++)
2238                         if (master->streams[j].id == sid)
2239                                 break;
2240                 if (j < i)
2241                         continue;
2242
2243                 arm_smmu_write_strtab_ent(master, sid, step);
2244         }
2245 }
2246
2247 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2248 {
2249         struct device *dev = master->dev;
2250         struct arm_smmu_device *smmu = master->smmu;
2251         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2252
2253         if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2254                 return false;
2255
2256         if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2257                 return false;
2258
2259         return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2260 }
2261
2262 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2263 {
2264         size_t stu;
2265         struct pci_dev *pdev;
2266         struct arm_smmu_device *smmu = master->smmu;
2267         struct arm_smmu_domain *smmu_domain = master->domain;
2268
2269         /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2270         if (!master->ats_enabled)
2271                 return;
2272
2273         /* Smallest Translation Unit: log2 of the smallest supported granule */
2274         stu = __ffs(smmu->pgsize_bitmap);
2275         pdev = to_pci_dev(master->dev);
2276
2277         atomic_inc(&smmu_domain->nr_ats_masters);
2278         arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2279         if (pci_enable_ats(pdev, stu))
2280                 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2281 }
2282
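/*
 * Worked example (illustration only, not part of the driver): with a
 * pgsize_bitmap that includes 4K pages, __ffs() gives stu == 12, so the
 * endpoint is told that the smallest translation it will receive via ATS
 * is 4 KiB.
 */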
2283 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2284 {
2285         struct arm_smmu_domain *smmu_domain = master->domain;
2286
2287         if (!master->ats_enabled)
2288                 return;
2289
2290         pci_disable_ats(to_pci_dev(master->dev));
2291         /*
2292          * Ensure ATS is disabled at the endpoint before we issue the
2293          * ATC invalidation via the SMMU.
2294          */
2295         wmb();
2296         arm_smmu_atc_inv_master(master);
2297         atomic_dec(&smmu_domain->nr_ats_masters);
2298 }
2299
2300 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2301 {
2302         int ret;
2303         int features;
2304         int num_pasids;
2305         struct pci_dev *pdev;
2306
2307         if (!dev_is_pci(master->dev))
2308                 return -ENODEV;
2309
2310         pdev = to_pci_dev(master->dev);
2311
2312         features = pci_pasid_features(pdev);
2313         if (features < 0)
2314                 return features;
2315
2316         num_pasids = pci_max_pasids(pdev);
2317         if (num_pasids <= 0)
2318                 return num_pasids;
2319
2320         ret = pci_enable_pasid(pdev, features);
2321         if (ret) {
2322                 dev_err(&pdev->dev, "Failed to enable PASID\n");
2323                 return ret;
2324         }
2325
2326         master->ssid_bits = min_t(u8, ilog2(num_pasids),
2327                                   master->smmu->ssid_bits);
2328         return 0;
2329 }
2330
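/*
 * Worked example (illustration only, not part of the driver): a PCI
 * function advertising 256 PASIDs gives ilog2(256) = 8, so ssid_bits
 * becomes min(8, smmu->ssid_bits) and at most 256 substreams are usable
 * on this master.
 */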
2331 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2332 {
2333         struct pci_dev *pdev;
2334
2335         if (!dev_is_pci(master->dev))
2336                 return;
2337
2338         pdev = to_pci_dev(master->dev);
2339
2340         if (!pdev->pasid_enabled)
2341                 return;
2342
2343         master->ssid_bits = 0;
2344         pci_disable_pasid(pdev);
2345 }
2346
2347 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2348 {
2349         unsigned long flags;
2350         struct arm_smmu_domain *smmu_domain = master->domain;
2351
2352         if (!smmu_domain)
2353                 return;
2354
2355         arm_smmu_disable_ats(master);
2356
2357         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2358         list_del(&master->domain_head);
2359         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2360
2361         master->domain = NULL;
2362         master->ats_enabled = false;
2363         arm_smmu_install_ste_for_dev(master);
2364 }
2365
2366 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2367 {
2368         int ret = 0;
2369         unsigned long flags;
2370         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2371         struct arm_smmu_device *smmu;
2372         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2373         struct arm_smmu_master *master;
2374
2375         if (!fwspec)
2376                 return -ENOENT;
2377
2378         master = dev_iommu_priv_get(dev);
2379         smmu = master->smmu;
2380
2381         /*
2382          * Checking that SVA is disabled ensures that this device isn't bound to
2383          * any mm, and can be safely detached from its old domain. Bonds cannot
2384          * be removed concurrently since we're holding the group mutex.
2385          */
2386         if (arm_smmu_master_sva_enabled(master)) {
2387                 dev_err(dev, "cannot attach - SVA enabled\n");
2388                 return -EBUSY;
2389         }
2390
2391         arm_smmu_detach_dev(master);
2392
2393         mutex_lock(&smmu_domain->init_mutex);
2394
2395         if (!smmu_domain->smmu) {
2396                 smmu_domain->smmu = smmu;
2397                 ret = arm_smmu_domain_finalise(domain, master);
2398                 if (ret) {
2399                         smmu_domain->smmu = NULL;
2400                         goto out_unlock;
2401                 }
2402         } else if (smmu_domain->smmu != smmu) {
2403                 dev_err(dev,
2404                         "cannot attach to SMMU %s (upstream of %s)\n",
2405                         dev_name(smmu_domain->smmu->dev),
2406                         dev_name(smmu->dev));
2407                 ret = -ENXIO;
2408                 goto out_unlock;
2409         } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2410                    master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2411                 dev_err(dev,
2412                         "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2413                         smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2414                 ret = -EINVAL;
2415                 goto out_unlock;
2416         } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2417                    smmu_domain->stall_enabled != master->stall_enabled) {
2418                 dev_err(dev, "cannot attach to stall-%s domain\n",
2419                         smmu_domain->stall_enabled ? "enabled" : "disabled");
2420                 ret = -EINVAL;
2421                 goto out_unlock;
2422         }
2423
2424         master->domain = smmu_domain;
2425
2426         if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2427                 master->ats_enabled = arm_smmu_ats_supported(master);
2428
2429         arm_smmu_install_ste_for_dev(master);
2430
2431         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2432         list_add(&master->domain_head, &smmu_domain->devices);
2433         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2434
2435         arm_smmu_enable_ats(master);
2436
2437 out_unlock:
2438         mutex_unlock(&smmu_domain->init_mutex);
2439         return ret;
2440 }
2441
2442 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2443                         phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2444 {
2445         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2446
2447         if (!ops)
2448                 return -ENODEV;
2449
2450         return ops->map(ops, iova, paddr, size, prot, gfp);
2451 }
2452
2453 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2454                              size_t size, struct iommu_iotlb_gather *gather)
2455 {
2456         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2457         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2458
2459         if (!ops)
2460                 return 0;
2461
2462         return ops->unmap(ops, iova, size, gather);
2463 }
2464
2465 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2466 {
2467         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2468
2469         if (smmu_domain->smmu)
2470                 arm_smmu_tlb_inv_context(smmu_domain);
2471 }
2472
2473 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2474                                 struct iommu_iotlb_gather *gather)
2475 {
2476         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2477
2478         if (!gather->pgsize)
2479                 return;
2480
2481         arm_smmu_tlb_inv_range_domain(gather->start,
2482                                       gather->end - gather->start + 1,
2483                                       gather->pgsize, true, smmu_domain);
2484 }
2485
2486 static phys_addr_t
2487 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2488 {
2489         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2490
2491         if (domain->type == IOMMU_DOMAIN_IDENTITY)
2492                 return iova;
2493
2494         if (!ops)
2495                 return 0;
2496
2497         return ops->iova_to_phys(ops, iova);
2498 }
2499
2500 static struct platform_driver arm_smmu_driver;
2501
2502 static
2503 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2504 {
2505         struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2506                                                           fwnode);
2507         put_device(dev);
2508         return dev ? dev_get_drvdata(dev) : NULL;
2509 }
2510
2511 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2512 {
2513         unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2514
2515         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2516                 limit *= 1UL << STRTAB_SPLIT;
2517
2518         return sid < limit;
2519 }
2520
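/*
 * Worked example (illustration only, not part of the driver): with a
 * two-level stream table, num_l1_ents counts level-1 descriptors and each
 * descriptor spans 1 << STRTAB_SPLIT stream IDs. Assuming STRTAB_SPLIT == 8
 * and num_l1_ents == 256, SIDs 0 to 65535 are accepted; with a linear
 * table the limit is simply the number of STEs allocated.
 */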
2521 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2522                                   struct arm_smmu_master *master)
2523 {
2524         int i;
2525         int ret = 0;
2526         struct arm_smmu_stream *new_stream, *cur_stream;
2527         struct rb_node **new_node, *parent_node = NULL;
2528         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2529
2530         master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2531                                   GFP_KERNEL);
2532         if (!master->streams)
2533                 return -ENOMEM;
2534         master->num_streams = fwspec->num_ids;
2535
2536         mutex_lock(&smmu->streams_mutex);
2537         for (i = 0; i < fwspec->num_ids; i++) {
2538                 u32 sid = fwspec->ids[i];
2539
2540                 new_stream = &master->streams[i];
2541                 new_stream->id = sid;
2542                 new_stream->master = master;
2543
2544                 /*
2545                  * Check the SIDs are in range of the SMMU and our stream table
2546                  */
2547                 if (!arm_smmu_sid_in_range(smmu, sid)) {
2548                         ret = -ERANGE;
2549                         break;
2550                 }
2551
2552                 /* Ensure l2 strtab is initialised */
2553                 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2554                         ret = arm_smmu_init_l2_strtab(smmu, sid);
2555                         if (ret)
2556                                 break;
2557                 }
2558
2559                 /* Insert into SID tree */
2560                 new_node = &(smmu->streams.rb_node);
2561                 while (*new_node) {
2562                         cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2563                                               node);
2564                         parent_node = *new_node;
2565                         if (cur_stream->id > new_stream->id) {
2566                                 new_node = &((*new_node)->rb_left);
2567                         } else if (cur_stream->id < new_stream->id) {
2568                                 new_node = &((*new_node)->rb_right);
2569                         } else {
2570                                 dev_warn(master->dev,
2571                                          "stream %u already in tree\n",
2572                                          cur_stream->id);
2573                                 ret = -EINVAL;
2574                                 break;
2575                         }
2576                 }
2577                 if (ret)
2578                         break;
2579
2580                 rb_link_node(&new_stream->node, parent_node, new_node);
2581                 rb_insert_color(&new_stream->node, &smmu->streams);
2582         }
2583
2584         if (ret) {
2585                 for (i--; i >= 0; i--)
2586                         rb_erase(&master->streams[i].node, &smmu->streams);
2587                 kfree(master->streams);
2588         }
2589         mutex_unlock(&smmu->streams_mutex);
2590
2591         return ret;
2592 }
2593
2594 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2595 {
2596         int i;
2597         struct arm_smmu_device *smmu = master->smmu;
2598         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2599
2600         if (!smmu || !master->streams)
2601                 return;
2602
2603         mutex_lock(&smmu->streams_mutex);
2604         for (i = 0; i < fwspec->num_ids; i++)
2605                 rb_erase(&master->streams[i].node, &smmu->streams);
2606         mutex_unlock(&smmu->streams_mutex);
2607
2608         kfree(master->streams);
2609 }
2610
2611 static struct iommu_ops arm_smmu_ops;
2612
2613 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2614 {
2615         int ret;
2616         struct arm_smmu_device *smmu;
2617         struct arm_smmu_master *master;
2618         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2619
2620         if (!fwspec || fwspec->ops != &arm_smmu_ops)
2621                 return ERR_PTR(-ENODEV);
2622
2623         if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2624                 return ERR_PTR(-EBUSY);
2625
2626         smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2627         if (!smmu)
2628                 return ERR_PTR(-ENODEV);
2629
2630         master = kzalloc(sizeof(*master), GFP_KERNEL);
2631         if (!master)
2632                 return ERR_PTR(-ENOMEM);
2633
2634         master->dev = dev;
2635         master->smmu = smmu;
2636         INIT_LIST_HEAD(&master->bonds);
2637         dev_iommu_priv_set(dev, master);
2638
2639         ret = arm_smmu_insert_master(smmu, master);
2640         if (ret)
2641                 goto err_free_master;
2642
2643         device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2644         master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2645
2646         /*
2647          * Note that PASID must be enabled before, and disabled after ATS:
2648          * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2649          *
2650          *   Behavior is undefined if this bit is Set and the value of the PASID
2651          *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2652          *   are changed.
2653          */
2654         arm_smmu_enable_pasid(master);
2655
2656         if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2657                 master->ssid_bits = min_t(u8, master->ssid_bits,
2658                                           CTXDESC_LINEAR_CDMAX);
2659
2660         if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2661              device_property_read_bool(dev, "dma-can-stall")) ||
2662             smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2663                 master->stall_enabled = true;
2664
2665         return &smmu->iommu;
2666
2667 err_free_master:
2668         kfree(master);
2669         dev_iommu_priv_set(dev, NULL);
2670         return ERR_PTR(ret);
2671 }
2672
2673 static void arm_smmu_release_device(struct device *dev)
2674 {
2675         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2676         struct arm_smmu_master *master;
2677
2678         if (!fwspec || fwspec->ops != &arm_smmu_ops)
2679                 return;
2680
2681         master = dev_iommu_priv_get(dev);
2682         if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2683                 iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2684         arm_smmu_detach_dev(master);
2685         arm_smmu_disable_pasid(master);
2686         arm_smmu_remove_master(master);
2687         kfree(master);
2688         iommu_fwspec_free(dev);
2689 }
2690
2691 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2692 {
2693         struct iommu_group *group;
2694
2695         /*
2696          * We don't support devices sharing stream IDs other than PCI RID
2697          * aliases, since the necessary ID-to-device lookup becomes rather
2698          * impractical given a potential sparse 32-bit stream ID space.
2699          */
2700         if (dev_is_pci(dev))
2701                 group = pci_device_group(dev);
2702         else
2703                 group = generic_device_group(dev);
2704
2705         return group;
2706 }
2707
2708 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2709 {
2710         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2711         int ret = 0;
2712
2713         mutex_lock(&smmu_domain->init_mutex);
2714         if (smmu_domain->smmu)
2715                 ret = -EPERM;
2716         else
2717                 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2718         mutex_unlock(&smmu_domain->init_mutex);
2719
2720         return ret;
2721 }
2722
2723 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2724 {
2725         return iommu_fwspec_add_ids(dev, args->args, 1);
2726 }
2727
2728 static void arm_smmu_get_resv_regions(struct device *dev,
2729                                       struct list_head *head)
2730 {
2731         struct iommu_resv_region *region;
2732         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2733
2734         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2735                                          prot, IOMMU_RESV_SW_MSI);
2736         if (!region)
2737                 return;
2738
2739         list_add_tail(&region->list, head);
2740
2741         iommu_dma_get_resv_regions(dev, head);
2742 }
2743
2744 static bool arm_smmu_dev_has_feature(struct device *dev,
2745                                      enum iommu_dev_features feat)
2746 {
2747         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2748
2749         if (!master)
2750                 return false;
2751
2752         switch (feat) {
2753         case IOMMU_DEV_FEAT_IOPF:
2754                 return arm_smmu_master_iopf_supported(master);
2755         case IOMMU_DEV_FEAT_SVA:
2756                 return arm_smmu_master_sva_supported(master);
2757         default:
2758                 return false;
2759         }
2760 }
2761
2762 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2763                                          enum iommu_dev_features feat)
2764 {
2765         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2766
2767         if (!master)
2768                 return false;
2769
2770         switch (feat) {
2771         case IOMMU_DEV_FEAT_IOPF:
2772                 return master->iopf_enabled;
2773         case IOMMU_DEV_FEAT_SVA:
2774                 return arm_smmu_master_sva_enabled(master);
2775         default:
2776                 return false;
2777         }
2778 }
2779
2780 static int arm_smmu_dev_enable_feature(struct device *dev,
2781                                        enum iommu_dev_features feat)
2782 {
2783         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2784
2785         if (!arm_smmu_dev_has_feature(dev, feat))
2786                 return -ENODEV;
2787
2788         if (arm_smmu_dev_feature_enabled(dev, feat))
2789                 return -EBUSY;
2790
2791         switch (feat) {
2792         case IOMMU_DEV_FEAT_IOPF:
2793                 master->iopf_enabled = true;
2794                 return 0;
2795         case IOMMU_DEV_FEAT_SVA:
2796                 return arm_smmu_master_enable_sva(master);
2797         default:
2798                 return -EINVAL;
2799         }
2800 }
2801
2802 static int arm_smmu_dev_disable_feature(struct device *dev,
2803                                         enum iommu_dev_features feat)
2804 {
2805         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2806
2807         if (!arm_smmu_dev_feature_enabled(dev, feat))
2808                 return -EINVAL;
2809
2810         switch (feat) {
2811         case IOMMU_DEV_FEAT_IOPF:
2812                 if (master->sva_enabled)
2813                         return -EBUSY;
2814                 master->iopf_enabled = false;
2815                 return 0;
2816         case IOMMU_DEV_FEAT_SVA:
2817                 return arm_smmu_master_disable_sva(master);
2818         default:
2819                 return -EINVAL;
2820         }
2821 }
2822
2823 static struct iommu_ops arm_smmu_ops = {
2824         .capable                = arm_smmu_capable,
2825         .domain_alloc           = arm_smmu_domain_alloc,
2826         .domain_free            = arm_smmu_domain_free,
2827         .attach_dev             = arm_smmu_attach_dev,
2828         .map                    = arm_smmu_map,
2829         .unmap                  = arm_smmu_unmap,
2830         .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
2831         .iotlb_sync             = arm_smmu_iotlb_sync,
2832         .iova_to_phys           = arm_smmu_iova_to_phys,
2833         .probe_device           = arm_smmu_probe_device,
2834         .release_device         = arm_smmu_release_device,
2835         .device_group           = arm_smmu_device_group,
2836         .enable_nesting         = arm_smmu_enable_nesting,
2837         .of_xlate               = arm_smmu_of_xlate,
2838         .get_resv_regions       = arm_smmu_get_resv_regions,
2839         .put_resv_regions       = generic_iommu_put_resv_regions,
2840         .dev_has_feat           = arm_smmu_dev_has_feature,
2841         .dev_feat_enabled       = arm_smmu_dev_feature_enabled,
2842         .dev_enable_feat        = arm_smmu_dev_enable_feature,
2843         .dev_disable_feat       = arm_smmu_dev_disable_feature,
2844         .sva_bind               = arm_smmu_sva_bind,
2845         .sva_unbind             = arm_smmu_sva_unbind,
2846         .sva_get_pasid          = arm_smmu_sva_get_pasid,
2847         .page_response          = arm_smmu_page_response,
2848         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
2849         .owner                  = THIS_MODULE,
2850 };
2851
2852 /* Probing and initialisation functions */
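     /*
      * Allocate a single hardware queue. If the ideal size cannot be
      * obtained as one DMA-coherent allocation, the number of entries is
      * halved until the allocation succeeds, giving up once the requested
      * size falls below PAGE_SIZE.
      */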
2853 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2854                                    struct arm_smmu_queue *q,
2855                                    void __iomem *page,
2856                                    unsigned long prod_off,
2857                                    unsigned long cons_off,
2858                                    size_t dwords, const char *name)
2859 {
2860         size_t qsz;
2861
2862         do {
2863                 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2864                 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2865                                               GFP_KERNEL);
2866                 if (q->base || qsz < PAGE_SIZE)
2867                         break;
2868
2869                 q->llq.max_n_shift--;
2870         } while (1);
2871
2872         if (!q->base)
2876                 return -ENOMEM;
2878
2879         if (!WARN_ON(q->base_dma & (qsz - 1))) {
2880                 dev_info(smmu->dev, "allocated %u entries for %s\n",
2881                          1 << q->llq.max_n_shift, name);
2882         }
2883
2884         q->prod_reg     = page + prod_off;
2885         q->cons_reg     = page + cons_off;
2886         q->ent_dwords   = dwords;
2887
2888         q->q_base  = Q_BASE_RWA;
2889         q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2890         q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2891
2892         q->llq.prod = q->llq.cons = 0;
2893         return 0;
2894 }
2895
2896 static void arm_smmu_cmdq_free_bitmap(void *data)
2897 {
2898         unsigned long *bitmap = data;
2899         bitmap_free(bitmap);
2900 }
2901
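     /*
      * The command queue keeps a bitmap with one bit per entry
      * (cmdq->valid_map), used by the lock-free insertion code to track
      * and poll the valid state of slots owned by concurrent producers.
      */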
2902 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2903 {
2904         int ret = 0;
2905         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2906         unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2907         atomic_long_t *bitmap;
2908
2909         atomic_set(&cmdq->owner_prod, 0);
2910         atomic_set(&cmdq->lock, 0);
2911
2912         bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2913         if (!bitmap) {
2915                 ret = -ENOMEM;
2916         } else {
2917                 cmdq->valid_map = bitmap;
2918                 devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2919         }
2920
2921         return ret;
2922 }
2923
2924 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2925 {
2926         int ret;
2927
2928         /* cmdq */
2929         ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2930                                       ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2931                                       CMDQ_ENT_DWORDS, "cmdq");
2932         if (ret)
2933                 return ret;
2934
2935         ret = arm_smmu_cmdq_init(smmu);
2936         if (ret)
2937                 return ret;
2938
2939         /* evtq */
2940         ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2941                                       ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2942                                       EVTQ_ENT_DWORDS, "evtq");
2943         if (ret)
2944                 return ret;
2945
2946         if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2947             (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2948                 smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2949                 if (!smmu->evtq.iopf)
2950                         return -ENOMEM;
2951         }
2952
2953         /* priq */
2954         if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2955                 return 0;
2956
2957         return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2958                                        ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2959                                        PRIQ_ENT_DWORDS, "priq");
2960 }
2961
2962 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2963 {
2964         unsigned int i;
2965         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2966         size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2967         void *strtab = smmu->strtab_cfg.strtab;
2968
2969         cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2970         if (!cfg->l1_desc)
2971                 return -ENOMEM;
2972
2973         for (i = 0; i < cfg->num_l1_ents; ++i) {
2974                 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2975                 strtab += STRTAB_L1_DESC_DWORDS << 3;
2976         }
2977
2978         return 0;
2979 }
2980
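     /*
      * Worked example (illustrative values): with STRTAB_SPLIT == 8 and
      * sid_bits == 16, the L1 table holds 1 << (16 - 8) == 256
      * descriptors, each pointing at an L2 table of 256 STEs that is
      * only allocated once a device with a matching SID appears.
      */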
2981 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2982 {
2983         void *strtab;
2984         u64 reg;
2985         u32 size, l1size;
2986         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2987
2988         /* Calculate the L1 size, capped to the SIDSIZE. */
2989         size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2990         size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2991         cfg->num_l1_ents = 1 << size;
2992
2993         size += STRTAB_SPLIT;
2994         if (size < smmu->sid_bits)
2995                 dev_warn(smmu->dev,
2996                          "2-level strtab only covers %u/%u bits of SID\n",
2997                          size, smmu->sid_bits);
2998
2999         l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3000         strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3001                                      GFP_KERNEL);
3002         if (!strtab)
3006                 return -ENOMEM;
3008         cfg->strtab = strtab;
3009
3010         /* Configure strtab_base_cfg for 2 levels */
3011         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3012         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3013         reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3014         cfg->strtab_base_cfg = reg;
3015
3016         return arm_smmu_init_l1_strtab(smmu);
3017 }
3018
3019 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3020 {
3021         void *strtab;
3022         u64 reg;
3023         u32 size;
3024         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3025
3026         size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3027         strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3028                                      GFP_KERNEL);
3029         if (!strtab)
3033                 return -ENOMEM;
3035         cfg->strtab = strtab;
3036         cfg->num_l1_ents = 1 << smmu->sid_bits;
3037
3038         /* Configure strtab_base_cfg for a linear table covering all SIDs */
3039         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3040         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3041         cfg->strtab_base_cfg = reg;
3042
3043         arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3044         return 0;
3045 }
3046
3047 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3048 {
3049         u64 reg;
3050         int ret;
3051
3052         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3053                 ret = arm_smmu_init_strtab_2lvl(smmu);
3054         else
3055                 ret = arm_smmu_init_strtab_linear(smmu);
3056
3057         if (ret)
3058                 return ret;
3059
3060         /* Set the strtab base address */
3061         reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3062         reg |= STRTAB_BASE_RA;
3063         smmu->strtab_cfg.strtab_base = reg;
3064
3065         /* Allocate the first VMID for stage-2 bypass STEs */
3066         set_bit(0, smmu->vmid_map);
3067         return 0;
3068 }
3069
3070 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3071 {
3072         int ret;
3073
3074         mutex_init(&smmu->streams_mutex);
3075         smmu->streams = RB_ROOT;
3076
3077         ret = arm_smmu_init_queues(smmu);
3078         if (ret)
3079                 return ret;
3080
3081         return arm_smmu_init_strtab(smmu);
3082 }
3083
3084 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3085                                    unsigned int reg_off, unsigned int ack_off)
3086 {
3087         u32 reg;
3088
3089         writel_relaxed(val, smmu->base + reg_off);
3090         return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3091                                           1, ARM_SMMU_POLL_TIMEOUT_US);
3092 }
3093
3094 /* GBPA is "special": it must be updated via the GBPA.Update handshake */
3095 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3096 {
3097         int ret;
3098         u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3099
3100         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3101                                          1, ARM_SMMU_POLL_TIMEOUT_US);
3102         if (ret)
3103                 return ret;
3104
3105         reg &= ~clr;
3106         reg |= set;
3107         writel_relaxed(reg | GBPA_UPDATE, gbpa);
3108         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3109                                          1, ARM_SMMU_POLL_TIMEOUT_US);
3110
3111         if (ret)
3112                 dev_err(smmu->dev, "GBPA not responding to update\n");
3113         return ret;
3114 }
3115
3116 static void arm_smmu_free_msis(void *data)
3117 {
3118         struct device *dev = data;
3119         platform_msi_domain_free_irqs(dev);
3120 }
3121
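     /*
      * Each MSI-capable interrupt source (evtq, gerror, priq) is
      * programmed through three registers: a 64-bit doorbell address
      * (CFG0), a 32-bit payload (CFG1) and a memory attribute field
      * (CFG2); this callback fills them in from the composed MSI message.
      */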
3122 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3123 {
3124         phys_addr_t doorbell;
3125         struct device *dev = msi_desc_to_dev(desc);
3126         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3127         phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3128
3129         doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3130         doorbell &= MSI_CFG0_ADDR_MASK;
3131
3132         writeq_relaxed(doorbell, smmu->base + cfg[0]);
3133         writel_relaxed(msg->data, smmu->base + cfg[1]);
3134         writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3135 }
3136
3137 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3138 {
3139         struct msi_desc *desc;
3140         int ret, nvec = ARM_SMMU_MAX_MSIS;
3141         struct device *dev = smmu->dev;
3142
3143         /* Clear the MSI address regs */
3144         writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3145         writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3146
3147         if (smmu->features & ARM_SMMU_FEAT_PRI)
3148                 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3149         else
3150                 nvec--;
3151
3152         if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3153                 return;
3154
3155         if (!dev->msi_domain) {
3156                 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3157                 return;
3158         }
3159
3160         /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3161         ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3162         if (ret) {
3163                 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3164                 return;
3165         }
3166
3167         for_each_msi_entry(desc, dev) {
3168                 switch (desc->platform.msi_index) {
3169                 case EVTQ_MSI_INDEX:
3170                         smmu->evtq.q.irq = desc->irq;
3171                         break;
3172                 case GERROR_MSI_INDEX:
3173                         smmu->gerr_irq = desc->irq;
3174                         break;
3175                 case PRIQ_MSI_INDEX:
3176                         smmu->priq.q.irq = desc->irq;
3177                         break;
3178                 default:        /* Unknown */
3179                         continue;
3180                 }
3181         }
3182
3183         /* Add callback to free MSIs on teardown */
3184         devm_add_action(dev, arm_smmu_free_msis, dev);
3185 }
3186
3187 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3188 {
3189         int irq, ret;
3190
3191         arm_smmu_setup_msis(smmu);
3192
3193         /* Request interrupt lines */
3194         irq = smmu->evtq.q.irq;
3195         if (irq) {
3196                 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3197                                                 arm_smmu_evtq_thread,
3198                                                 IRQF_ONESHOT,
3199                                                 "arm-smmu-v3-evtq", smmu);
3200                 if (ret < 0)
3201                         dev_warn(smmu->dev, "failed to enable evtq irq\n");
3202         } else {
3203                 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3204         }
3205
3206         irq = smmu->gerr_irq;
3207         if (irq) {
3208                 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3209                                        0, "arm-smmu-v3-gerror", smmu);
3210                 if (ret < 0)
3211                         dev_warn(smmu->dev, "failed to enable gerror irq\n");
3212         } else {
3213                 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3214         }
3215
3216         if (smmu->features & ARM_SMMU_FEAT_PRI) {
3217                 irq = smmu->priq.q.irq;
3218                 if (irq) {
3219                         ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3220                                                         arm_smmu_priq_thread,
3221                                                         IRQF_ONESHOT,
3222                                                         "arm-smmu-v3-priq",
3223                                                         smmu);
3224                         if (ret < 0)
3225                                 dev_warn(smmu->dev,
3226                                          "failed to enable priq irq\n");
3227                 } else {
3228                         dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3229                 }
3230         }
3231 }
3232
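     /*
      * IRQ bring-up order: globally mask SMMU interrupt generation,
      * request either the single combined line or the per-queue lines,
      * then re-enable generation for the event queue, global errors and,
      * when supported, the PRI queue.
      */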
3233 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3234 {
3235         int ret, irq;
3236         u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3237
3238         /* Disable IRQs first */
3239         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3240                                       ARM_SMMU_IRQ_CTRLACK);
3241         if (ret) {
3242                 dev_err(smmu->dev, "failed to disable irqs\n");
3243                 return ret;
3244         }
3245
3246         irq = smmu->combined_irq;
3247         if (irq) {
3248                 /*
3249                  * Cavium ThunderX2 implementation doesn't support unique irq
3250                  * lines. Use a single irq line for all the SMMUv3 interrupts.
3251                  */
3252                 ret = devm_request_threaded_irq(smmu->dev, irq,
3253                                         arm_smmu_combined_irq_handler,
3254                                         arm_smmu_combined_irq_thread,
3255                                         IRQF_ONESHOT,
3256                                         "arm-smmu-v3-combined-irq", smmu);
3257                 if (ret < 0)
3258                         dev_warn(smmu->dev, "failed to enable combined irq\n");
3259         } else {
3260                 arm_smmu_setup_unique_irqs(smmu);
             }
3261
3262         if (smmu->features & ARM_SMMU_FEAT_PRI)
3263                 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3264
3265         /* Enable interrupt generation on the SMMU */
3266         ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3267                                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3268         if (ret)
3269                 dev_warn(smmu->dev, "failed to enable irqs\n");
3270
3271         return 0;
3272 }
3273
3274 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3275 {
3276         int ret;
3277
3278         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3279         if (ret)
3280                 dev_err(smmu->dev, "failed to clear cr0\n");
3281
3282         return ret;
3283 }
3284
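     /*
      * Bring the SMMU up from an unknown state: force global abort if it
      * was left enabled, disable it, program table/queue attributes and
      * base registers, enable the command queue, invalidate cached
      * configuration and TLBs, enable the event/PRI queues and ATS
      * checking, set up IRQs and finally enable translation (or bypass).
      */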
3285 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3286 {
3287         int ret;
3288         u32 reg, enables;
3289         struct arm_smmu_cmdq_ent cmd;
3290
3291         /* Clear CR0 and sync (disables SMMU and queue processing) */
3292         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3293         if (reg & CR0_SMMUEN) {
3294                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3295                 WARN_ON(is_kdump_kernel() && !disable_bypass);
3296                 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3297         }
3298
3299         ret = arm_smmu_device_disable(smmu);
3300         if (ret)
3301                 return ret;
3302
3303         /* CR1 (table and queue memory attributes) */
3304         reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3305               FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3306               FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3307               FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3308               FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3309               FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3310         writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3311
3312         /* CR2 (private TLB maintenance, record invalid SIDs, optional E2H) */
3313         reg = CR2_PTM | CR2_RECINVSID;
3314
3315         if (smmu->features & ARM_SMMU_FEAT_E2H)
3316                 reg |= CR2_E2H;
3317
3318         writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3319
3320         /* Stream table */
3321         writeq_relaxed(smmu->strtab_cfg.strtab_base,
3322                        smmu->base + ARM_SMMU_STRTAB_BASE);
3323         writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3324                        smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3325
3326         /* Command queue */
3327         writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3328         writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3329         writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3330
3331         enables = CR0_CMDQEN;
3332         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3333                                       ARM_SMMU_CR0ACK);
3334         if (ret) {
3335                 dev_err(smmu->dev, "failed to enable command queue\n");
3336                 return ret;
3337         }
3338
3339         /* Invalidate any cached configuration */
3340         cmd.opcode = CMDQ_OP_CFGI_ALL;
3341         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3342         arm_smmu_cmdq_issue_sync(smmu);
3343
3344         /* Invalidate any stale TLB entries */
3345         if (smmu->features & ARM_SMMU_FEAT_HYP) {
3346                 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3347                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3348         }
3349
3350         cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3351         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3352         arm_smmu_cmdq_issue_sync(smmu);
3353
3354         /* Event queue */
3355         writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3356         writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3357         writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3358
3359         enables |= CR0_EVTQEN;
3360         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3361                                       ARM_SMMU_CR0ACK);
3362         if (ret) {
3363                 dev_err(smmu->dev, "failed to enable event queue\n");
3364                 return ret;
3365         }
3366
3367         /* PRI queue */
3368         if (smmu->features & ARM_SMMU_FEAT_PRI) {
3369                 writeq_relaxed(smmu->priq.q.q_base,
3370                                smmu->base + ARM_SMMU_PRIQ_BASE);
3371                 writel_relaxed(smmu->priq.q.llq.prod,
3372                                smmu->page1 + ARM_SMMU_PRIQ_PROD);
3373                 writel_relaxed(smmu->priq.q.llq.cons,
3374                                smmu->page1 + ARM_SMMU_PRIQ_CONS);
3375
3376                 enables |= CR0_PRIQEN;
3377                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3378                                               ARM_SMMU_CR0ACK);
3379                 if (ret) {
3380                         dev_err(smmu->dev, "failed to enable PRI queue\n");
3381                         return ret;
3382                 }
3383         }
3384
3385         if (smmu->features & ARM_SMMU_FEAT_ATS) {
3386                 enables |= CR0_ATSCHK;
3387                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3388                                               ARM_SMMU_CR0ACK);
3389                 if (ret) {
3390                         dev_err(smmu->dev, "failed to enable ATS check\n");
3391                         return ret;
3392                 }
3393         }
3394
3395         ret = arm_smmu_setup_irqs(smmu);
3396         if (ret) {
3397                 dev_err(smmu->dev, "failed to setup irqs\n");
3398                 return ret;
3399         }
3400
3401         if (is_kdump_kernel())
3402                 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3403
3404         /* Enable the SMMU interface, or ensure bypass */
3405         if (!bypass || disable_bypass) {
3406                 enables |= CR0_SMMUEN;
3407         } else {
3408                 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3409                 if (ret)
3410                         return ret;
3411         }
3412         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3413                                       ARM_SMMU_CR0ACK);
3414         if (ret) {
3415                 dev_err(smmu->dev, "failed to enable SMMU interface\n");
3416                 return ret;
3417         }
3418
3419         return 0;
3420 }
3421
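     /*
      * Probe the ID registers: IDR0 for feature flags and translation
      * support, IDR1 for queue and SID/SSID sizes, IDR3 for range
      * invalidation and IDR5 for page sizes and address sizes.
      */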
3422 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3423 {
3424         u32 reg;
3425         bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3426
3427         /* IDR0 */
3428         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3429
3430         /* 2-level structures */
3431         if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3432                 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3433
3434         if (reg & IDR0_CD2L)
3435                 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3436
3437         /*
3438          * Translation table endianness.
3439          * We currently require the same endianness as the CPU, but this
3440          * could be changed later by adding a new IO_PGTABLE_QUIRK.
3441          */
3442         switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3443         case IDR0_TTENDIAN_MIXED:
3444                 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3445                 break;
3446 #ifdef __BIG_ENDIAN
3447         case IDR0_TTENDIAN_BE:
3448                 smmu->features |= ARM_SMMU_FEAT_TT_BE;
3449                 break;
3450 #else
3451         case IDR0_TTENDIAN_LE:
3452                 smmu->features |= ARM_SMMU_FEAT_TT_LE;
3453                 break;
3454 #endif
3455         default:
3456                 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3457                 return -ENXIO;
3458         }
3459
3460         /* Boolean feature flags */
3461         if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3462                 smmu->features |= ARM_SMMU_FEAT_PRI;
3463
3464         if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3465                 smmu->features |= ARM_SMMU_FEAT_ATS;
3466
3467         if (reg & IDR0_SEV)
3468                 smmu->features |= ARM_SMMU_FEAT_SEV;
3469
3470         if (reg & IDR0_MSI) {
3471                 smmu->features |= ARM_SMMU_FEAT_MSI;
3472                 if (coherent && !disable_msipolling)
3473                         smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3474         }
3475
3476         if (reg & IDR0_HYP) {
3477                 smmu->features |= ARM_SMMU_FEAT_HYP;
3478                 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3479                         smmu->features |= ARM_SMMU_FEAT_E2H;
3480         }
3481
3482         /*
3483          * The coherency feature as set by FW is used in preference to the ID
3484          * register, but warn on mismatch.
3485          */
3486         if (!!(reg & IDR0_COHACC) != coherent)
3487                 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3488                          coherent ? "true" : "false");
3489
3490         switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3491         case IDR0_STALL_MODEL_FORCE:
3492                 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3493                 fallthrough;
3494         case IDR0_STALL_MODEL_STALL:
3495                 smmu->features |= ARM_SMMU_FEAT_STALLS;
3496         }
3497
3498         if (reg & IDR0_S1P)
3499                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3500
3501         if (reg & IDR0_S2P)
3502                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3503
3504         if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3505                 dev_err(smmu->dev, "no translation support!\n");
3506                 return -ENXIO;
3507         }
3508
3509         /* We only support the AArch64 table format at present */
3510         switch (FIELD_GET(IDR0_TTF, reg)) {
3511         case IDR0_TTF_AARCH32_64:
3512                 smmu->ias = 40;
3513                 fallthrough;
3514         case IDR0_TTF_AARCH64:
3515                 break;
3516         default:
3517                 dev_err(smmu->dev, "AArch64 table format not supported!\n");
3518                 return -ENXIO;
3519         }
3520
3521         /* ASID/VMID sizes */
3522         smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3523         smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3524
3525         /* IDR1 */
3526         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3527         if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3528                 dev_err(smmu->dev, "embedded implementation not supported\n");
3529                 return -ENXIO;
3530         }
3531
3532         /* Queue sizes, capped to ensure natural alignment */
3533         smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3534                                              FIELD_GET(IDR1_CMDQS, reg));
3535         if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3536                 /*
3537                  * We don't support splitting up batches, so one batch of
3538                  * commands plus an extra sync needs to fit inside the command
3539                  * queue. There's also no way we can handle the weird alignment
3540                  * restrictions on the base pointer for a unit-length queue.
3541                  */
3542                 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3543                         CMDQ_BATCH_ENTRIES);
3544                 return -ENXIO;
3545         }
3546
3547         smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3548                                              FIELD_GET(IDR1_EVTQS, reg));
3549         smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3550                                              FIELD_GET(IDR1_PRIQS, reg));
3551
3552         /* SID/SSID sizes */
3553         smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3554         smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3555
3556         /*
3557          * If the SMMU supports fewer bits than would fill a single L2 stream
3558          * table, use a linear table instead.
3559          */
3560         if (smmu->sid_bits <= STRTAB_SPLIT)
3561                 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3562
3563         /* IDR3 */
3564         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3565         if (FIELD_GET(IDR3_RIL, reg))
3566                 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3567
3568         /* IDR5 */
3569         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3570
3571         /* Maximum number of outstanding stalls */
3572         smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3573
3574         /* Page sizes */
3575         if (reg & IDR5_GRAN64K)
3576                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3577         if (reg & IDR5_GRAN16K)
3578                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3579         if (reg & IDR5_GRAN4K)
3580                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3581
3582         /* Input address size */
3583         if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3584                 smmu->features |= ARM_SMMU_FEAT_VAX;
3585
3586         /* Output address size */
3587         switch (FIELD_GET(IDR5_OAS, reg)) {
3588         case IDR5_OAS_32_BIT:
3589                 smmu->oas = 32;
3590                 break;
3591         case IDR5_OAS_36_BIT:
3592                 smmu->oas = 36;
3593                 break;
3594         case IDR5_OAS_40_BIT:
3595                 smmu->oas = 40;
3596                 break;
3597         case IDR5_OAS_42_BIT:
3598                 smmu->oas = 42;
3599                 break;
3600         case IDR5_OAS_44_BIT:
3601                 smmu->oas = 44;
3602                 break;
3603         case IDR5_OAS_52_BIT:
3604                 smmu->oas = 52;
3605                 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3606                 break;
3607         default:
3608                 dev_info(smmu->dev,
3609                         "unknown output address size. Truncating to 48-bit\n");
3610                 fallthrough;
3611         case IDR5_OAS_48_BIT:
3612                 smmu->oas = 48;
3613         }
3614
3615         if (arm_smmu_ops.pgsize_bitmap == -1UL)
3616                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3617         else
3618                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3619
3620         /* Set the DMA mask for our table walker */
3621         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3622                 dev_warn(smmu->dev,
3623                          "failed to set DMA mask for table walker\n");
3624
3625         smmu->ias = max(smmu->ias, smmu->oas);
3626
3627         if (arm_smmu_sva_supported(smmu))
3628                 smmu->features |= ARM_SMMU_FEAT_SVA;
3629
3630         dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3631                  smmu->ias, smmu->oas, smmu->features);
3632         return 0;
3633 }
3634
3635 #ifdef CONFIG_ACPI
3636 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3637 {
3638         switch (model) {
3639         case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3640                 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3641                 break;
3642         case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3643                 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3644                 break;
3645         }
3646
3647         dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3648 }
3649
3650 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3651                                       struct arm_smmu_device *smmu)
3652 {
3653         struct acpi_iort_smmu_v3 *iort_smmu;
3654         struct device *dev = smmu->dev;
3655         struct acpi_iort_node *node;
3656
3657         node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3658
3659         /* Retrieve SMMUv3 specific data */
3660         iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3661
3662         acpi_smmu_get_options(iort_smmu->model, smmu);
3663
3664         if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3665                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3666
3667         return 0;
3668 }
3669 #else
3670 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3671                                              struct arm_smmu_device *smmu)
3672 {
3673         return -ENODEV;
3674 }
3675 #endif
3676
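     /*
      * A minimal devicetree node accepted by this probe (unit address,
      * base address and size are illustrative only):
      *
      *	iommu@2b400000 {
      *		compatible = "arm,smmu-v3";
      *		reg = <0x0 0x2b400000 0x0 0x20000>;
      *		#iommu-cells = <1>;
      *	};
      *
      * plus either a "combined" interrupt or "eventq"/"gerror"/"priq"
      * lines as wired on the platform.
      */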
3677 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3678                                     struct arm_smmu_device *smmu)
3679 {
3680         struct device *dev = &pdev->dev;
3681         u32 cells;
3682         int ret = -EINVAL;
3683
3684         if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3685                 dev_err(dev, "missing #iommu-cells property\n");
3686         else if (cells != 1)
3687                 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3688         else
3689                 ret = 0;
3690
3691         parse_driver_options(smmu);
3692
3693         if (of_dma_is_coherent(dev->of_node))
3694                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3695
3696         return ret;
3697 }
3698
3699 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3700 {
3701         if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3702                 return SZ_64K;
3703         else
3704                 return SZ_128K;
3705 }
3706
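     /*
      * Register (or, with ops == NULL, unregister) the SMMU as the IOMMU
      * for the PCI, AMBA and platform buses, unwinding earlier buses if a
      * later registration fails.
      */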
3707 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3708 {
3709         int err;
3710
3711 #ifdef CONFIG_PCI
3712         if (pci_bus_type.iommu_ops != ops) {
3713                 err = bus_set_iommu(&pci_bus_type, ops);
3714                 if (err)
3715                         return err;
3716         }
3717 #endif
3718 #ifdef CONFIG_ARM_AMBA
3719         if (amba_bustype.iommu_ops != ops) {
3720                 err = bus_set_iommu(&amba_bustype, ops);
3721                 if (err)
3722                         goto err_reset_pci_ops;
3723         }
3724 #endif
3725         if (platform_bus_type.iommu_ops != ops) {
3726                 err = bus_set_iommu(&platform_bus_type, ops);
3727                 if (err)
3728                         goto err_reset_amba_ops;
3729         }
3730
3731         return 0;
3732
3733 err_reset_amba_ops:
3734 #ifdef CONFIG_ARM_AMBA
3735         bus_set_iommu(&amba_bustype, NULL);
3736 #endif
3737 err_reset_pci_ops: __maybe_unused;
3738 #ifdef CONFIG_PCI
3739         bus_set_iommu(&pci_bus_type, NULL);
3740 #endif
3741         return err;
3742 }
3743
3744 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3745                                       resource_size_t size)
3746 {
3747         struct resource res = DEFINE_RES_MEM(start, size);
3748
3749         return devm_ioremap_resource(dev, &res);
3750 }
3751
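     /*
      * Probe flow: parse firmware (DT or ACPI/IORT), map the register
      * pages, collect wired IRQs, discover hardware features, allocate
      * the in-memory structures, reset the device and finally register
      * with the IOMMU core and the relevant buses.
      */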
3752 static int arm_smmu_device_probe(struct platform_device *pdev)
3753 {
3754         int irq, ret;
3755         struct resource *res;
3756         resource_size_t ioaddr;
3757         struct arm_smmu_device *smmu;
3758         struct device *dev = &pdev->dev;
3759         bool bypass;
3760
3761         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3762         if (!smmu)
3763                 return -ENOMEM;
3764         smmu->dev = dev;
3765
3766         if (dev->of_node) {
3767                 ret = arm_smmu_device_dt_probe(pdev, smmu);
3768         } else {
3769                 ret = arm_smmu_device_acpi_probe(pdev, smmu);
3770                 if (ret == -ENODEV)
3771                         return ret;
3772         }
3773
3774         /* Set bypass mode according to firmware probing result */
3775         bypass = !!ret;
3776
3777         /* Base address */
3778         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
             if (!res)
                     return -EINVAL;
3779         if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3780                 dev_err(dev, "MMIO region too small (%pr)\n", res);
3781                 return -EINVAL;
3782         }
3783         ioaddr = res->start;
3784
3785         /*
3786          * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3787          * the PMCG registers which are reserved by the PMU driver.
3788          */
3789         smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3790         if (IS_ERR(smmu->base))
3791                 return PTR_ERR(smmu->base);
3792
3793         if (arm_smmu_resource_size(smmu) > SZ_64K) {
3794                 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3795                                                ARM_SMMU_REG_SZ);
3796                 if (IS_ERR(smmu->page1))
3797                         return PTR_ERR(smmu->page1);
3798         } else {
3799                 smmu->page1 = smmu->base;
3800         }
3801
3802         /* Interrupt lines */
3803
3804         irq = platform_get_irq_byname_optional(pdev, "combined");
3805         if (irq > 0)
3806                 smmu->combined_irq = irq;
3807         else {
3808                 irq = platform_get_irq_byname_optional(pdev, "eventq");
3809                 if (irq > 0)
3810                         smmu->evtq.q.irq = irq;
3811
3812                 irq = platform_get_irq_byname_optional(pdev, "priq");
3813                 if (irq > 0)
3814                         smmu->priq.q.irq = irq;
3815
3816                 irq = platform_get_irq_byname_optional(pdev, "gerror");
3817                 if (irq > 0)
3818                         smmu->gerr_irq = irq;
3819         }
3820         /* Probe the h/w */
3821         ret = arm_smmu_device_hw_probe(smmu);
3822         if (ret)
3823                 return ret;
3824
3825         /* Initialise in-memory data structures */
3826         ret = arm_smmu_init_structures(smmu);
3827         if (ret)
3828                 return ret;
3829
3830         /* Record our private device structure */
3831         platform_set_drvdata(pdev, smmu);
3832
3833         /* Reset the device */
3834         ret = arm_smmu_device_reset(smmu, bypass);
3835         if (ret)
3836                 return ret;
3837
3838         /* And we're up. Go go go! */
3839         ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3840                                      "smmu3.%pa", &ioaddr);
3841         if (ret)
3842                 return ret;
3843
3844         ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3845         if (ret) {
3846                 dev_err(dev, "Failed to register iommu\n");
3847                 goto err_sysfs_remove;
3848         }
3849
3850         ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
3851         if (ret)
3852                 goto err_unregister_device;
3853
3854         return 0;
3855
3856 err_unregister_device:
3857         iommu_device_unregister(&smmu->iommu);
3858 err_sysfs_remove:
3859         iommu_device_sysfs_remove(&smmu->iommu);
3860         return ret;
3861 }
3862
3863 static int arm_smmu_device_remove(struct platform_device *pdev)
3864 {
3865         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3866
3867         arm_smmu_set_bus_ops(NULL);
3868         iommu_device_unregister(&smmu->iommu);
3869         iommu_device_sysfs_remove(&smmu->iommu);
3870         arm_smmu_device_disable(smmu);
3871         iopf_queue_free(smmu->evtq.iopf);
3872
3873         return 0;
3874 }
3875
3876 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3877 {
3878         arm_smmu_device_remove(pdev);
3879 }
3880
3881 static const struct of_device_id arm_smmu_of_match[] = {
3882         { .compatible = "arm,smmu-v3", },
3883         { },
3884 };
3885 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3886
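     /*
      * Ensure any outstanding SVA mmu_notifier callbacks have completed
      * before the module text disappears.
      */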
3887 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3888 {
3889         arm_smmu_sva_notifier_synchronize();
3890         platform_driver_unregister(drv);
3891 }
3892
3893 static struct platform_driver arm_smmu_driver = {
3894         .driver = {
3895                 .name                   = "arm-smmu-v3",
3896                 .of_match_table         = arm_smmu_of_match,
3897                 .suppress_bind_attrs    = true,
3898         },
3899         .probe  = arm_smmu_device_probe,
3900         .remove = arm_smmu_device_remove,
3901         .shutdown = arm_smmu_device_shutdown,
3902 };
3903 module_driver(arm_smmu_driver, platform_driver_register,
3904               arm_smmu_driver_unregister);
3905
3906 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3907 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3908 MODULE_ALIAS("platform:arm-smmu-v3");
3909 MODULE_LICENSE("GPL v2");