iommu: Separate SVA and IOPF
[linux-2.6-microblaze.git] drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29
30 #include "arm-smmu-v3.h"
31 #include "../../dma-iommu.h"
32
33 static bool disable_bypass = true;
34 module_param(disable_bypass, bool, 0444);
35 MODULE_PARM_DESC(disable_bypass,
36         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
37
38 static bool disable_msipolling;
39 module_param(disable_msipolling, bool, 0444);
40 MODULE_PARM_DESC(disable_msipolling,
41         "Disable MSI-based polling for CMD_SYNC completion.");
42
43 enum arm_smmu_msi_index {
44         EVTQ_MSI_INDEX,
45         GERROR_MSI_INDEX,
46         PRIQ_MSI_INDEX,
47         ARM_SMMU_MAX_MSIS,
48 };
49
50 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
51         [EVTQ_MSI_INDEX] = {
52                 ARM_SMMU_EVTQ_IRQ_CFG0,
53                 ARM_SMMU_EVTQ_IRQ_CFG1,
54                 ARM_SMMU_EVTQ_IRQ_CFG2,
55         },
56         [GERROR_MSI_INDEX] = {
57                 ARM_SMMU_GERROR_IRQ_CFG0,
58                 ARM_SMMU_GERROR_IRQ_CFG1,
59                 ARM_SMMU_GERROR_IRQ_CFG2,
60         },
61         [PRIQ_MSI_INDEX] = {
62                 ARM_SMMU_PRIQ_IRQ_CFG0,
63                 ARM_SMMU_PRIQ_IRQ_CFG1,
64                 ARM_SMMU_PRIQ_IRQ_CFG2,
65         },
66 };
67
68 struct arm_smmu_option_prop {
69         u32 opt;
70         const char *prop;
71 };
72
73 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
74 DEFINE_MUTEX(arm_smmu_asid_lock);
75
76 /*
77  * Special value used by SVA when a process dies, to quiesce a CD without
78  * disabling it.
79  */
80 struct arm_smmu_ctx_desc quiet_cd = { 0 };
81
82 static struct arm_smmu_option_prop arm_smmu_options[] = {
83         { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
84         { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
85         { 0, NULL},
86 };
87
88 static void parse_driver_options(struct arm_smmu_device *smmu)
89 {
90         int i = 0;
91
92         do {
93                 if (of_property_read_bool(smmu->dev->of_node,
94                                                 arm_smmu_options[i].prop)) {
95                         smmu->options |= arm_smmu_options[i].opt;
96                         dev_notice(smmu->dev, "option %s\n",
97                                 arm_smmu_options[i].prop);
98                 }
99         } while (arm_smmu_options[++i].opt);
100 }
101
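/*
 * Illustrative device-tree fragment (a sketch, not copied from a real DTS):
 * a platform affected by the HiSilicon prefetch erratum would advertise the
 * corresponding property in its SMMU node, and parse_driver_options() above
 * would then set ARM_SMMU_OPT_SKIP_PREFETCH for it.
 *
 *	smmu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		hisilicon,broken-prefetch-cmd;
 *	};
 */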
102 /* Low-level queue manipulation functions */
103 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
104 {
105         u32 space, prod, cons;
106
107         prod = Q_IDX(q, q->prod);
108         cons = Q_IDX(q, q->cons);
109
110         if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
111                 space = (1 << q->max_n_shift) - (prod - cons);
112         else
113                 space = cons - prod;
114
115         return space >= n;
116 }
117
118 static bool queue_full(struct arm_smmu_ll_queue *q)
119 {
120         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
121                Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
122 }
123
124 static bool queue_empty(struct arm_smmu_ll_queue *q)
125 {
126         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
127                Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
128 }
129
130 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
131 {
132         return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
133                 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
134                ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
135                 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
136 }
137
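/*
 * A worked example of the prod/cons encoding used by the helpers above,
 * assuming the usual layout from arm-smmu-v3.h in which Q_IDX() extracts the
 * low max_n_shift bits and Q_WRP() the single wrap bit directly above them:
 *
 *	max_n_shift = 3 (8 entries), so the wrap bit is bit 3:
 *
 *	prod = 0b1001, cons = 0b0001 -> same index, different wrap bits:
 *					queue_full() is true.
 *	prod = 0b1001, cons = 0b1001 -> same index, same wrap bit:
 *					queue_empty() is true.
 *	prod = 0b1010, cons = 0b0110 -> wrap bits differ, so
 *					queue_has_space() computes
 *					space = cons - prod = 4 free slots.
 */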
138 static void queue_sync_cons_out(struct arm_smmu_queue *q)
139 {
140         /*
141          * Ensure that all CPU accesses (reads and writes) to the queue
142          * are complete before we update the cons pointer.
143          */
144         __iomb();
145         writel_relaxed(q->llq.cons, q->cons_reg);
146 }
147
148 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
149 {
150         u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
151         q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
152 }
153
154 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
155 {
156         struct arm_smmu_ll_queue *llq = &q->llq;
157
158         if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
159                 return;
160
161         llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
162                       Q_IDX(llq, llq->cons);
163         queue_sync_cons_out(q);
164 }
165
166 static int queue_sync_prod_in(struct arm_smmu_queue *q)
167 {
168         u32 prod;
169         int ret = 0;
170
171         /*
172          * We can't use the _relaxed() variant here, as we must prevent
173          * speculative reads of the queue before we have determined that
174          * prod has indeed moved.
175          */
176         prod = readl(q->prod_reg);
177
178         if (Q_OVF(prod) != Q_OVF(q->llq.prod))
179                 ret = -EOVERFLOW;
180
181         q->llq.prod = prod;
182         return ret;
183 }
184
185 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
186 {
187         u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
188         return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
189 }
190
191 static void queue_poll_init(struct arm_smmu_device *smmu,
192                             struct arm_smmu_queue_poll *qp)
193 {
194         qp->delay = 1;
195         qp->spin_cnt = 0;
196         qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
197         qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
198 }
199
200 static int queue_poll(struct arm_smmu_queue_poll *qp)
201 {
202         if (ktime_compare(ktime_get(), qp->timeout) > 0)
203                 return -ETIMEDOUT;
204
205         if (qp->wfe) {
206                 wfe();
207         } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
208                 cpu_relax();
209         } else {
210                 udelay(qp->delay);
211                 qp->delay *= 2;
212                 qp->spin_cnt = 0;
213         }
214
215         return 0;
216 }
217
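/*
 * queue_poll() escalates gently: WFE when the SMMU can send events
 * (ARM_SMMU_FEAT_SEV), otherwise a short cpu_relax() spin, and finally an
 * exponentially growing udelay().  It gives up with -ETIMEDOUT once
 * ARM_SMMU_POLL_TIMEOUT_US has elapsed since queue_poll_init().
 */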
218 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
219 {
220         int i;
221
222         for (i = 0; i < n_dwords; ++i)
223                 *dst++ = cpu_to_le64(*src++);
224 }
225
226 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
227 {
228         int i;
229
230         for (i = 0; i < n_dwords; ++i)
231                 *dst++ = le64_to_cpu(*src++);
232 }
233
234 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
235 {
236         if (queue_empty(&q->llq))
237                 return -EAGAIN;
238
239         queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
240         queue_inc_cons(&q->llq);
241         queue_sync_cons_out(q);
242         return 0;
243 }
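/*
 * Typical consumer pattern (a sketch of how the event and PRI queue handlers
 * later in this file drain their queues; handle_one_entry() stands in for
 * the real per-entry handler):
 *
 *	u64 ent[EVTQ_ENT_DWORDS];
 *
 *	do {
 *		while (!queue_remove_raw(q, ent))
 *			handle_one_entry(ent);
 *	} while (!queue_empty(&q->llq));
 */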
244
245 /* High-level queue accessors */
246 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
247 {
248         memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
249         cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
250
251         switch (ent->opcode) {
252         case CMDQ_OP_TLBI_EL2_ALL:
253         case CMDQ_OP_TLBI_NSNH_ALL:
254                 break;
255         case CMDQ_OP_PREFETCH_CFG:
256                 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
257                 break;
258         case CMDQ_OP_CFGI_CD:
259                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
260                 fallthrough;
261         case CMDQ_OP_CFGI_STE:
262                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
263                 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
264                 break;
265         case CMDQ_OP_CFGI_CD_ALL:
266                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
267                 break;
268         case CMDQ_OP_CFGI_ALL:
269                 /* Cover the entire SID range */
270                 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
271                 break;
272         case CMDQ_OP_TLBI_NH_VA:
273                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
274                 fallthrough;
275         case CMDQ_OP_TLBI_EL2_VA:
276                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
277                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
278                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
279                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
280                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
281                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
282                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
283                 break;
284         case CMDQ_OP_TLBI_S2_IPA:
285                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
286                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
287                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
288                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
289                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
290                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
291                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
292                 break;
293         case CMDQ_OP_TLBI_NH_ASID:
294                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
295                 fallthrough;
296         case CMDQ_OP_TLBI_S12_VMALL:
297                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
298                 break;
299         case CMDQ_OP_TLBI_EL2_ASID:
300                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
301                 break;
302         case CMDQ_OP_ATC_INV:
303                 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
304                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
305                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
306                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
307                 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
308                 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
309                 break;
310         case CMDQ_OP_PRI_RESP:
311                 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
312                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
313                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
314                 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
315                 switch (ent->pri.resp) {
316                 case PRI_RESP_DENY:
317                 case PRI_RESP_FAIL:
318                 case PRI_RESP_SUCC:
319                         break;
320                 default:
321                         return -EINVAL;
322                 }
323                 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
324                 break;
325         case CMDQ_OP_RESUME:
326                 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
327                 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
328                 cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
329                 break;
330         case CMDQ_OP_CMD_SYNC:
331                 if (ent->sync.msiaddr) {
332                         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
333                         cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
334                 } else {
335                         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
336                 }
337                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
338                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
339                 break;
340         default:
341                 return -ENOENT;
342         }
343
344         return 0;
345 }
346
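/*
 * Example of driving arm_smmu_cmdq_build_cmd() (a sketch; the real TLB
 * invalidation callers appear further down in this file).  To invalidate a
 * single stage-1 VA, fill in an arm_smmu_cmdq_ent and let the builder pack
 * it into CMDQ_ENT_DWORDS 64-bit words:
 *
 *	u64 cmd[CMDQ_ENT_DWORDS];
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode		= CMDQ_OP_TLBI_NH_VA,
 *		.tlbi		= {
 *			.asid	= asid,
 *			.addr	= iova,
 *			.leaf	= true,
 *		},
 *	};
 *
 *	if (!arm_smmu_cmdq_build_cmd(cmd, &ent))
 *		(queue it with arm_smmu_cmdq_issue_cmdlist() or a batch)
 */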
347 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
348 {
349         return &smmu->cmdq;
350 }
351
352 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
353                                          struct arm_smmu_queue *q, u32 prod)
354 {
355         struct arm_smmu_cmdq_ent ent = {
356                 .opcode = CMDQ_OP_CMD_SYNC,
357         };
358
359         /*
360          * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
361          * payload, so the write will zero the entire command on that platform.
362          */
363         if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
364                 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
365                                    q->ent_dwords * 8;
366         }
367
368         arm_smmu_cmdq_build_cmd(cmd, &ent);
369 }
370
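/*
 * Note on the MSI address above: it points at the CMD_SYNC's own slot in the
 * queue (base_dma plus index times entry size), so completion is signalled
 * by the SMMU writing the MSI payload over the first word of the CMD_SYNC.
 * __arm_smmu_cmdq_poll_until_msi() below waits for exactly that word to
 * become zero.
 */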
371 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
372                                      struct arm_smmu_queue *q)
373 {
374         static const char * const cerror_str[] = {
375                 [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
376                 [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
377                 [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
378                 [CMDQ_ERR_CERROR_ATC_INV_IDX]   = "ATC invalidate timeout",
379         };
380
381         int i;
382         u64 cmd[CMDQ_ENT_DWORDS];
383         u32 cons = readl_relaxed(q->cons_reg);
384         u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
385         struct arm_smmu_cmdq_ent cmd_sync = {
386                 .opcode = CMDQ_OP_CMD_SYNC,
387         };
388
389         dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
390                 idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
391
392         switch (idx) {
393         case CMDQ_ERR_CERROR_ABT_IDX:
394                 dev_err(smmu->dev, "retrying command fetch\n");
395                 return;
396         case CMDQ_ERR_CERROR_NONE_IDX:
397                 return;
398         case CMDQ_ERR_CERROR_ATC_INV_IDX:
399                 /*
400                  * ATC Invalidation Completion timeout. CONS is still pointing
401                  * at the CMD_SYNC. Attempt to complete other pending commands
402                  * by repeating the CMD_SYNC, though we might well end up back
403                  * here since the ATC invalidation may still be pending.
404                  */
405                 return;
406         case CMDQ_ERR_CERROR_ILL_IDX:
407         default:
408                 break;
409         }
410
411         /*
412          * We may have concurrent producers, so we need to be careful
413          * not to touch any of the shadow cmdq state.
414          */
415         queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
416         dev_err(smmu->dev, "skipping command in error state:\n");
417         for (i = 0; i < ARRAY_SIZE(cmd); ++i)
418                 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
419
420         /* Convert the erroneous command into a CMD_SYNC */
421         arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
422
423         queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
424 }
425
426 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
427 {
428         __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
429 }
430
431 /*
432  * Command queue locking.
433  * This is a form of bastardised rwlock with the following major changes:
434  *
435  * - The only LOCK routines are exclusive_trylock() and shared_lock().
436  *   Neither have barrier semantics, and instead provide only a control
437  *   dependency.
438  *
439  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
440  *   fails if the caller appears to be the last lock holder (yes, this is
441  *   racy). All successful UNLOCK routines have RELEASE semantics.
442  */
443 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
444 {
445         int val;
446
447         /*
448          * We can try to avoid the cmpxchg() loop by simply incrementing the
449          * lock counter. When held in exclusive state, the lock counter is set
450          * to INT_MIN so these increments won't hurt as the value will remain
451          * negative.
452          */
453         if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
454                 return;
455
456         do {
457                 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
458         } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
459 }
460
461 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
462 {
463         (void)atomic_dec_return_release(&cmdq->lock);
464 }
465
466 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
467 {
468         if (atomic_read(&cmdq->lock) == 1)
469                 return false;
470
471         arm_smmu_cmdq_shared_unlock(cmdq);
472         return true;
473 }
474
475 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)            \
476 ({                                                                      \
477         bool __ret;                                                     \
478         local_irq_save(flags);                                          \
479         __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);       \
480         if (!__ret)                                                     \
481                 local_irq_restore(flags);                               \
482         __ret;                                                          \
483 })
484
485 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)          \
486 ({                                                                      \
487         atomic_set_release(&cmdq->lock, 0);                             \
488         local_irq_restore(flags);                                       \
489 })
490
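/*
 * How the call sites below use this lock (a summary, not extra rules):
 *
 * - A CPU inserting a CMD_SYNC takes the lock shared with
 *   arm_smmu_cmdq_shared_lock() before marking its slot valid, and releases
 *   it with shared_tryunlock()/shared_unlock() once the sync has completed.
 *
 * - A CPU wanting to refresh the shadow cons pointer takes the lock
 *   exclusively via arm_smmu_cmdq_exclusive_trylock_irqsave(); if the
 *   trylock fails, another CPU holds the lock and will publish the update
 *   instead.
 */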
491
492 /*
493  * Command queue insertion.
494  * This is made fiddly by our attempts to achieve some sort of scalability
495  * since there is one queue shared amongst all of the CPUs in the system.  If
496  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
497  * then you'll *love* this monstrosity.
498  *
499  * The basic idea is to split the queue up into ranges of commands that are
500  * owned by a given CPU; the owner may not have written all of the commands
501  * itself, but is responsible for advancing the hardware prod pointer when
502  * the time comes. The algorithm is roughly:
503  *
504  *      1. Allocate some space in the queue. At this point we also discover
505  *         whether the head of the queue is currently owned by another CPU,
506  *         or whether we are the owner.
507  *
508  *      2. Write our commands into our allocated slots in the queue.
509  *
510  *      3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
511  *
512  *      4. If we are an owner:
513  *              a. Wait for the previous owner to finish.
514  *              b. Mark the queue head as unowned, which tells us the range
515  *                 that we are responsible for publishing.
516  *              c. Wait for all commands in our owned range to become valid.
517  *              d. Advance the hardware prod pointer.
518  *              e. Tell the next owner we've finished.
519  *
520  *      5. If we are inserting a CMD_SYNC (we may or may not have been an
521  *         owner), then we need to stick around until it has completed:
522  *              a. If we have MSIs, the SMMU can write back into the CMD_SYNC
523  *                 to clear the first 4 bytes.
524  *              b. Otherwise, we spin waiting for the hardware cons pointer to
525  *                 advance past our command.
526  *
527  * The devil is in the details, particularly the use of locking for handling
528  * SYNC completion and freeing up space in the queue before we think that it is
529  * full.
530  */
531 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
532                                                u32 sprod, u32 eprod, bool set)
533 {
534         u32 swidx, sbidx, ewidx, ebidx;
535         struct arm_smmu_ll_queue llq = {
536                 .max_n_shift    = cmdq->q.llq.max_n_shift,
537                 .prod           = sprod,
538         };
539
540         ewidx = BIT_WORD(Q_IDX(&llq, eprod));
541         ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
542
543         while (llq.prod != eprod) {
544                 unsigned long mask;
545                 atomic_long_t *ptr;
546                 u32 limit = BITS_PER_LONG;
547
548                 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
549                 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
550
551                 ptr = &cmdq->valid_map[swidx];
552
553                 if ((swidx == ewidx) && (sbidx < ebidx))
554                         limit = ebidx;
555
556                 mask = GENMASK(limit - 1, sbidx);
557
558                 /*
559                  * The valid bit is the inverse of the wrap bit. This means
560                  * that a zero-initialised queue is invalid and, after marking
561                  * all entries as valid, they become invalid again when we
562                  * wrap.
563                  */
564                 if (set) {
565                         atomic_long_xor(mask, ptr);
566                 } else { /* Poll */
567                         unsigned long valid;
568
569                         valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
570                         atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
571                 }
572
573                 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
574         }
575 }
576
577 /* Mark all entries in the range [sprod, eprod) as valid */
578 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
579                                         u32 sprod, u32 eprod)
580 {
581         __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
582 }
583
584 /* Wait for all entries in the range [sprod, eprod) to become valid */
585 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
586                                          u32 sprod, u32 eprod)
587 {
588         __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
589 }
590
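/*
 * Worked example of the valid bitmap: while the producer's wrap bit is 0,
 * "valid" means a slot's bit in valid_map is 1; on the next lap (wrap bit 1)
 * the same slot is valid when its bit has been toggled back to 0.  Writers
 * therefore flip their bits with atomic_long_xor(), and pollers compute the
 * expected pattern as (ULONG_MAX + !!wrap) & mask, which is all-ones within
 * the mask for wrap == 0 and all-zeroes for wrap == 1.
 */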
591 /* Wait for the command queue to become non-full */
592 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
593                                              struct arm_smmu_ll_queue *llq)
594 {
595         unsigned long flags;
596         struct arm_smmu_queue_poll qp;
597         struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
598         int ret = 0;
599
600         /*
601          * Try to update our copy of cons by grabbing exclusive cmdq access. If
602          * that fails, spin until somebody else updates it for us.
603          */
604         if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
605                 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
606                 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
607                 llq->val = READ_ONCE(cmdq->q.llq.val);
608                 return 0;
609         }
610
611         queue_poll_init(smmu, &qp);
612         do {
613                 llq->val = READ_ONCE(cmdq->q.llq.val);
614                 if (!queue_full(llq))
615                         break;
616
617                 ret = queue_poll(&qp);
618         } while (!ret);
619
620         return ret;
621 }
622
623 /*
624  * Wait until the SMMU signals a CMD_SYNC completion MSI.
625  * Must be called with the cmdq lock held in some capacity.
626  */
627 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
628                                           struct arm_smmu_ll_queue *llq)
629 {
630         int ret = 0;
631         struct arm_smmu_queue_poll qp;
632         struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
633         u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
634
635         queue_poll_init(smmu, &qp);
636
637         /*
638          * The MSI won't generate an event, since it's being written back
639          * into the command queue.
640          */
641         qp.wfe = false;
642         smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
643         llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
644         return ret;
645 }
646
647 /*
648  * Wait until the SMMU cons index passes llq->prod.
649  * Must be called with the cmdq lock held in some capacity.
650  */
651 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
652                                                struct arm_smmu_ll_queue *llq)
653 {
654         struct arm_smmu_queue_poll qp;
655         struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
656         u32 prod = llq->prod;
657         int ret = 0;
658
659         queue_poll_init(smmu, &qp);
660         llq->val = READ_ONCE(cmdq->q.llq.val);
661         do {
662                 if (queue_consumed(llq, prod))
663                         break;
664
665                 ret = queue_poll(&qp);
666
667                 /*
668                  * This needs to be a readl() so that our subsequent call
669                  * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
670                  *
671                  * Specifically, we need to ensure that we observe all
672                  * shared_lock()s by other CMD_SYNCs that share our owner,
673                  * so that a failing call to tryunlock() means that we're
674                  * the last one out and therefore we can safely advance
675                  * cmdq->q.llq.cons. Roughly speaking:
676                  *
677                  * CPU 0                CPU1                    CPU2 (us)
678                  *
679                  * if (sync)
680                  *      shared_lock();
681                  *
682                  * dma_wmb();
683                  * set_valid_map();
684                  *
685                  *                      if (owner) {
686                  *                              poll_valid_map();
687                  *                              <control dependency>
688                  *                              writel(prod_reg);
689                  *
690                  *                                              readl(cons_reg);
691                  *                                              tryunlock();
692                  *
693                  * Requires us to see CPU 0's shared_lock() acquisition.
694                  */
695                 llq->cons = readl(cmdq->q.cons_reg);
696         } while (!ret);
697
698         return ret;
699 }
700
701 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
702                                          struct arm_smmu_ll_queue *llq)
703 {
704         if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
705                 return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
706
707         return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
708 }
709
710 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
711                                         u32 prod, int n)
712 {
713         int i;
714         struct arm_smmu_ll_queue llq = {
715                 .max_n_shift    = cmdq->q.llq.max_n_shift,
716                 .prod           = prod,
717         };
718
719         for (i = 0; i < n; ++i) {
720                 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
721
722                 prod = queue_inc_prod_n(&llq, i);
723                 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
724         }
725 }
726
727 /*
728  * This is the actual insertion function, and provides the following
729  * ordering guarantees to callers:
730  *
731  * - There is a dma_wmb() before publishing any commands to the queue.
732  *   This can be relied upon to order prior writes to data structures
733  *   in memory (such as a CD or an STE) before the command.
734  *
735  * - On completion of a CMD_SYNC, there is a control dependency.
736  *   This can be relied upon to order subsequent writes to memory (e.g.
737  *   freeing an IOVA) after completion of the CMD_SYNC.
738  *
739  * - Command insertion is totally ordered, so if two CPUs each race to
740  *   insert their own list of commands then all of the commands from one
741  *   CPU will appear before any of the commands from the other CPU.
742  */
743 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
744                                        u64 *cmds, int n, bool sync)
745 {
746         u64 cmd_sync[CMDQ_ENT_DWORDS];
747         u32 prod;
748         unsigned long flags;
749         bool owner;
750         struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
751         struct arm_smmu_ll_queue llq, head;
752         int ret = 0;
753
754         llq.max_n_shift = cmdq->q.llq.max_n_shift;
755
756         /* 1. Allocate some space in the queue */
757         local_irq_save(flags);
758         llq.val = READ_ONCE(cmdq->q.llq.val);
759         do {
760                 u64 old;
761
762                 while (!queue_has_space(&llq, n + sync)) {
763                         local_irq_restore(flags);
764                         if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
765                                 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
766                         local_irq_save(flags);
767                 }
768
769                 head.cons = llq.cons;
770                 head.prod = queue_inc_prod_n(&llq, n + sync) |
771                                              CMDQ_PROD_OWNED_FLAG;
772
773                 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
774                 if (old == llq.val)
775                         break;
776
777                 llq.val = old;
778         } while (1);
779         owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
780         head.prod &= ~CMDQ_PROD_OWNED_FLAG;
781         llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
782
783         /*
784          * 2. Write our commands into the queue
785          * Dependency ordering from the cmpxchg() loop above.
786          */
787         arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
788         if (sync) {
789                 prod = queue_inc_prod_n(&llq, n);
790                 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
791                 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
792
793                 /*
794                  * In order to determine completion of our CMD_SYNC, we must
795                  * ensure that the queue can't wrap twice without us noticing.
796                  * We achieve that by taking the cmdq lock as shared before
797                  * marking our slot as valid.
798                  */
799                 arm_smmu_cmdq_shared_lock(cmdq);
800         }
801
802         /* 3. Mark our slots as valid, ensuring commands are visible first */
803         dma_wmb();
804         arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
805
806         /* 4. If we are the owner, take control of the SMMU hardware */
807         if (owner) {
808                 /* a. Wait for previous owner to finish */
809                 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
810
811                 /* b. Stop gathering work by clearing the owned flag */
812                 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
813                                                    &cmdq->q.llq.atomic.prod);
814                 prod &= ~CMDQ_PROD_OWNED_FLAG;
815
816                 /*
817                  * c. Wait for any gathered work to be written to the queue.
818                  * Note that we read our own entries so that we have the control
819                  * dependency required by (d).
820                  */
821                 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
822
823                 /*
824                  * d. Advance the hardware prod pointer
825                  * Control dependency ordering from the entries becoming valid.
826                  */
827                 writel_relaxed(prod, cmdq->q.prod_reg);
828
829                 /*
830                  * e. Tell the next owner we're done
831                  * Make sure we've updated the hardware first, so that we don't
832                  * race to update prod and potentially move it backwards.
833                  */
834                 atomic_set_release(&cmdq->owner_prod, prod);
835         }
836
837         /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
838         if (sync) {
839                 llq.prod = queue_inc_prod_n(&llq, n);
840                 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
841                 if (ret) {
842                         dev_err_ratelimited(smmu->dev,
843                                             "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
844                                             llq.prod,
845                                             readl_relaxed(cmdq->q.prod_reg),
846                                             readl_relaxed(cmdq->q.cons_reg));
847                 }
848
849                 /*
850                  * Try to unlock the cmdq lock. This will fail if we're the last
851                  * reader, in which case we can safely update cmdq->q.llq.cons
852                  */
853                 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
854                         WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
855                         arm_smmu_cmdq_shared_unlock(cmdq);
856                 }
857         }
858
859         local_irq_restore(flags);
860         return ret;
861 }
862
863 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
864                                      struct arm_smmu_cmdq_ent *ent,
865                                      bool sync)
866 {
867         u64 cmd[CMDQ_ENT_DWORDS];
868
869         if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
870                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
871                          ent->opcode);
872                 return -EINVAL;
873         }
874
875         return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
876 }
877
878 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
879                                    struct arm_smmu_cmdq_ent *ent)
880 {
881         return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
882 }
883
884 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
885                                              struct arm_smmu_cmdq_ent *ent)
886 {
887         return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
888 }
889
890 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
891                                     struct arm_smmu_cmdq_batch *cmds,
892                                     struct arm_smmu_cmdq_ent *cmd)
893 {
894         int index;
895
896         if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
897             (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
898                 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
899                 cmds->num = 0;
900         }
901
902         if (cmds->num == CMDQ_BATCH_ENTRIES) {
903                 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
904                 cmds->num = 0;
905         }
906
907         index = cmds->num * CMDQ_ENT_DWORDS;
908         if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
909                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
910                          cmd->opcode);
911                 return;
912         }
913
914         cmds->num++;
915 }
916
917 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
918                                       struct arm_smmu_cmdq_batch *cmds)
919 {
920         return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
921 }
922
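/*
 * Typical batching pattern (see arm_smmu_sync_cd() below for a real user):
 *
 *	struct arm_smmu_cmdq_batch cmds;
 *
 *	cmds.num = 0;
 *	for (i = 0; i < n; i++) {
 *		cmd.cfgi.sid = sid[i];		(fill in per-target fields)
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	}
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 *
 * batch_add() flushes automatically when the batch fills up, and
 * batch_submit() issues whatever remains together with a trailing CMD_SYNC.
 */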
923 static int arm_smmu_page_response(struct device *dev,
924                                   struct iopf_fault *unused,
925                                   struct iommu_page_response *resp)
926 {
927         struct arm_smmu_cmdq_ent cmd = {0};
928         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
929         int sid = master->streams[0].id;
930
931         if (master->stall_enabled) {
932                 cmd.opcode              = CMDQ_OP_RESUME;
933                 cmd.resume.sid          = sid;
934                 cmd.resume.stag         = resp->grpid;
935                 switch (resp->code) {
936                 case IOMMU_PAGE_RESP_INVALID:
937                 case IOMMU_PAGE_RESP_FAILURE:
938                         cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
939                         break;
940                 case IOMMU_PAGE_RESP_SUCCESS:
941                         cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
942                         break;
943                 default:
944                         return -EINVAL;
945                 }
946         } else {
947                 return -ENODEV;
948         }
949
950         arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
951         /*
952          * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
953          * RESUME consumption guarantees that the stalled transaction will be
954          * terminated... at some point in the future. PRI_RESP is fire and
955          * forget.
956          */
957
958         return 0;
959 }
960
961 /* Context descriptor manipulation functions */
962 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
963 {
964         struct arm_smmu_cmdq_ent cmd = {
965                 .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
966                         CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
967                 .tlbi.asid = asid,
968         };
969
970         arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
971 }
972
973 static void arm_smmu_sync_cd(struct arm_smmu_master *master,
974                              int ssid, bool leaf)
975 {
976         size_t i;
977         struct arm_smmu_cmdq_batch cmds;
978         struct arm_smmu_device *smmu = master->smmu;
979         struct arm_smmu_cmdq_ent cmd = {
980                 .opcode = CMDQ_OP_CFGI_CD,
981                 .cfgi   = {
982                         .ssid   = ssid,
983                         .leaf   = leaf,
984                 },
985         };
986
987         cmds.num = 0;
988         for (i = 0; i < master->num_streams; i++) {
989                 cmd.cfgi.sid = master->streams[i].id;
990                 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
991         }
992
993         arm_smmu_cmdq_batch_submit(smmu, &cmds);
994 }
995
996 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
997                                         struct arm_smmu_l1_ctx_desc *l1_desc)
998 {
999         size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1000
1001         l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1002                                              &l1_desc->l2ptr_dma, GFP_KERNEL);
1003         if (!l1_desc->l2ptr) {
1004                 dev_warn(smmu->dev,
1005                          "failed to allocate context descriptor table\n");
1006                 return -ENOMEM;
1007         }
1008         return 0;
1009 }
1010
1011 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1012                                       struct arm_smmu_l1_ctx_desc *l1_desc)
1013 {
1014         u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1015                   CTXDESC_L1_DESC_V;
1016
1017         /* See comment in arm_smmu_write_ctx_desc() */
1018         WRITE_ONCE(*dst, cpu_to_le64(val));
1019 }
1020
1021 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
1022 {
1023         __le64 *l1ptr;
1024         unsigned int idx;
1025         struct arm_smmu_l1_ctx_desc *l1_desc;
1026         struct arm_smmu_device *smmu = master->smmu;
1027         struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1028
1029         if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1030                 return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS;
1031
1032         idx = ssid >> CTXDESC_SPLIT;
1033         l1_desc = &cd_table->l1_desc[idx];
1034         if (!l1_desc->l2ptr) {
1035                 if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1036                         return NULL;
1037
1038                 l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1039                 arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1040                 /* An invalid L1CD can be cached */
1041                 arm_smmu_sync_cd(master, ssid, false);
1042         }
1043         idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1044         return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1045 }
1046
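/*
 * Two-level CD table indexing, by example: with CTXDESC_SPLIT == 10 and
 * CTXDESC_L2_ENTRIES == 1024 (the values in arm-smmu-v3.h at the time of
 * writing, quoted here purely for illustration), ssid 0x412 selects L1
 * descriptor 0x412 >> 10 = 1 and slot 0x412 & 1023 = 0x12 within that leaf
 * table.  The linear format simply indexes cdtab by ssid, as in the early
 * return above.
 */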
1047 int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
1048                             struct arm_smmu_ctx_desc *cd)
1049 {
1050         /*
1051          * This function handles the following cases:
1052          *
1053          * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
1054          * (2) Install a secondary CD, for SID+SSID traffic.
1055          * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1056          *     CD, then invalidate the old entry and mappings.
1057          * (4) Quiesce the context without clearing the valid bit. Disable
1058          *     translation, and ignore any translation fault.
1059          * (5) Remove a secondary CD.
1060          */
1061         u64 val;
1062         bool cd_live;
1063         __le64 *cdptr;
1064         struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1065         struct arm_smmu_device *smmu = master->smmu;
1066
1067         if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
1068                 return -E2BIG;
1069
1070         cdptr = arm_smmu_get_cd_ptr(master, ssid);
1071         if (!cdptr)
1072                 return -ENOMEM;
1073
1074         val = le64_to_cpu(cdptr[0]);
1075         cd_live = !!(val & CTXDESC_CD_0_V);
1076
1077         if (!cd) { /* (5) */
1078                 val = 0;
1079         } else if (cd == &quiet_cd) { /* (4) */
1080                 if (!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1081                         val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R);
1082                 val |= CTXDESC_CD_0_TCR_EPD0;
1083         } else if (cd_live) { /* (3) */
1084                 val &= ~CTXDESC_CD_0_ASID;
1085                 val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1086                 /*
1087                  * Until CD+TLB invalidation, both ASIDs may be used for tagging
1088                  * this substream's traffic
1089                  */
1090         } else { /* (1) and (2) */
1091                 cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1092                 cdptr[2] = 0;
1093                 cdptr[3] = cpu_to_le64(cd->mair);
1094
1095                 /*
1096                  * STE may be live, and the SMMU might read dwords of this CD in any
1097                  * order. Ensure that it observes valid values before reading
1098                  * V=1.
1099                  */
1100                 arm_smmu_sync_cd(master, ssid, true);
1101
1102                 val = cd->tcr |
1103 #ifdef __BIG_ENDIAN
1104                         CTXDESC_CD_0_ENDI |
1105 #endif
1106                         CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1107                         (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1108                         CTXDESC_CD_0_AA64 |
1109                         FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1110                         CTXDESC_CD_0_V;
1111
1112                 if (cd_table->stall_enabled)
1113                         val |= CTXDESC_CD_0_S;
1114         }
1115
1116         /*
1117          * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1118          * "Configuration structures and configuration invalidation completion"
1119          *
1120          *   The size of single-copy atomic reads made by the SMMU is
1121          *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1122          *   field within an aligned 64-bit span of a structure can be altered
1123          *   without first making the structure invalid.
1124          */
1125         WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1126         arm_smmu_sync_cd(master, ssid, true);
1127         return 0;
1128 }
1129
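/*
 * Callers select between the cases above through the @cd argument: a normal
 * context descriptor installs or updates a CD, &quiet_cd quiesces one
 * (case 4, used by SVA when the mm goes away) and NULL tears it down
 * (case 5), e.g.:
 *
 *	arm_smmu_write_ctx_desc(master, ssid, &quiet_cd);
 *	arm_smmu_write_ctx_desc(master, ssid, NULL);
 */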
1130 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
1131 {
1132         int ret;
1133         size_t l1size;
1134         size_t max_contexts;
1135         struct arm_smmu_device *smmu = master->smmu;
1136         struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1137
1138         cd_table->stall_enabled = master->stall_enabled;
1139         cd_table->s1cdmax = master->ssid_bits;
1140         max_contexts = 1 << cd_table->s1cdmax;
1141
1142         if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1143             max_contexts <= CTXDESC_L2_ENTRIES) {
1144                 cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1145                 cd_table->num_l1_ents = max_contexts;
1146
1147                 l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1148         } else {
1149                 cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1150                 cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
1151                                                   CTXDESC_L2_ENTRIES);
1152
1153                 cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
1154                                               sizeof(*cd_table->l1_desc),
1155                                               GFP_KERNEL);
1156                 if (!cd_table->l1_desc)
1157                         return -ENOMEM;
1158
1159                 l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1160         }
1161
1162         cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
1163                                            GFP_KERNEL);
1164         if (!cd_table->cdtab) {
1165                 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1166                 ret = -ENOMEM;
1167                 goto err_free_l1;
1168         }
1169
1170         return 0;
1171
1172 err_free_l1:
1173         if (cd_table->l1_desc) {
1174                 devm_kfree(smmu->dev, cd_table->l1_desc);
1175                 cd_table->l1_desc = NULL;
1176         }
1177         return ret;
1178 }
1179
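/*
 * Sizing example for the above (illustrative numbers): with ssid_bits == 5
 * there are at most 32 contexts, which fits within CTXDESC_L2_ENTRIES, so a
 * flat table of 32 CDs is allocated.  With ssid_bits == 16 and 2-level
 * support, the L1 table holds DIV_ROUND_UP(65536, CTXDESC_L2_ENTRIES)
 * descriptors and the leaf tables are only allocated on demand by
 * arm_smmu_get_cd_ptr().
 */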
1180 static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
1181 {
1182         int i;
1183         size_t size, l1size;
1184         struct arm_smmu_device *smmu = master->smmu;
1185         struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1186
1187         if (cd_table->l1_desc) {
1188                 size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1189
1190                 for (i = 0; i < cd_table->num_l1_ents; i++) {
1191                         if (!cd_table->l1_desc[i].l2ptr)
1192                                 continue;
1193
1194                         dmam_free_coherent(smmu->dev, size,
1195                                            cd_table->l1_desc[i].l2ptr,
1196                                            cd_table->l1_desc[i].l2ptr_dma);
1197                 }
1198                 devm_kfree(smmu->dev, cd_table->l1_desc);
1199                 cd_table->l1_desc = NULL;
1200
1201                 l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1202         } else {
1203                 l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1204         }
1205
1206         dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
1207         cd_table->cdtab_dma = 0;
1208         cd_table->cdtab = NULL;
1209 }
1210
1211 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1212 {
1213         bool free;
1214         struct arm_smmu_ctx_desc *old_cd;
1215
1216         if (!cd->asid)
1217                 return false;
1218
1219         free = refcount_dec_and_test(&cd->refs);
1220         if (free) {
1221                 old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1222                 WARN_ON(old_cd != cd);
1223         }
1224         return free;
1225 }
1226
1227 /* Stream table manipulation functions */
1228 static void
1229 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1230 {
1231         u64 val = 0;
1232
1233         val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1234         val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1235
1236         /* See comment in arm_smmu_write_ctx_desc() */
1237         WRITE_ONCE(*dst, cpu_to_le64(val));
1238 }
1239
1240 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1241 {
1242         struct arm_smmu_cmdq_ent cmd = {
1243                 .opcode = CMDQ_OP_CFGI_STE,
1244                 .cfgi   = {
1245                         .sid    = sid,
1246                         .leaf   = true,
1247                 },
1248         };
1249
1250         arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1251 }
1252
1253 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1254                                       struct arm_smmu_ste *dst)
1255 {
1256         /*
1257          * This is hideously complicated, but we only really care about
1258          * three cases at the moment:
1259          *
1260          * 1. Invalid (all zero) -> bypass/fault (init)
1261          * 2. Bypass/fault -> translation/bypass (attach)
1262          * 3. Translation/bypass -> bypass/fault (detach)
1263          *
1264          * Given that we can't update the STE atomically and the SMMU
1265          * doesn't read the thing in a defined order, that leaves us
1266          * with the following maintenance requirements:
1267          *
1268          * 1. Update Config, return (init time STEs aren't live)
1269          * 2. Write everything apart from dword 0, sync, write dword 0, sync
1270          * 3. Update Config, sync
1271          */
1272         u64 val = le64_to_cpu(dst->data[0]);
1273         bool ste_live = false;
1274         struct arm_smmu_device *smmu = master->smmu;
1275         struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
1276         struct arm_smmu_s2_cfg *s2_cfg = NULL;
1277         struct arm_smmu_domain *smmu_domain = master->domain;
1278         struct arm_smmu_cmdq_ent prefetch_cmd = {
1279                 .opcode         = CMDQ_OP_PREFETCH_CFG,
1280                 .prefetch       = {
1281                         .sid    = sid,
1282                 },
1283         };
1284
1285         if (smmu_domain) {
1286                 switch (smmu_domain->stage) {
1287                 case ARM_SMMU_DOMAIN_S1:
1288                         cd_table = &master->cd_table;
1289                         break;
1290                 case ARM_SMMU_DOMAIN_S2:
1291                         s2_cfg = &smmu_domain->s2_cfg;
1292                         break;
1293                 default:
1294                         break;
1295                 }
1296         }
1297
1298         if (val & STRTAB_STE_0_V) {
1299                 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1300                 case STRTAB_STE_0_CFG_BYPASS:
1301                         break;
1302                 case STRTAB_STE_0_CFG_S1_TRANS:
1303                 case STRTAB_STE_0_CFG_S2_TRANS:
1304                         ste_live = true;
1305                         break;
1306                 case STRTAB_STE_0_CFG_ABORT:
1307                         BUG_ON(!disable_bypass);
1308                         break;
1309                 default:
1310                         BUG(); /* STE corruption */
1311                 }
1312         }
1313
1314         /* Nuke the existing STE_0 value, as we're going to rewrite it */
1315         val = STRTAB_STE_0_V;
1316
1317         /* Bypass/fault */
1318         if (!smmu_domain || !(cd_table || s2_cfg)) {
1319                 if (!smmu_domain && disable_bypass)
1320                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1321                 else
1322                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1323
1324                 dst->data[0] = cpu_to_le64(val);
1325                 dst->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1326                                                 STRTAB_STE_1_SHCFG_INCOMING));
1327                 dst->data[2] = 0; /* Nuke the VMID */
1328                 /*
1329                  * The SMMU can perform negative caching, so we must sync
1330                  * the STE regardless of whether the old value was live.
1331                  */
1332                 if (smmu)
1333                         arm_smmu_sync_ste_for_sid(smmu, sid);
1334                 return;
1335         }
1336
1337         if (cd_table) {
1338                 u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1339                         STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1340
1341                 BUG_ON(ste_live);
1342                 dst->data[1] = cpu_to_le64(
1343                          FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1344                          FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1345                          FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1346                          FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1347                          FIELD_PREP(STRTAB_STE_1_STRW, strw));
1348
1349                 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1350                     !master->stall_enabled)
1351                         dst->data[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1352
1353                 val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1354                         FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1355                         FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
1356                         FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
1357         }
1358
1359         if (s2_cfg) {
1360                 BUG_ON(ste_live);
1361                 dst->data[2] = cpu_to_le64(
1362                          FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1363                          FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1364 #ifdef __BIG_ENDIAN
1365                          STRTAB_STE_2_S2ENDI |
1366 #endif
1367                          STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1368                          STRTAB_STE_2_S2R);
1369
1370                 dst->data[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1371
1372                 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1373         }
1374
1375         if (master->ats_enabled)
1376                 dst->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1377                                                  STRTAB_STE_1_EATS_TRANS));
1378
1379         arm_smmu_sync_ste_for_sid(smmu, sid);
1380         /* See comment in arm_smmu_write_ctx_desc() */
1381         WRITE_ONCE(dst->data[0], cpu_to_le64(val));
1382         arm_smmu_sync_ste_for_sid(smmu, sid);
1383
1384         /* It's likely that we'll want to use the new STE soon */
1385         if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1386                 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1387 }
1388
1389 static void arm_smmu_init_bypass_stes(struct arm_smmu_ste *strtab,
1390                                       unsigned int nent, bool force)
1391 {
1392         unsigned int i;
1393         u64 val = STRTAB_STE_0_V;
1394
1395         if (disable_bypass && !force)
1396                 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1397         else
1398                 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1399
1400         for (i = 0; i < nent; ++i) {
1401                 strtab->data[0] = cpu_to_le64(val);
1402                 strtab->data[1] = cpu_to_le64(FIELD_PREP(
1403                         STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
1404                 strtab->data[2] = 0;
1405                 strtab++;
1406         }
1407 }
1408
1409 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1410 {
1411         size_t size;
1412         void *strtab;
1413         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1414         struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1415
1416         if (desc->l2ptr)
1417                 return 0;
1418
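             /*
              * A leaf table covers 2^STRTAB_SPLIT stream IDs, each with an STE
              * of STRTAB_STE_DWORDS 64-bit words (hence the extra +3 below to
              * convert dwords to bytes).
              */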
1419         size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1420         strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1421
1422         desc->span = STRTAB_SPLIT + 1;
1423         desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1424                                           GFP_KERNEL);
1425         if (!desc->l2ptr) {
1426                 dev_err(smmu->dev,
1427                         "failed to allocate l2 stream table for SID %u\n",
1428                         sid);
1429                 return -ENOMEM;
1430         }
1431
1432         arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
1433         arm_smmu_write_strtab_l1_desc(strtab, desc);
1434         return 0;
1435 }
1436
1437 static struct arm_smmu_master *
1438 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1439 {
1440         struct rb_node *node;
1441         struct arm_smmu_stream *stream;
1442
1443         lockdep_assert_held(&smmu->streams_mutex);
1444
1445         node = smmu->streams.rb_node;
1446         while (node) {
1447                 stream = rb_entry(node, struct arm_smmu_stream, node);
1448                 if (stream->id < sid)
1449                         node = node->rb_right;
1450                 else if (stream->id > sid)
1451                         node = node->rb_left;
1452                 else
1453                         return stream->master;
1454         }
1455
1456         return NULL;
1457 }
1458
1459 /* IRQ and event handlers */
1460 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1461 {
1462         int ret;
1463         u32 perm = 0;
1464         struct arm_smmu_master *master;
1465         bool ssid_valid = evt[0] & EVTQ_0_SSV;
1466         u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1467         struct iopf_fault fault_evt = { };
1468         struct iommu_fault *flt = &fault_evt.fault;
1469
1470         switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1471         case EVT_ID_TRANSLATION_FAULT:
1472         case EVT_ID_ADDR_SIZE_FAULT:
1473         case EVT_ID_ACCESS_FAULT:
1474         case EVT_ID_PERMISSION_FAULT:
1475                 break;
1476         default:
1477                 return -EOPNOTSUPP;
1478         }
1479
1480         /* Stage-2 is always pinned at the moment */
1481         if (evt[1] & EVTQ_1_S2)
1482                 return -EFAULT;
1483
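             /* Only stalled transactions can be resolved via a page response */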
1484         if (!(evt[1] & EVTQ_1_STALL))
1485                 return -EOPNOTSUPP;
1486
1487         if (evt[1] & EVTQ_1_RnW)
1488                 perm |= IOMMU_FAULT_PERM_READ;
1489         else
1490                 perm |= IOMMU_FAULT_PERM_WRITE;
1491
1492         if (evt[1] & EVTQ_1_InD)
1493                 perm |= IOMMU_FAULT_PERM_EXEC;
1494
1495         if (evt[1] & EVTQ_1_PnU)
1496                 perm |= IOMMU_FAULT_PERM_PRIV;
1497
1498         flt->type = IOMMU_FAULT_PAGE_REQ;
1499         flt->prm = (struct iommu_fault_page_request) {
1500                 .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1501                 .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1502                 .perm = perm,
1503                 .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1504         };
1505
1506         if (ssid_valid) {
1507                 flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1508                 flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1509         }
1510
1511         mutex_lock(&smmu->streams_mutex);
1512         master = arm_smmu_find_master(smmu, sid);
1513         if (!master) {
1514                 ret = -EINVAL;
1515                 goto out_unlock;
1516         }
1517
1518         ret = iommu_report_device_fault(master->dev, &fault_evt);
1519         if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1520                 /* Nobody cared, abort the access */
1521                 struct iommu_page_response resp = {
1522                         .pasid          = flt->prm.pasid,
1523                         .grpid          = flt->prm.grpid,
1524                         .code           = IOMMU_PAGE_RESP_FAILURE,
1525                 };
1526                 arm_smmu_page_response(master->dev, &fault_evt, &resp);
1527         }
1528
1529 out_unlock:
1530         mutex_unlock(&smmu->streams_mutex);
1531         return ret;
1532 }
1533
1534 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1535 {
1536         int i, ret;
1537         struct arm_smmu_device *smmu = dev;
1538         struct arm_smmu_queue *q = &smmu->evtq.q;
1539         struct arm_smmu_ll_queue *llq = &q->llq;
1540         static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1541                                       DEFAULT_RATELIMIT_BURST);
1542         u64 evt[EVTQ_ENT_DWORDS];
1543
1544         do {
1545                 while (!queue_remove_raw(q, evt)) {
1546                         u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1547
1548                         ret = arm_smmu_handle_evt(smmu, evt);
1549                         if (!ret || !__ratelimit(&rs))
1550                                 continue;
1551
1552                         dev_info(smmu->dev, "event 0x%02x received:\n", id);
1553                         for (i = 0; i < ARRAY_SIZE(evt); ++i)
1554                                 dev_info(smmu->dev, "\t0x%016llx\n",
1555                                          (unsigned long long)evt[i]);
1556
1557                         cond_resched();
1558                 }
1559
1560                 /*
1561                  * Not much we can do on overflow, so scream and pretend we're
1562                  * trying harder.
1563                  */
1564                 if (queue_sync_prod_in(q) == -EOVERFLOW)
1565                         dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1566         } while (!queue_empty(llq));
1567
1568         /* Sync our overflow flag, as we believe we're up to speed */
1569         queue_sync_cons_ovf(q);
1570         return IRQ_HANDLED;
1571 }
1572
1573 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1574 {
1575         u32 sid, ssid;
1576         u16 grpid;
1577         bool ssv, last;
1578
1579         sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1580         ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1581         ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1582         last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1583         grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1584
1585         dev_info(smmu->dev, "unexpected PRI request received:\n");
1586         dev_info(smmu->dev,
1587                  "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1588                  sid, ssid, grpid, last ? "L" : "",
1589                  evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1590                  evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1591                  evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1592                  evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1593                  evt[1] & PRIQ_1_ADDR_MASK);
1594
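             /* The driver doesn't service PRI, so deny the whole Page Request Group */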
1595         if (last) {
1596                 struct arm_smmu_cmdq_ent cmd = {
1597                         .opcode                 = CMDQ_OP_PRI_RESP,
1598                         .substream_valid        = ssv,
1599                         .pri                    = {
1600                                 .sid    = sid,
1601                                 .ssid   = ssid,
1602                                 .grpid  = grpid,
1603                                 .resp   = PRI_RESP_DENY,
1604                         },
1605                 };
1606
1607                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1608         }
1609 }
1610
1611 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1612 {
1613         struct arm_smmu_device *smmu = dev;
1614         struct arm_smmu_queue *q = &smmu->priq.q;
1615         struct arm_smmu_ll_queue *llq = &q->llq;
1616         u64 evt[PRIQ_ENT_DWORDS];
1617
1618         do {
1619                 while (!queue_remove_raw(q, evt))
1620                         arm_smmu_handle_ppr(smmu, evt);
1621
1622                 if (queue_sync_prod_in(q) == -EOVERFLOW)
1623                         dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1624         } while (!queue_empty(llq));
1625
1626         /* Sync our overflow flag, as we believe we're up to speed */
1627         queue_sync_cons_ovf(q);
1628         return IRQ_HANDLED;
1629 }
1630
1631 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1632
1633 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1634 {
1635         u32 gerror, gerrorn, active;
1636         struct arm_smmu_device *smmu = dev;
1637
1638         gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1639         gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1640
1641         active = gerror ^ gerrorn;
1642         if (!(active & GERROR_ERR_MASK))
1643                 return IRQ_NONE; /* No errors pending */
1644
1645         dev_warn(smmu->dev,
1646                  "unexpected global error reported (0x%08x), this could be serious\n",
1647                  active);
1648
1649         if (active & GERROR_SFM_ERR) {
1650                 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1651                 arm_smmu_device_disable(smmu);
1652         }
1653
1654         if (active & GERROR_MSI_GERROR_ABT_ERR)
1655                 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1656
1657         if (active & GERROR_MSI_PRIQ_ABT_ERR)
1658                 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1659
1660         if (active & GERROR_MSI_EVTQ_ABT_ERR)
1661                 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1662
1663         if (active & GERROR_MSI_CMDQ_ABT_ERR)
1664                 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1665
1666         if (active & GERROR_PRIQ_ABT_ERR)
1667                 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1668
1669         if (active & GERROR_EVTQ_ABT_ERR)
1670                 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1671
1672         if (active & GERROR_CMDQ_ERR)
1673                 arm_smmu_cmdq_skip_err(smmu);
1674
1675         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1676         return IRQ_HANDLED;
1677 }
1678
1679 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1680 {
1681         struct arm_smmu_device *smmu = dev;
1682
1683         arm_smmu_evtq_thread(irq, dev);
1684         if (smmu->features & ARM_SMMU_FEAT_PRI)
1685                 arm_smmu_priq_thread(irq, dev);
1686
1687         return IRQ_HANDLED;
1688 }
1689
1690 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1691 {
1692         arm_smmu_gerror_handler(irq, dev);
1693         return IRQ_WAKE_THREAD;
1694 }
1695
1696 static void
1697 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1698                         struct arm_smmu_cmdq_ent *cmd)
1699 {
1700         size_t log2_span;
1701         size_t span_mask;
1702         /* ATC invalidates are always on 4096-byte pages */
1703         size_t inval_grain_shift = 12;
1704         unsigned long page_start, page_end;
1705
1706         /*
1707          * ATS and PASID:
1708          *
1709          * If substream_valid is clear, the PCIe TLP is sent without a PASID
1710          * prefix. In that case all ATC entries within the address range are
1711          * invalidated, including those that were requested with a PASID! There
1712          * is no way to invalidate only entries without PASID.
1713          *
1714          * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1715          * traffic), translation requests without PASID create ATC entries
1716          * without PASID, which must be invalidated with substream_valid clear.
1717          * This has the unpleasant side-effect of invalidating all PASID-tagged
1718          * ATC entries within the address range.
1719          */
1720         *cmd = (struct arm_smmu_cmdq_ent) {
1721                 .opcode                 = CMDQ_OP_ATC_INV,
1722                 .substream_valid        = (ssid != IOMMU_NO_PASID),
1723                 .atc.ssid               = ssid,
1724         };
1725
1726         if (!size) {
1727                 cmd->atc.size = ATC_INV_SIZE_ALL;
1728                 return;
1729         }
1730
1731         page_start      = iova >> inval_grain_shift;
1732         page_end        = (iova + size - 1) >> inval_grain_shift;
1733
1734         /*
1735          * In an ATS Invalidate Request, the address must be aligned on the
1736          * range size, which must be a power of two number of page sizes. We
1737          * thus have to choose between grossly over-invalidating the region, or
1738          * splitting the invalidation into multiple commands. For simplicity
1739          * we'll go with the first solution, but should refine it in the future
1740          * if multiple commands are shown to be more efficient.
1741          *
1742          * Find the smallest power of two that covers the range. The most
1743          * significant differing bit between the start and end addresses,
1744          * fls(start ^ end), indicates the required span. For example:
1745          *
1746          * We want to invalidate pages [8; 11]. This is already the ideal range:
1747          *              x = 0b1000 ^ 0b1011 = 0b11
1748          *              span = 1 << fls(x) = 4
1749          *
1750          * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1751          *              x = 0b0111 ^ 0b1010 = 0b1101
1752          *              span = 1 << fls(x) = 16
1753          */
1754         log2_span       = fls_long(page_start ^ page_end);
1755         span_mask       = (1ULL << log2_span) - 1;
1756
1757         page_start      &= ~span_mask;
1758
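             /*
              * E.g. for the [7; 10] example above: log2_span = 4, page_start
              * is aligned down to 0 and the command invalidates 16 pages.
              */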
1759         cmd->atc.addr   = page_start << inval_grain_shift;
1760         cmd->atc.size   = log2_span;
1761 }
1762
1763 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1764 {
1765         int i;
1766         struct arm_smmu_cmdq_ent cmd;
1767         struct arm_smmu_cmdq_batch cmds;
1768
1769         arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
1770
1771         cmds.num = 0;
1772         for (i = 0; i < master->num_streams; i++) {
1773                 cmd.atc.sid = master->streams[i].id;
1774                 arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1775         }
1776
1777         return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1778 }
1779
1780 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1781                             unsigned long iova, size_t size)
1782 {
1783         int i;
1784         unsigned long flags;
1785         struct arm_smmu_cmdq_ent cmd;
1786         struct arm_smmu_master *master;
1787         struct arm_smmu_cmdq_batch cmds;
1788
1789         if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1790                 return 0;
1791
1792         /*
1793          * Ensure that we've completed prior invalidation of the main TLBs
1794          * before we read 'nr_ats_masters' in case of a concurrent call to
1795          * arm_smmu_enable_ats():
1796          *
1797          *      // unmap()                      // arm_smmu_enable_ats()
1798          *      TLBI+SYNC                       atomic_inc(&nr_ats_masters);
1799          *      smp_mb();                       [...]
1800          *      atomic_read(&nr_ats_masters);   pci_enable_ats() // writel()
1801          *
1802          * Ensures that we always see the incremented 'nr_ats_masters' count if
1803          * ATS was enabled at the PCI device before completion of the TLBI.
1804          */
1805         smp_mb();
1806         if (!atomic_read(&smmu_domain->nr_ats_masters))
1807                 return 0;
1808
1809         arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1810
1811         cmds.num = 0;
1812
1813         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1814         list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1815                 if (!master->ats_enabled)
1816                         continue;
1817
1818                 for (i = 0; i < master->num_streams; i++) {
1819                         cmd.atc.sid = master->streams[i].id;
1820                         arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1821                 }
1822         }
1823         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1824
1825         return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1826 }
1827
1828 /* IO_PGTABLE API */
1829 static void arm_smmu_tlb_inv_context(void *cookie)
1830 {
1831         struct arm_smmu_domain *smmu_domain = cookie;
1832         struct arm_smmu_device *smmu = smmu_domain->smmu;
1833         struct arm_smmu_cmdq_ent cmd;
1834
1835         /*
1836          * NOTE: when io-pgtable is in non-strict mode, we may get here with
1837          * PTEs previously cleared by unmaps on the current CPU not yet visible
1838          * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1839          * insertion to guarantee those are observed before the TLBI. Do be
1840          * careful, 007.
1841          */
1842         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1843                 arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
1844         } else {
1845                 cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1846                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1847                 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1848         }
1849         arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
1850 }
1851
1852 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1853                                      unsigned long iova, size_t size,
1854                                      size_t granule,
1855                                      struct arm_smmu_domain *smmu_domain)
1856 {
1857         struct arm_smmu_device *smmu = smmu_domain->smmu;
1858         unsigned long end = iova + size, num_pages = 0, tg = 0;
1859         size_t inv_range = granule;
1860         struct arm_smmu_cmdq_batch cmds;
1861
1862         if (!size)
1863                 return;
1864
1865         if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1866                 /* Get the leaf page size */
1867                 tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1868
1869                 num_pages = size >> tg;
1870
1871                 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1872                 cmd->tlbi.tg = (tg - 10) / 2;
1873
1874                 /*
1875                  * Determine what level the granule is at. For non-leaf, both
1876                  * io-pgtable and SVA pass a nominal last-level granule because
1877                  * they don't know what level(s) actually apply, so ignore that
1878                  * and leave TTL=0. However for various errata reasons we still
1879                  * want to use a range command, so avoid the SVA corner case
1880                  * where both scale and num could be 0 as well.
1881                  */
1882                 if (cmd->tlbi.leaf)
1883                         cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1884                 else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
1885                         num_pages++;
1886         }
1887
1888         cmds.num = 0;
1889
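             /*
              * E.g. with 4K pages (tg = 12), invalidating 35 pages is split
              * into two range commands below: 3 pages at scale 0, then
              * 32 pages at scale 5.
              */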
1890         while (iova < end) {
1891                 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1892                         /*
1893                          * On each iteration of the loop, the range is 5 bits
1894                          * worth of the aligned size remaining.
1895                          * The range in pages is:
1896                          *
1897                          * range = (num_pages & (0x1f << __ffs(num_pages)))
1898                          */
1899                         unsigned long scale, num;
1900
1901                         /* Determine the power of 2 multiple number of pages */
1902                         scale = __ffs(num_pages);
1903                         cmd->tlbi.scale = scale;
1904
1905                         /* Determine how many chunks of 2^scale size we have */
1906                         num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1907                         cmd->tlbi.num = num - 1;
1908
1909                         /* range is num * 2^scale * pgsize */
1910                         inv_range = num << (scale + tg);
1911
1912                         /* Clear out the lower order bits for the next iteration */
1913                         num_pages -= num << scale;
1914                 }
1915
1916                 cmd->tlbi.addr = iova;
1917                 arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1918                 iova += inv_range;
1919         }
1920         arm_smmu_cmdq_batch_submit(smmu, &cmds);
1921 }
1922
1923 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1924                                           size_t granule, bool leaf,
1925                                           struct arm_smmu_domain *smmu_domain)
1926 {
1927         struct arm_smmu_cmdq_ent cmd = {
1928                 .tlbi = {
1929                         .leaf   = leaf,
1930                 },
1931         };
1932
1933         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1934                 cmd.opcode      = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1935                                   CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1936                 cmd.tlbi.asid   = smmu_domain->cd.asid;
1937         } else {
1938                 cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1939                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1940         }
1941         __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1942
1943         /*
1944          * Unfortunately, this can't be leaf-only since we may have
1945          * zapped an entire table.
1946          */
1947         arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size);
1948 }
1949
1950 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1951                                  size_t granule, bool leaf,
1952                                  struct arm_smmu_domain *smmu_domain)
1953 {
1954         struct arm_smmu_cmdq_ent cmd = {
1955                 .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1956                           CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1957                 .tlbi = {
1958                         .asid   = asid,
1959                         .leaf   = leaf,
1960                 },
1961         };
1962
1963         __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1964 }
1965
1966 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1967                                          unsigned long iova, size_t granule,
1968                                          void *cookie)
1969 {
1970         struct arm_smmu_domain *smmu_domain = cookie;
1971         struct iommu_domain *domain = &smmu_domain->domain;
1972
1973         iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1974 }
1975
1976 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1977                                   size_t granule, void *cookie)
1978 {
1979         arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1980 }
1981
1982 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1983         .tlb_flush_all  = arm_smmu_tlb_inv_context,
1984         .tlb_flush_walk = arm_smmu_tlb_inv_walk,
1985         .tlb_add_page   = arm_smmu_tlb_inv_page_nosync,
1986 };
1987
1988 /* IOMMU API */
1989 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
1990 {
1991         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
1992
1993         switch (cap) {
1994         case IOMMU_CAP_CACHE_COHERENCY:
1995                 /* Assume that a coherent TCU implies coherent TBUs */
1996                 return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
1997         case IOMMU_CAP_NOEXEC:
1998         case IOMMU_CAP_DEFERRED_FLUSH:
1999                 return true;
2000         default:
2001                 return false;
2002         }
2003 }
2004
2005 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2006 {
2007         struct arm_smmu_domain *smmu_domain;
2008
2009         if (type == IOMMU_DOMAIN_SVA)
2010                 return arm_smmu_sva_domain_alloc();
2011
2012         if (type != IOMMU_DOMAIN_UNMANAGED &&
2013             type != IOMMU_DOMAIN_DMA &&
2014             type != IOMMU_DOMAIN_IDENTITY)
2015                 return NULL;
2016
2017         /*
2018          * Allocate the domain and initialise some of its data structures.
2019          * We can't really do anything meaningful until we've added a
2020          * master.
2021          */
2022         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2023         if (!smmu_domain)
2024                 return NULL;
2025
2026         mutex_init(&smmu_domain->init_mutex);
2027         INIT_LIST_HEAD(&smmu_domain->devices);
2028         spin_lock_init(&smmu_domain->devices_lock);
2029         INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2030
2031         return &smmu_domain->domain;
2032 }
2033
2034 static void arm_smmu_domain_free(struct iommu_domain *domain)
2035 {
2036         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2037         struct arm_smmu_device *smmu = smmu_domain->smmu;
2038
2039         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2040
2041         /* Free the ASID or VMID */
2042         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2043                 /* Prevent SVA from touching the CD while we're freeing it */
2044                 mutex_lock(&arm_smmu_asid_lock);
2045                 arm_smmu_free_asid(&smmu_domain->cd);
2046                 mutex_unlock(&arm_smmu_asid_lock);
2047         } else {
2048                 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2049                 if (cfg->vmid)
2050                         ida_free(&smmu->vmid_map, cfg->vmid);
2051         }
2052
2053         kfree(smmu_domain);
2054 }
2055
2056 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2057                                        struct io_pgtable_cfg *pgtbl_cfg)
2058 {
2059         int ret;
2060         u32 asid;
2061         struct arm_smmu_device *smmu = smmu_domain->smmu;
2062         struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
2063         typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2064
2065         refcount_set(&cd->refs, 1);
2066
2067         /* Prevent SVA from modifying the ASID until it is written to the CD */
2068         mutex_lock(&arm_smmu_asid_lock);
2069         ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd,
2070                        XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2071         if (ret)
2072                 goto out_unlock;
2073
2074         cd->asid        = (u16)asid;
2075         cd->ttbr        = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2076         cd->tcr         = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2077                           FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2078                           FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2079                           FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2080                           FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2081                           FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2082                           CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2083         cd->mair        = pgtbl_cfg->arm_lpae_s1_cfg.mair;
2084
2085         mutex_unlock(&arm_smmu_asid_lock);
2086         return 0;
2087
2088 out_unlock:
2089         mutex_unlock(&arm_smmu_asid_lock);
2090         return ret;
2091 }
2092
2093 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2094                                        struct io_pgtable_cfg *pgtbl_cfg)
2095 {
2096         int vmid;
2097         struct arm_smmu_device *smmu = smmu_domain->smmu;
2098         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2099         typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2100
2101         /* Reserve VMID 0 for stage-2 bypass STEs */
2102         vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2103                                GFP_KERNEL);
2104         if (vmid < 0)
2105                 return vmid;
2106
2107         vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2108         cfg->vmid       = (u16)vmid;
2109         cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2110         cfg->vtcr       = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2111                           FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2112                           FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2113                           FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2114                           FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2115                           FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2116                           FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2117         return 0;
2118 }
2119
2120 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
2121 {
2122         int ret;
2123         unsigned long ias, oas;
2124         enum io_pgtable_fmt fmt;
2125         struct io_pgtable_cfg pgtbl_cfg;
2126         struct io_pgtable_ops *pgtbl_ops;
2127         int (*finalise_stage_fn)(struct arm_smmu_domain *,
2128                                  struct io_pgtable_cfg *);
2129         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2130         struct arm_smmu_device *smmu = smmu_domain->smmu;
2131
2132         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2133                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2134                 return 0;
2135         }
2136
2137         /* Restrict the stage to what we can actually support */
2138         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2139                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2140         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2141                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2142
2143         switch (smmu_domain->stage) {
2144         case ARM_SMMU_DOMAIN_S1:
2145                 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2146                 ias = min_t(unsigned long, ias, VA_BITS);
2147                 oas = smmu->ias;
2148                 fmt = ARM_64_LPAE_S1;
2149                 finalise_stage_fn = arm_smmu_domain_finalise_s1;
2150                 break;
2151         case ARM_SMMU_DOMAIN_S2:
2152                 ias = smmu->ias;
2153                 oas = smmu->oas;
2154                 fmt = ARM_64_LPAE_S2;
2155                 finalise_stage_fn = arm_smmu_domain_finalise_s2;
2156                 break;
2157         default:
2158                 return -EINVAL;
2159         }
2160
2161         pgtbl_cfg = (struct io_pgtable_cfg) {
2162                 .pgsize_bitmap  = smmu->pgsize_bitmap,
2163                 .ias            = ias,
2164                 .oas            = oas,
2165                 .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2166                 .tlb            = &arm_smmu_flush_ops,
2167                 .iommu_dev      = smmu->dev,
2168         };
2169
2170         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2171         if (!pgtbl_ops)
2172                 return -ENOMEM;
2173
2174         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2175         domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2176         domain->geometry.force_aperture = true;
2177
2178         ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
2179         if (ret < 0) {
2180                 free_io_pgtable_ops(pgtbl_ops);
2181                 return ret;
2182         }
2183
2184         smmu_domain->pgtbl_ops = pgtbl_ops;
2185         return 0;
2186 }
2187
2188 static struct arm_smmu_ste *
2189 arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2190 {
2191         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2192
2193         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2194                 unsigned int idx1, idx2;
2195
2196                 /* Two-level walk */
2197                 idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2198                 idx2 = sid & ((1 << STRTAB_SPLIT) - 1);
2199                 return &cfg->l1_desc[idx1].l2ptr[idx2];
2200         } else {
2201                 /* Simple linear lookup */
2202                 return (struct arm_smmu_ste *)
2203                                 &cfg->strtab[sid * STRTAB_STE_DWORDS];
2204         }
2205 }
2206
2207 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2208 {
2209         int i, j;
2210         struct arm_smmu_device *smmu = master->smmu;
2211
2212         for (i = 0; i < master->num_streams; ++i) {
2213                 u32 sid = master->streams[i].id;
2214                 struct arm_smmu_ste *step =
2215                         arm_smmu_get_step_for_sid(smmu, sid);
2216
2217                 /* Bridged PCI devices may end up with duplicated IDs */
2218                 for (j = 0; j < i; j++)
2219                         if (master->streams[j].id == sid)
2220                                 break;
2221                 if (j < i)
2222                         continue;
2223
2224                 arm_smmu_write_strtab_ent(master, sid, step);
2225         }
2226 }
2227
2228 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2229 {
2230         struct device *dev = master->dev;
2231         struct arm_smmu_device *smmu = master->smmu;
2232         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2233
2234         if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2235                 return false;
2236
2237         if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2238                 return false;
2239
2240         return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2241 }
2242
2243 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2244 {
2245         size_t stu;
2246         struct pci_dev *pdev;
2247         struct arm_smmu_device *smmu = master->smmu;
2248         struct arm_smmu_domain *smmu_domain = master->domain;
2249
2250         /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2251         if (!master->ats_enabled)
2252                 return;
2253
2254         /* Smallest Translation Unit: log2 of the smallest supported granule */
2255         stu = __ffs(smmu->pgsize_bitmap);
2256         pdev = to_pci_dev(master->dev);
2257
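             /*
              * Bump nr_ats_masters and flush the ATC before the endpoint can
              * issue translation requests (see arm_smmu_atc_inv_domain()).
              */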
2258         atomic_inc(&smmu_domain->nr_ats_masters);
2259         arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2260         if (pci_enable_ats(pdev, stu))
2261                 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2262 }
2263
2264 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2265 {
2266         struct arm_smmu_domain *smmu_domain = master->domain;
2267
2268         if (!master->ats_enabled)
2269                 return;
2270
2271         pci_disable_ats(to_pci_dev(master->dev));
2272         /*
2273          * Ensure ATS is disabled at the endpoint before we issue the
2274          * ATC invalidation via the SMMU.
2275          */
2276         wmb();
2277         arm_smmu_atc_inv_master(master);
2278         atomic_dec(&smmu_domain->nr_ats_masters);
2279 }
2280
2281 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2282 {
2283         int ret;
2284         int features;
2285         int num_pasids;
2286         struct pci_dev *pdev;
2287
2288         if (!dev_is_pci(master->dev))
2289                 return -ENODEV;
2290
2291         pdev = to_pci_dev(master->dev);
2292
2293         features = pci_pasid_features(pdev);
2294         if (features < 0)
2295                 return features;
2296
2297         num_pasids = pci_max_pasids(pdev);
2298         if (num_pasids <= 0)
2299                 return num_pasids;
2300
2301         ret = pci_enable_pasid(pdev, features);
2302         if (ret) {
2303                 dev_err(&pdev->dev, "Failed to enable PASID\n");
2304                 return ret;
2305         }
2306
2307         master->ssid_bits = min_t(u8, ilog2(num_pasids),
2308                                   master->smmu->ssid_bits);
2309         return 0;
2310 }
2311
2312 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2313 {
2314         struct pci_dev *pdev;
2315
2316         if (!dev_is_pci(master->dev))
2317                 return;
2318
2319         pdev = to_pci_dev(master->dev);
2320
2321         if (!pdev->pasid_enabled)
2322                 return;
2323
2324         master->ssid_bits = 0;
2325         pci_disable_pasid(pdev);
2326 }
2327
2328 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2329 {
2330         unsigned long flags;
2331         struct arm_smmu_domain *smmu_domain = master->domain;
2332
2333         if (!smmu_domain)
2334                 return;
2335
2336         arm_smmu_disable_ats(master);
2337
2338         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2339         list_del(&master->domain_head);
2340         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2341
2342         master->domain = NULL;
2343         master->ats_enabled = false;
2344         arm_smmu_install_ste_for_dev(master);
2345         /*
2346          * Clearing the CD entry isn't strictly required to detach the domain
2347          * since the table is uninstalled anyway, but it helps avoid confusion
2348          * in the call to arm_smmu_write_ctx_desc on the next attach (which
2349          * expects the entry to be empty).
2350          */
2351         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->cd_table.cdtab)
2352                 arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL);
2353 }
2354
2355 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2356 {
2357         int ret = 0;
2358         unsigned long flags;
2359         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2360         struct arm_smmu_device *smmu;
2361         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2362         struct arm_smmu_master *master;
2363
2364         if (!fwspec)
2365                 return -ENOENT;
2366
2367         master = dev_iommu_priv_get(dev);
2368         smmu = master->smmu;
2369
2370         /*
2371          * Checking that SVA is disabled ensures that this device isn't bound to
2372          * any mm, and can be safely detached from its old domain. Bonds cannot
2373          * be removed concurrently since we're holding the group mutex.
2374          */
2375         if (arm_smmu_master_sva_enabled(master)) {
2376                 dev_err(dev, "cannot attach - SVA enabled\n");
2377                 return -EBUSY;
2378         }
2379
2380         arm_smmu_detach_dev(master);
2381
2382         mutex_lock(&smmu_domain->init_mutex);
2383
2384         if (!smmu_domain->smmu) {
2385                 smmu_domain->smmu = smmu;
2386                 ret = arm_smmu_domain_finalise(domain);
2387                 if (ret)
2388                         smmu_domain->smmu = NULL;
2389         } else if (smmu_domain->smmu != smmu)
2390                 ret = -EINVAL;
2391
2392         mutex_unlock(&smmu_domain->init_mutex);
2393         if (ret)
2394                 return ret;
2395
2396         master->domain = smmu_domain;
2397
2398         /*
2399          * The SMMU does not support enabling ATS with bypass. When the STE is
2400          * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and
2401          * Translated transactions are denied as though ATS is disabled for the
2402          * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
2403          * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
2404          */
2405         if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2406                 master->ats_enabled = arm_smmu_ats_supported(master);
2407
2408         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2409         list_add(&master->domain_head, &smmu_domain->devices);
2410         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2411
2412         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2413                 if (!master->cd_table.cdtab) {
2414                         ret = arm_smmu_alloc_cd_tables(master);
2415                         if (ret) {
2416                                 master->domain = NULL;
2417                                 goto out_list_del;
2418                         }
2419                 }
2420
2421                 /*
2422                  * Prevent SVA from concurrently modifying the CD or writing to
2423                  * the CD entry
2424                  */
2425                 mutex_lock(&arm_smmu_asid_lock);
2426                 ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
2427                 mutex_unlock(&arm_smmu_asid_lock);
2428                 if (ret) {
2429                         master->domain = NULL;
2430                         goto out_list_del;
2431                 }
2432         }
2433
2434         arm_smmu_install_ste_for_dev(master);
2435
2436         arm_smmu_enable_ats(master);
2437         return 0;
2438
2439 out_list_del:
2440         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2441         list_del(&master->domain_head);
2442         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2443
2444         return ret;
2445 }
2446
2447 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2448                               phys_addr_t paddr, size_t pgsize, size_t pgcount,
2449                               int prot, gfp_t gfp, size_t *mapped)
2450 {
2451         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2452
2453         if (!ops)
2454                 return -ENODEV;
2455
2456         return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2457 }
2458
2459 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2460                                    size_t pgsize, size_t pgcount,
2461                                    struct iommu_iotlb_gather *gather)
2462 {
2463         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2464         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2465
2466         if (!ops)
2467                 return 0;
2468
2469         return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2470 }
2471
2472 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2473 {
2474         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2475
2476         if (smmu_domain->smmu)
2477                 arm_smmu_tlb_inv_context(smmu_domain);
2478 }
2479
2480 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2481                                 struct iommu_iotlb_gather *gather)
2482 {
2483         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2484
2485         if (!gather->pgsize)
2486                 return;
2487
2488         arm_smmu_tlb_inv_range_domain(gather->start,
2489                                       gather->end - gather->start + 1,
2490                                       gather->pgsize, true, smmu_domain);
2491 }
2492
2493 static phys_addr_t
2494 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2495 {
2496         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2497
2498         if (!ops)
2499                 return 0;
2500
2501         return ops->iova_to_phys(ops, iova);
2502 }
2503
2504 static struct platform_driver arm_smmu_driver;
2505
2506 static
2507 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2508 {
2509         struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2510                                                           fwnode);
2511         put_device(dev);
2512         return dev ? dev_get_drvdata(dev) : NULL;
2513 }
2514
2515 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2516 {
2517         unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2518
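             /* With a 2-level table, each L1 descriptor covers 2^STRTAB_SPLIT SIDs */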
2519         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2520                 limit *= 1UL << STRTAB_SPLIT;
2521
2522         return sid < limit;
2523 }
2524
2525 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2526 {
2527         /* Check the SIDs are in range of the SMMU and our stream table */
2528         if (!arm_smmu_sid_in_range(smmu, sid))
2529                 return -ERANGE;
2530
2531         /* Ensure l2 strtab is initialised */
2532         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2533                 return arm_smmu_init_l2_strtab(smmu, sid);
2534
2535         return 0;
2536 }
2537
2538 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2539                                   struct arm_smmu_master *master)
2540 {
2541         int i;
2542         int ret = 0;
2543         struct arm_smmu_stream *new_stream, *cur_stream;
2544         struct rb_node **new_node, *parent_node = NULL;
2545         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2546
2547         master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2548                                   GFP_KERNEL);
2549         if (!master->streams)
2550                 return -ENOMEM;
2551         master->num_streams = fwspec->num_ids;
2552
2553         mutex_lock(&smmu->streams_mutex);
2554         for (i = 0; i < fwspec->num_ids; i++) {
2555                 u32 sid = fwspec->ids[i];
2556
2557                 new_stream = &master->streams[i];
2558                 new_stream->id = sid;
2559                 new_stream->master = master;
2560
2561                 ret = arm_smmu_init_sid_strtab(smmu, sid);
2562                 if (ret)
2563                         break;
2564
2565                 /* Insert into SID tree */
2566                 new_node = &(smmu->streams.rb_node);
2567                 while (*new_node) {
2568                         cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2569                                               node);
2570                         parent_node = *new_node;
2571                         if (cur_stream->id > new_stream->id) {
2572                                 new_node = &((*new_node)->rb_left);
2573                         } else if (cur_stream->id < new_stream->id) {
2574                                 new_node = &((*new_node)->rb_right);
2575                         } else {
2576                                 dev_warn(master->dev,
2577                                          "stream %u already in tree\n",
2578                                          cur_stream->id);
2579                                 ret = -EINVAL;
2580                                 break;
2581                         }
2582                 }
2583                 if (ret)
2584                         break;
2585
2586                 rb_link_node(&new_stream->node, parent_node, new_node);
2587                 rb_insert_color(&new_stream->node, &smmu->streams);
2588         }
2589
2590         if (ret) {
2591                 for (i--; i >= 0; i--)
2592                         rb_erase(&master->streams[i].node, &smmu->streams);
2593                 kfree(master->streams);
2594         }
2595         mutex_unlock(&smmu->streams_mutex);
2596
2597         return ret;
2598 }
2599
2600 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2601 {
2602         int i;
2603         struct arm_smmu_device *smmu = master->smmu;
2604         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2605
2606         if (!smmu || !master->streams)
2607                 return;
2608
2609         mutex_lock(&smmu->streams_mutex);
2610         for (i = 0; i < fwspec->num_ids; i++)
2611                 rb_erase(&master->streams[i].node, &smmu->streams);
2612         mutex_unlock(&smmu->streams_mutex);
2613
2614         kfree(master->streams);
2615 }
2616
2617 static struct iommu_ops arm_smmu_ops;
2618
2619 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2620 {
2621         int ret;
2622         struct arm_smmu_device *smmu;
2623         struct arm_smmu_master *master;
2624         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2625
2626         if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2627                 return ERR_PTR(-EBUSY);
2628
2629         smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2630         if (!smmu)
2631                 return ERR_PTR(-ENODEV);
2632
2633         master = kzalloc(sizeof(*master), GFP_KERNEL);
2634         if (!master)
2635                 return ERR_PTR(-ENOMEM);
2636
2637         master->dev = dev;
2638         master->smmu = smmu;
2639         INIT_LIST_HEAD(&master->bonds);
2640         dev_iommu_priv_set(dev, master);
2641
2642         ret = arm_smmu_insert_master(smmu, master);
2643         if (ret)
2644                 goto err_free_master;
2645
2646         device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2647         master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2648
2649         /*
2650          * Note that PASID must be enabled before, and disabled after ATS:
2651          * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2652          *
2653          *   Behavior is undefined if this bit is Set and the value of the PASID
2654          *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2655          *   are changed.
2656          */
2657         arm_smmu_enable_pasid(master);
2658
2659         if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2660                 master->ssid_bits = min_t(u8, master->ssid_bits,
2661                                           CTXDESC_LINEAR_CDMAX);
2662
2663         if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2664              device_property_read_bool(dev, "dma-can-stall")) ||
2665             smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2666                 master->stall_enabled = true;
2667
2668         return &smmu->iommu;
2669
2670 err_free_master:
2671         kfree(master);
2672         return ERR_PTR(ret);
2673 }
2674
2675 static void arm_smmu_release_device(struct device *dev)
2676 {
2677         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2678
2679         if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2680                 iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2681         arm_smmu_detach_dev(master);
2682         arm_smmu_disable_pasid(master);
2683         arm_smmu_remove_master(master);
2684         if (master->cd_table.cdtab)
2685                 arm_smmu_free_cd_tables(master);
2686         kfree(master);
2687 }
2688
2689 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2690 {
2691         struct iommu_group *group;
2692
2693         /*
2694          * We don't support devices sharing stream IDs other than PCI RID
2695          * aliases, since the necessary ID-to-device lookup becomes rather
2696          * impractical given a potential sparse 32-bit stream ID space.
2697          */
2698         if (dev_is_pci(dev))
2699                 group = pci_device_group(dev);
2700         else
2701                 group = generic_device_group(dev);
2702
2703         return group;
2704 }
2705
2706 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2707 {
2708         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2709         int ret = 0;
2710
2711         mutex_lock(&smmu_domain->init_mutex);
2712         if (smmu_domain->smmu)
2713                 ret = -EPERM;
2714         else
2715                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2716         mutex_unlock(&smmu_domain->init_mutex);
2717
2718         return ret;
2719 }
2720
2721 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2722 {
2723         return iommu_fwspec_add_ids(dev, args->args, 1);
2724 }
2725
2726 static void arm_smmu_get_resv_regions(struct device *dev,
2727                                       struct list_head *head)
2728 {
2729         struct iommu_resv_region *region;
2730         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2731
2732         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2733                                          prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
2734         if (!region)
2735                 return;
2736
2737         list_add_tail(&region->list, head);
2738
2739         iommu_dma_get_resv_regions(dev, head);
2740 }
2741
2742 static int arm_smmu_dev_enable_feature(struct device *dev,
2743                                        enum iommu_dev_features feat)
2744 {
2745         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2746
2747         if (!master)
2748                 return -ENODEV;
2749
2750         switch (feat) {
2751         case IOMMU_DEV_FEAT_IOPF:
2752                 if (!arm_smmu_master_iopf_supported(master))
2753                         return -EINVAL;
2754                 if (master->iopf_enabled)
2755                         return -EBUSY;
2756                 master->iopf_enabled = true;
2757                 return 0;
2758         case IOMMU_DEV_FEAT_SVA:
2759                 if (!arm_smmu_master_sva_supported(master))
2760                         return -EINVAL;
2761                 if (arm_smmu_master_sva_enabled(master))
2762                         return -EBUSY;
2763                 return arm_smmu_master_enable_sva(master);
2764         default:
2765                 return -EINVAL;
2766         }
2767 }
2768
2769 static int arm_smmu_dev_disable_feature(struct device *dev,
2770                                         enum iommu_dev_features feat)
2771 {
2772         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2773
2774         if (!master)
2775                 return -EINVAL;
2776
2777         switch (feat) {
2778         case IOMMU_DEV_FEAT_IOPF:
2779                 if (!master->iopf_enabled)
2780                         return -EINVAL;
2781                 if (master->sva_enabled)
2782                         return -EBUSY;
2783                 master->iopf_enabled = false;
2784                 return 0;
2785         case IOMMU_DEV_FEAT_SVA:
2786                 if (!arm_smmu_master_sva_enabled(master))
2787                         return -EINVAL;
2788                 return arm_smmu_master_disable_sva(master);
2789         default:
2790                 return -EINVAL;
2791         }
2792 }
2793
2794 /*
2795  * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
2796  * PCIe link and save the data to memory by DMA. The hardware is restricted to
2797  * use identity mapping only.
2798  */
2799 #define IS_HISI_PTT_DEVICE(pdev)        ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
2800                                          (pdev)->device == 0xa12e)
2801
2802 static int arm_smmu_def_domain_type(struct device *dev)
2803 {
2804         if (dev_is_pci(dev)) {
2805                 struct pci_dev *pdev = to_pci_dev(dev);
2806
2807                 if (IS_HISI_PTT_DEVICE(pdev))
2808                         return IOMMU_DOMAIN_IDENTITY;
2809         }
2810
2811         return 0;
2812 }
2813
2814 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
2815 {
2816         struct iommu_domain *domain;
2817
2818         domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
2819         if (WARN_ON(IS_ERR(domain)) || !domain)
2820                 return;
2821
2822         arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
2823 }
2824
2825 static struct iommu_ops arm_smmu_ops = {
2826         .capable                = arm_smmu_capable,
2827         .domain_alloc           = arm_smmu_domain_alloc,
2828         .probe_device           = arm_smmu_probe_device,
2829         .release_device         = arm_smmu_release_device,
2830         .device_group           = arm_smmu_device_group,
2831         .of_xlate               = arm_smmu_of_xlate,
2832         .get_resv_regions       = arm_smmu_get_resv_regions,
2833         .remove_dev_pasid       = arm_smmu_remove_dev_pasid,
2834         .dev_enable_feat        = arm_smmu_dev_enable_feature,
2835         .dev_disable_feat       = arm_smmu_dev_disable_feature,
2836         .page_response          = arm_smmu_page_response,
2837         .def_domain_type        = arm_smmu_def_domain_type,
2838         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
2839         .owner                  = THIS_MODULE,
2840         .default_domain_ops = &(const struct iommu_domain_ops) {
2841                 .attach_dev             = arm_smmu_attach_dev,
2842                 .map_pages              = arm_smmu_map_pages,
2843                 .unmap_pages            = arm_smmu_unmap_pages,
2844                 .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
2845                 .iotlb_sync             = arm_smmu_iotlb_sync,
2846                 .iova_to_phys           = arm_smmu_iova_to_phys,
2847                 .enable_nesting         = arm_smmu_enable_nesting,
2848                 .free                   = arm_smmu_domain_free,
2849         }
2850 };
2851
2852 /* Probing and initialisation functions */
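/*
 * Allocate the DMA-coherent memory for one hardware queue, halving the number
 * of entries on failure until the allocation succeeds or the queue would drop
 * below a page in size, then record the prod/cons register locations and
 * compose the Q_BASE value that is programmed into the SMMU later on.
 */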
2853 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2854                                    struct arm_smmu_queue *q,
2855                                    void __iomem *page,
2856                                    unsigned long prod_off,
2857                                    unsigned long cons_off,
2858                                    size_t dwords, const char *name)
2859 {
2860         size_t qsz;
2861
2862         do {
2863                 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2864                 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2865                                               GFP_KERNEL);
2866                 if (q->base || qsz < PAGE_SIZE)
2867                         break;
2868
2869                 q->llq.max_n_shift--;
2870         } while (1);
2871
2872         if (!q->base) {
2873                 dev_err(smmu->dev,
2874                         "failed to allocate queue (0x%zx bytes) for %s\n",
2875                         qsz, name);
2876                 return -ENOMEM;
2877         }
2878
2879         if (!WARN_ON(q->base_dma & (qsz - 1))) {
2880                 dev_info(smmu->dev, "allocated %u entries for %s\n",
2881                          1 << q->llq.max_n_shift, name);
2882         }
2883
2884         q->prod_reg     = page + prod_off;
2885         q->cons_reg     = page + cons_off;
2886         q->ent_dwords   = dwords;
2887
2888         q->q_base  = Q_BASE_RWA;
2889         q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2890         q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2891
2892         q->llq.prod = q->llq.cons = 0;
2893         return 0;
2894 }
2895
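/*
 * Initialise the command queue's shared state: the owner/lock atomics and the
 * valid_map bitmap used by the lock-free command insertion code to track
 * which queue entries have been written out.
 */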
2896 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2897 {
2898         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2899         unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2900
2901         atomic_set(&cmdq->owner_prod, 0);
2902         atomic_set(&cmdq->lock, 0);
2903
2904         cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
2905                                                               GFP_KERNEL);
2906         if (!cmdq->valid_map)
2907                 return -ENOMEM;
2908
2909         return 0;
2910 }
2911
2912 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2913 {
2914         int ret;
2915
2916         /* cmdq */
2917         ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2918                                       ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2919                                       CMDQ_ENT_DWORDS, "cmdq");
2920         if (ret)
2921                 return ret;
2922
2923         ret = arm_smmu_cmdq_init(smmu);
2924         if (ret)
2925                 return ret;
2926
2927         /* evtq */
2928         ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2929                                       ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2930                                       EVTQ_ENT_DWORDS, "evtq");
2931         if (ret)
2932                 return ret;
2933
2934         if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2935             (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2936                 smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2937                 if (!smmu->evtq.iopf)
2938                         return -ENOMEM;
2939         }
2940
2941         /* priq */
2942         if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2943                 return 0;
2944
2945         return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2946                                        ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2947                                        PRIQ_ENT_DWORDS, "priq");
2948 }
2949
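/*
 * Allocate the L1 descriptor bookkeeping array and write an invalid (span 0)
 * descriptor for every L1 entry; the L2 tables backing them are only
 * allocated once a stream ID is actually used (see arm_smmu_init_sid_strtab()).
 */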
2950 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2951 {
2952         unsigned int i;
2953         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2954         void *strtab = smmu->strtab_cfg.strtab;
2955
2956         cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
2957                                     sizeof(*cfg->l1_desc), GFP_KERNEL);
2958         if (!cfg->l1_desc)
2959                 return -ENOMEM;
2960
2961         for (i = 0; i < cfg->num_l1_ents; ++i) {
2962                 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2963                 strtab += STRTAB_L1_DESC_DWORDS << 3;
2964         }
2965
2966         return 0;
2967 }
2968
2969 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2970 {
2971         void *strtab;
2972         u64 reg;
2973         u32 size, l1size;
2974         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2975
2976         /* Calculate the L1 size, capped to the SIDSIZE. */
2977         size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2978         size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2979         cfg->num_l1_ents = 1 << size;
2980
2981         size += STRTAB_SPLIT;
2982         if (size < smmu->sid_bits)
2983                 dev_warn(smmu->dev,
2984                          "2-level strtab only covers %u/%u bits of SID\n",
2985                          size, smmu->sid_bits);
2986
2987         l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2988         strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2989                                      GFP_KERNEL);
2990         if (!strtab) {
2991                 dev_err(smmu->dev,
2992                         "failed to allocate l1 stream table (%u bytes)\n",
2993                         l1size);
2994                 return -ENOMEM;
2995         }
2996         cfg->strtab = strtab;
2997
2998         /* Configure strtab_base_cfg for 2 levels */
2999         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3000         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3001         reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3002         cfg->strtab_base_cfg = reg;
3003
3004         return arm_smmu_init_l1_strtab(smmu);
3005 }
3006
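/*
 * Allocate a flat stream table with one STE per stream ID and initialise
 * every entry to its default disposition (bypass, or abort when
 * disable_bypass is set).
 */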
3007 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3008 {
3009         void *strtab;
3010         u64 reg;
3011         u32 size;
3012         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3013
3014         size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3015         strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3016                                      GFP_KERNEL);
3017         if (!strtab) {
3018                 dev_err(smmu->dev,
3019                         "failed to allocate linear stream table (%u bytes)\n",
3020                         size);
3021                 return -ENOMEM;
3022         }
3023         cfg->strtab = strtab;
3024         cfg->num_l1_ents = 1 << smmu->sid_bits;
3025
3026         /* Configure strtab_base_cfg for a linear table covering all SIDs */
3027         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3028         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3029         cfg->strtab_base_cfg = reg;
3030
3031         arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
3032         return 0;
3033 }
3034
3035 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3036 {
3037         u64 reg;
3038         int ret;
3039
3040         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3041                 ret = arm_smmu_init_strtab_2lvl(smmu);
3042         else
3043                 ret = arm_smmu_init_strtab_linear(smmu);
3044
3045         if (ret)
3046                 return ret;
3047
3048         /* Set the strtab base address */
3049         reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3050         reg |= STRTAB_BASE_RA;
3051         smmu->strtab_cfg.strtab_base = reg;
3052
3053         ida_init(&smmu->vmid_map);
3054
3055         return 0;
3056 }
3057
3058 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3059 {
3060         int ret;
3061
3062         mutex_init(&smmu->streams_mutex);
3063         smmu->streams = RB_ROOT;
3064
3065         ret = arm_smmu_init_queues(smmu);
3066         if (ret)
3067                 return ret;
3068
3069         return arm_smmu_init_strtab(smmu);
3070 }
3071
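/*
 * Write a control register and poll its acknowledgement register (e.g.
 * CR0/CR0ACK, IRQ_CTRL/IRQ_CTRLACK) until the new value is reflected back.
 */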
3072 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3073                                    unsigned int reg_off, unsigned int ack_off)
3074 {
3075         u32 reg;
3076
3077         writel_relaxed(val, smmu->base + reg_off);
3078         return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3079                                           1, ARM_SMMU_POLL_TIMEOUT_US);
3080 }
3081
3082 /* GBPA is "special": writes only take effect via the GBPA.UPDATE handshake */
3083 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3084 {
3085         int ret;
3086         u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3087
3088         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3089                                          1, ARM_SMMU_POLL_TIMEOUT_US);
3090         if (ret)
3091                 return ret;
3092
3093         reg &= ~clr;
3094         reg |= set;
3095         writel_relaxed(reg | GBPA_UPDATE, gbpa);
3096         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3097                                          1, ARM_SMMU_POLL_TIMEOUT_US);
3098
3099         if (ret)
3100                 dev_err(smmu->dev, "GBPA not responding to update\n");
3101         return ret;
3102 }
3103
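/*
 * MSI support: when ARM_SMMU_FEAT_MSI is present, the evtq, gerror and priq
 * interrupts can be signalled by the SMMU writing to doorbell addresses.
 * arm_smmu_write_msi_msg() programs each composed MSI message into the
 * corresponding IRQ_CFG0/1/2 registers.
 */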
3104 static void arm_smmu_free_msis(void *data)
3105 {
3106         struct device *dev = data;
3107         platform_msi_domain_free_irqs(dev);
3108 }
3109
3110 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3111 {
3112         phys_addr_t doorbell;
3113         struct device *dev = msi_desc_to_dev(desc);
3114         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3115         phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3116
3117         doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3118         doorbell &= MSI_CFG0_ADDR_MASK;
3119
3120         writeq_relaxed(doorbell, smmu->base + cfg[0]);
3121         writel_relaxed(msg->data, smmu->base + cfg[1]);
3122         writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3123 }
3124
3125 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3126 {
3127         int ret, nvec = ARM_SMMU_MAX_MSIS;
3128         struct device *dev = smmu->dev;
3129
3130         /* Clear the MSI address regs */
3131         writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3132         writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3133
3134         if (smmu->features & ARM_SMMU_FEAT_PRI)
3135                 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3136         else
3137                 nvec--;
3138
3139         if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3140                 return;
3141
3142         if (!dev->msi.domain) {
3143                 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3144                 return;
3145         }
3146
3147         /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3148         ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3149         if (ret) {
3150                 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3151                 return;
3152         }
3153
3154         smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3155         smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3156         smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3157
3158         /* Add callback to free MSIs on teardown */
3159         devm_add_action(dev, arm_smmu_free_msis, dev);
3160 }
3161
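/*
 * Set up MSIs where possible, then request whichever wired interrupt lines
 * were discovered at probe time; the event and PRI queues are serviced by
 * threaded handlers.
 */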
3162 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3163 {
3164         int irq, ret;
3165
3166         arm_smmu_setup_msis(smmu);
3167
3168         /* Request interrupt lines */
3169         irq = smmu->evtq.q.irq;
3170         if (irq) {
3171                 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3172                                                 arm_smmu_evtq_thread,
3173                                                 IRQF_ONESHOT,
3174                                                 "arm-smmu-v3-evtq", smmu);
3175                 if (ret < 0)
3176                         dev_warn(smmu->dev, "failed to enable evtq irq\n");
3177         } else {
3178                 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3179         }
3180
3181         irq = smmu->gerr_irq;
3182         if (irq) {
3183                 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3184                                        0, "arm-smmu-v3-gerror", smmu);
3185                 if (ret < 0)
3186                         dev_warn(smmu->dev, "failed to enable gerror irq\n");
3187         } else {
3188                 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3189         }
3190
3191         if (smmu->features & ARM_SMMU_FEAT_PRI) {
3192                 irq = smmu->priq.q.irq;
3193                 if (irq) {
3194                         ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3195                                                         arm_smmu_priq_thread,
3196                                                         IRQF_ONESHOT,
3197                                                         "arm-smmu-v3-priq",
3198                                                         smmu);
3199                         if (ret < 0)
3200                                 dev_warn(smmu->dev,
3201                                          "failed to enable priq irq\n");
3202                 } else {
3203                         dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3204                 }
3205         }
3206 }
3207
3208 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3209 {
3210         int ret, irq;
3211         u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3212
3213         /* Disable IRQs first */
3214         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3215                                       ARM_SMMU_IRQ_CTRLACK);
3216         if (ret) {
3217                 dev_err(smmu->dev, "failed to disable irqs\n");
3218                 return ret;
3219         }
3220
3221         irq = smmu->combined_irq;
3222         if (irq) {
3223                 /*
3224                  * Cavium ThunderX2 implementation doesn't support unique irq
3225                  * lines. Use a single irq line for all the SMMUv3 interrupts.
3226                  */
3227                 ret = devm_request_threaded_irq(smmu->dev, irq,
3228                                         arm_smmu_combined_irq_handler,
3229                                         arm_smmu_combined_irq_thread,
3230                                         IRQF_ONESHOT,
3231                                         "arm-smmu-v3-combined-irq", smmu);
3232                 if (ret < 0)
3233                         dev_warn(smmu->dev, "failed to enable combined irq\n");
3234         } else
3235                 arm_smmu_setup_unique_irqs(smmu);
3236
3237         if (smmu->features & ARM_SMMU_FEAT_PRI)
3238                 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3239
3240         /* Enable interrupt generation on the SMMU */
3241         ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3242                                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3243         if (ret)
3244                 dev_warn(smmu->dev, "failed to enable irqs\n");
3245
3246         return 0;
3247 }
3248
3249 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3250 {
3251         int ret;
3252
3253         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3254         if (ret)
3255                 dev_err(smmu->dev, "failed to clear cr0\n");
3256
3257         return ret;
3258 }
3259
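/*
 * Bring the SMMU up from a clean state: disable it (forcing global abort if
 * it was left enabled), program the table/queue memory attributes, stream
 * table and queue base registers, enable the command, event and PRI queues in
 * turn, invalidate stale configuration and TLB state, set up interrupts and
 * finally either enable translation or arrange for global bypass.
 */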
3260 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3261 {
3262         int ret;
3263         u32 reg, enables;
3264         struct arm_smmu_cmdq_ent cmd;
3265
3266         /* Clear CR0 and sync (disables SMMU and queue processing) */
3267         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3268         if (reg & CR0_SMMUEN) {
3269                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3270                 WARN_ON(is_kdump_kernel() && !disable_bypass);
3271                 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3272         }
3273
3274         ret = arm_smmu_device_disable(smmu);
3275         if (ret)
3276                 return ret;
3277
3278         /* CR1 (table and queue memory attributes) */
3279         reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3280               FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3281               FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3282               FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3283               FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3284               FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3285         writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3286
3287         /* CR2 (private TLB maintenance, record invalid SIDs, E2H where supported) */
3288         reg = CR2_PTM | CR2_RECINVSID;
3289
3290         if (smmu->features & ARM_SMMU_FEAT_E2H)
3291                 reg |= CR2_E2H;
3292
3293         writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3294
3295         /* Stream table */
3296         writeq_relaxed(smmu->strtab_cfg.strtab_base,
3297                        smmu->base + ARM_SMMU_STRTAB_BASE);
3298         writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3299                        smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3300
3301         /* Command queue */
3302         writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3303         writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3304         writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3305
3306         enables = CR0_CMDQEN;
3307         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3308                                       ARM_SMMU_CR0ACK);
3309         if (ret) {
3310                 dev_err(smmu->dev, "failed to enable command queue\n");
3311                 return ret;
3312         }
3313
3314         /* Invalidate any cached configuration */
3315         cmd.opcode = CMDQ_OP_CFGI_ALL;
3316         arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3317
3318         /* Invalidate any stale TLB entries */
3319         if (smmu->features & ARM_SMMU_FEAT_HYP) {
3320                 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3321                 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3322         }
3323
3324         cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3325         arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3326
3327         /* Event queue */
3328         writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3329         writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3330         writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3331
3332         enables |= CR0_EVTQEN;
3333         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3334                                       ARM_SMMU_CR0ACK);
3335         if (ret) {
3336                 dev_err(smmu->dev, "failed to enable event queue\n");
3337                 return ret;
3338         }
3339
3340         /* PRI queue */
3341         if (smmu->features & ARM_SMMU_FEAT_PRI) {
3342                 writeq_relaxed(smmu->priq.q.q_base,
3343                                smmu->base + ARM_SMMU_PRIQ_BASE);
3344                 writel_relaxed(smmu->priq.q.llq.prod,
3345                                smmu->page1 + ARM_SMMU_PRIQ_PROD);
3346                 writel_relaxed(smmu->priq.q.llq.cons,
3347                                smmu->page1 + ARM_SMMU_PRIQ_CONS);
3348
3349                 enables |= CR0_PRIQEN;
3350                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3351                                               ARM_SMMU_CR0ACK);
3352                 if (ret) {
3353                         dev_err(smmu->dev, "failed to enable PRI queue\n");
3354                         return ret;
3355                 }
3356         }
3357
3358         if (smmu->features & ARM_SMMU_FEAT_ATS) {
3359                 enables |= CR0_ATSCHK;
3360                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3361                                               ARM_SMMU_CR0ACK);
3362                 if (ret) {
3363                         dev_err(smmu->dev, "failed to enable ATS check\n");
3364                         return ret;
3365                 }
3366         }
3367
3368         ret = arm_smmu_setup_irqs(smmu);
3369         if (ret) {
3370                 dev_err(smmu->dev, "failed to setup irqs\n");
3371                 return ret;
3372         }
3373
3374         if (is_kdump_kernel())
3375                 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3376
3377         /* Enable the SMMU interface, or ensure bypass */
3378         if (!bypass || disable_bypass) {
3379                 enables |= CR0_SMMUEN;
3380         } else {
3381                 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3382                 if (ret)
3383                         return ret;
3384         }
3385         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3386                                       ARM_SMMU_CR0ACK);
3387         if (ret) {
3388                 dev_err(smmu->dev, "failed to enable SMMU interface\n");
3389                 return ret;
3390         }
3391
3392         return 0;
3393 }
3394
3395 #define IIDR_IMPLEMENTER_ARM            0x43b
3396 #define IIDR_PRODUCTID_ARM_MMU_600      0x483
3397 #define IIDR_PRODUCTID_ARM_MMU_700      0x487
3398
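/*
 * Identify the implementation from IIDR and mask out features affected by
 * known errata in Arm MMU-600 and MMU-700.
 */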
3399 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
3400 {
3401         u32 reg;
3402         unsigned int implementer, productid, variant, revision;
3403
3404         reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
3405         implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
3406         productid = FIELD_GET(IIDR_PRODUCTID, reg);
3407         variant = FIELD_GET(IIDR_VARIANT, reg);
3408         revision = FIELD_GET(IIDR_REVISION, reg);
3409
3410         switch (implementer) {
3411         case IIDR_IMPLEMENTER_ARM:
3412                 switch (productid) {
3413                 case IIDR_PRODUCTID_ARM_MMU_600:
3414                         /* Arm erratum 1076982 */
3415                         if (variant == 0 && revision <= 2)
3416                                 smmu->features &= ~ARM_SMMU_FEAT_SEV;
3417                         /* Arm erratum 1209401 */
3418                         if (variant < 2)
3419                                 smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3420                         break;
3421                 case IIDR_PRODUCTID_ARM_MMU_700:
3422                         /* Arm erratum 2812531 */
3423                         smmu->features &= ~ARM_SMMU_FEAT_BTM;
3424                         smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
3425                         /* Arm errata 2268618, 2812531 */
3426                         smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3427                         break;
3428                 }
3429                 break;
3430         }
3431 }
3432
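/*
 * Probe the ID registers: discover supported features, queue sizes, SID/SSID
 * widths, page sizes and address sizes, apply IIDR-based erratum workarounds
 * and check whether SVA can be offered.
 */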
3433 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3434 {
3435         u32 reg;
3436         bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3437
3438         /* IDR0 */
3439         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3440
3441         /* 2-level structures */
3442         if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3443                 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3444
3445         if (reg & IDR0_CD2L)
3446                 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3447
3448         /*
3449          * Translation table endianness.
3450          * We currently require the same endianness as the CPU, but this
3451          * could be changed later by adding a new IO_PGTABLE_QUIRK.
3452          */
3453         switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3454         case IDR0_TTENDIAN_MIXED:
3455                 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3456                 break;
3457 #ifdef __BIG_ENDIAN
3458         case IDR0_TTENDIAN_BE:
3459                 smmu->features |= ARM_SMMU_FEAT_TT_BE;
3460                 break;
3461 #else
3462         case IDR0_TTENDIAN_LE:
3463                 smmu->features |= ARM_SMMU_FEAT_TT_LE;
3464                 break;
3465 #endif
3466         default:
3467                 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3468                 return -ENXIO;
3469         }
3470
3471         /* Boolean feature flags */
3472         if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3473                 smmu->features |= ARM_SMMU_FEAT_PRI;
3474
3475         if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3476                 smmu->features |= ARM_SMMU_FEAT_ATS;
3477
3478         if (reg & IDR0_SEV)
3479                 smmu->features |= ARM_SMMU_FEAT_SEV;
3480
3481         if (reg & IDR0_MSI) {
3482                 smmu->features |= ARM_SMMU_FEAT_MSI;
3483                 if (coherent && !disable_msipolling)
3484                         smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3485         }
3486
3487         if (reg & IDR0_HYP) {
3488                 smmu->features |= ARM_SMMU_FEAT_HYP;
3489                 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3490                         smmu->features |= ARM_SMMU_FEAT_E2H;
3491         }
3492
3493         /*
3494          * The coherency feature as set by FW is used in preference to the ID
3495          * register, but warn on mismatch.
3496          */
3497         if (!!(reg & IDR0_COHACC) != coherent)
3498                 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3499                          coherent ? "true" : "false");
3500
3501         switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3502         case IDR0_STALL_MODEL_FORCE:
3503                 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3504                 fallthrough;
3505         case IDR0_STALL_MODEL_STALL:
3506                 smmu->features |= ARM_SMMU_FEAT_STALLS;
3507         }
3508
3509         if (reg & IDR0_S1P)
3510                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3511
3512         if (reg & IDR0_S2P)
3513                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3514
3515         if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3516                 dev_err(smmu->dev, "no translation support!\n");
3517                 return -ENXIO;
3518         }
3519
3520         /* We only support the AArch64 table format at present */
3521         switch (FIELD_GET(IDR0_TTF, reg)) {
3522         case IDR0_TTF_AARCH32_64:
3523                 smmu->ias = 40;
3524                 fallthrough;
3525         case IDR0_TTF_AARCH64:
3526                 break;
3527         default:
3528                 dev_err(smmu->dev, "AArch64 table format not supported!\n");
3529                 return -ENXIO;
3530         }
3531
3532         /* ASID/VMID sizes */
3533         smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3534         smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3535
3536         /* IDR1 */
3537         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3538         if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3539                 dev_err(smmu->dev, "embedded implementation not supported\n");
3540                 return -ENXIO;
3541         }
3542
3543         /* Queue sizes, capped to ensure natural alignment */
3544         smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3545                                              FIELD_GET(IDR1_CMDQS, reg));
3546         if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3547                 /*
3548                  * We don't support splitting up batches, so one batch of
3549                  * commands plus an extra sync needs to fit inside the command
3550                  * queue. There's also no way we can handle the weird alignment
3551                  * restrictions on the base pointer for a unit-length queue.
3552                  */
3553                 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3554                         CMDQ_BATCH_ENTRIES);
3555                 return -ENXIO;
3556         }
3557
3558         smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3559                                              FIELD_GET(IDR1_EVTQS, reg));
3560         smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3561                                              FIELD_GET(IDR1_PRIQS, reg));
3562
3563         /* SID/SSID sizes */
3564         smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3565         smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3566         smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
3567
3568         /*
3569          * If the SMMU supports fewer bits than would fill a single L2 stream
3570          * table, use a linear table instead.
3571          */
3572         if (smmu->sid_bits <= STRTAB_SPLIT)
3573                 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3574
3575         /* IDR3 */
3576         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3577         if (FIELD_GET(IDR3_RIL, reg))
3578                 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3579
3580         /* IDR5 */
3581         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3582
3583         /* Maximum number of outstanding stalls */
3584         smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3585
3586         /* Page sizes */
3587         if (reg & IDR5_GRAN64K)
3588                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3589         if (reg & IDR5_GRAN16K)
3590                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3591         if (reg & IDR5_GRAN4K)
3592                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3593
3594         /* Input address size */
3595         if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3596                 smmu->features |= ARM_SMMU_FEAT_VAX;
3597
3598         /* Output address size */
3599         switch (FIELD_GET(IDR5_OAS, reg)) {
3600         case IDR5_OAS_32_BIT:
3601                 smmu->oas = 32;
3602                 break;
3603         case IDR5_OAS_36_BIT:
3604                 smmu->oas = 36;
3605                 break;
3606         case IDR5_OAS_40_BIT:
3607                 smmu->oas = 40;
3608                 break;
3609         case IDR5_OAS_42_BIT:
3610                 smmu->oas = 42;
3611                 break;
3612         case IDR5_OAS_44_BIT:
3613                 smmu->oas = 44;
3614                 break;
3615         case IDR5_OAS_52_BIT:
3616                 smmu->oas = 52;
3617                 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3618                 break;
3619         default:
3620                 dev_info(smmu->dev,
3621                         "unknown output address size. Truncating to 48-bit\n");
3622                 fallthrough;
3623         case IDR5_OAS_48_BIT:
3624                 smmu->oas = 48;
3625         }
3626
3627         if (arm_smmu_ops.pgsize_bitmap == -1UL)
3628                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3629         else
3630                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3631
3632         /* Set the DMA mask for our table walker */
3633         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3634                 dev_warn(smmu->dev,
3635                          "failed to set DMA mask for table walker\n");
3636
3637         smmu->ias = max(smmu->ias, smmu->oas);
3638
3639         if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
3640             (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
3641                 smmu->features |= ARM_SMMU_FEAT_NESTING;
3642
3643         arm_smmu_device_iidr_probe(smmu);
3644
3645         if (arm_smmu_sva_supported(smmu))
3646                 smmu->features |= ARM_SMMU_FEAT_SVA;
3647
3648         dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3649                  smmu->ias, smmu->oas, smmu->features);
3650         return 0;
3651 }
3652
3653 #ifdef CONFIG_ACPI
3654 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3655 {
3656         switch (model) {
3657         case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3658                 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3659                 break;
3660         case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3661                 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3662                 break;
3663         }
3664
3665         dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3666 }
3667
3668 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3669                                       struct arm_smmu_device *smmu)
3670 {
3671         struct acpi_iort_smmu_v3 *iort_smmu;
3672         struct device *dev = smmu->dev;
3673         struct acpi_iort_node *node;
3674
3675         node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3676
3677         /* Retrieve SMMUv3 specific data */
3678         iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3679
3680         acpi_smmu_get_options(iort_smmu->model, smmu);
3681
3682         if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3683                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3684
3685         return 0;
3686 }
3687 #else
3688 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3689                                              struct arm_smmu_device *smmu)
3690 {
3691         return -ENODEV;
3692 }
3693 #endif
3694
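/*
 * Device-tree probe: the binding must specify #iommu-cells = 1 (a single
 * stream ID per phandle argument); also parse implementation-specific options
 * and pick up DMA coherency from the DT.
 */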
3695 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3696                                     struct arm_smmu_device *smmu)
3697 {
3698         struct device *dev = &pdev->dev;
3699         u32 cells;
3700         int ret = -EINVAL;
3701
3702         if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3703                 dev_err(dev, "missing #iommu-cells property\n");
3704         else if (cells != 1)
3705                 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3706         else
3707                 ret = 0;
3708
3709         parse_driver_options(smmu);
3710
3711         if (of_dma_is_coherent(dev->of_node))
3712                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3713
3714         return ret;
3715 }
3716
3717 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3718 {
3719         if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3720                 return SZ_64K;
3721         else
3722                 return SZ_128K;
3723 }
3724
3725 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3726                                       resource_size_t size)
3727 {
3728         struct resource res = DEFINE_RES_MEM(start, size);
3729
3730         return devm_ioremap_resource(dev, &res);
3731 }
3732
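/*
 * For every stream ID described by an IORT RMR node, make sure a stream table
 * entry exists and force it into bypass, so that DMA already set up by
 * firmware to the reserved regions keeps working once the SMMU is enabled.
 */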
3733 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3734 {
3735         struct list_head rmr_list;
3736         struct iommu_resv_region *e;
3737
3738         INIT_LIST_HEAD(&rmr_list);
3739         iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3740
3741         list_for_each_entry(e, &rmr_list, list) {
3742                 struct arm_smmu_ste *step;
3743                 struct iommu_iort_rmr_data *rmr;
3744                 int ret, i;
3745
3746                 rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3747                 for (i = 0; i < rmr->num_sids; i++) {
3748                         ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3749                         if (ret) {
3750                                 dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3751                                         rmr->sids[i]);
3752                                 continue;
3753                         }
3754
3755                         step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
3756                         arm_smmu_init_bypass_stes(step, 1, true);
3757                 }
3758         }
3759
3760         iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3761 }
3762
3763 static int arm_smmu_device_probe(struct platform_device *pdev)
3764 {
3765         int irq, ret;
3766         struct resource *res;
3767         resource_size_t ioaddr;
3768         struct arm_smmu_device *smmu;
3769         struct device *dev = &pdev->dev;
3770         bool bypass;
3771
3772         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3773         if (!smmu)
3774                 return -ENOMEM;
3775         smmu->dev = dev;
3776
3777         if (dev->of_node) {
3778                 ret = arm_smmu_device_dt_probe(pdev, smmu);
3779         } else {
3780                 ret = arm_smmu_device_acpi_probe(pdev, smmu);
3781                 if (ret == -ENODEV)
3782                         return ret;
3783         }
3784
3785         /* Set bypass mode according to firmware probing result */
3786         bypass = !!ret;
3787
3788         /* Base address */
3789         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3790         if (!res)
3791                 return -EINVAL;
3792         if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3793                 dev_err(dev, "MMIO region too small (%pr)\n", res);
3794                 return -EINVAL;
3795         }
3796         ioaddr = res->start;
3797
3798         /*
3799          * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3800          * the PMCG registers which are reserved by the PMU driver.
3801          */
3802         smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3803         if (IS_ERR(smmu->base))
3804                 return PTR_ERR(smmu->base);
3805
3806         if (arm_smmu_resource_size(smmu) > SZ_64K) {
3807                 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3808                                                ARM_SMMU_REG_SZ);
3809                 if (IS_ERR(smmu->page1))
3810                         return PTR_ERR(smmu->page1);
3811         } else {
3812                 smmu->page1 = smmu->base;
3813         }
3814
3815         /* Interrupt lines */
3816
3817         irq = platform_get_irq_byname_optional(pdev, "combined");
3818         if (irq > 0)
3819                 smmu->combined_irq = irq;
3820         else {
3821                 irq = platform_get_irq_byname_optional(pdev, "eventq");
3822                 if (irq > 0)
3823                         smmu->evtq.q.irq = irq;
3824
3825                 irq = platform_get_irq_byname_optional(pdev, "priq");
3826                 if (irq > 0)
3827                         smmu->priq.q.irq = irq;
3828
3829                 irq = platform_get_irq_byname_optional(pdev, "gerror");
3830                 if (irq > 0)
3831                         smmu->gerr_irq = irq;
3832         }
3833         /* Probe the h/w */
3834         ret = arm_smmu_device_hw_probe(smmu);
3835         if (ret)
3836                 return ret;
3837
3838         /* Initialise in-memory data structures */
3839         ret = arm_smmu_init_structures(smmu);
3840         if (ret)
3841                 return ret;
3842
3843         /* Record our private device structure */
3844         platform_set_drvdata(pdev, smmu);
3845
3846         /* Check for RMRs and install bypass STEs if any */
3847         arm_smmu_rmr_install_bypass_ste(smmu);
3848
3849         /* Reset the device */
3850         ret = arm_smmu_device_reset(smmu, bypass);
3851         if (ret)
3852                 return ret;
3853
3854         /* And we're up. Go go go! */
3855         ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3856                                      "smmu3.%pa", &ioaddr);
3857         if (ret)
3858                 return ret;
3859
3860         ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3861         if (ret) {
3862                 dev_err(dev, "Failed to register iommu\n");
3863                 iommu_device_sysfs_remove(&smmu->iommu);
3864                 return ret;
3865         }
3866
3867         return 0;
3868 }
3869
3870 static void arm_smmu_device_remove(struct platform_device *pdev)
3871 {
3872         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3873
3874         iommu_device_unregister(&smmu->iommu);
3875         iommu_device_sysfs_remove(&smmu->iommu);
3876         arm_smmu_device_disable(smmu);
3877         iopf_queue_free(smmu->evtq.iopf);
3878         ida_destroy(&smmu->vmid_map);
3879 }
3880
3881 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3882 {
3883         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3884
3885         arm_smmu_device_disable(smmu);
3886 }
3887
3888 static const struct of_device_id arm_smmu_of_match[] = {
3889         { .compatible = "arm,smmu-v3", },
3890         { },
3891 };
3892 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3893
3894 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3895 {
3896         arm_smmu_sva_notifier_synchronize();
3897         platform_driver_unregister(drv);
3898 }
3899
3900 static struct platform_driver arm_smmu_driver = {
3901         .driver = {
3902                 .name                   = "arm-smmu-v3",
3903                 .of_match_table         = arm_smmu_of_match,
3904                 .suppress_bind_attrs    = true,
3905         },
3906         .probe  = arm_smmu_device_probe,
3907         .remove_new = arm_smmu_device_remove,
3908         .shutdown = arm_smmu_device_shutdown,
3909 };
3910 module_driver(arm_smmu_driver, platform_driver_register,
3911               arm_smmu_driver_unregister);
3912
3913 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3914 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3915 MODULE_ALIAS("platform:arm-smmu-v3");
3916 MODULE_LICENSE("GPL v2");