// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/platform_device.h>

#include <linux/amba/bus.h>

#include "io-pgtable.h"
/* MMIO registers */
#define ARM_SMMU_IDR0                   0x0
#define IDR0_ST_LVL                     GENMASK(28, 27)
#define IDR0_ST_LVL_2LVL                1
#define IDR0_STALL_MODEL                GENMASK(25, 24)
#define IDR0_STALL_MODEL_STALL          0
#define IDR0_STALL_MODEL_FORCE          2
#define IDR0_TTENDIAN                   GENMASK(22, 21)
#define IDR0_TTENDIAN_MIXED             0
#define IDR0_TTENDIAN_LE                2
#define IDR0_TTENDIAN_BE                3
#define IDR0_CD2L                       (1 << 19)
#define IDR0_VMID16                     (1 << 18)
#define IDR0_PRI                        (1 << 16)
#define IDR0_SEV                        (1 << 14)
#define IDR0_MSI                        (1 << 13)
#define IDR0_ASID16                     (1 << 12)
#define IDR0_ATS                        (1 << 10)
#define IDR0_HYP                        (1 << 9)
#define IDR0_COHACC                     (1 << 4)
#define IDR0_TTF                        GENMASK(3, 2)
#define IDR0_TTF_AARCH64                2
#define IDR0_TTF_AARCH32_64             3
#define IDR0_S1P                        (1 << 1)
#define IDR0_S2P                        (1 << 0)

#define ARM_SMMU_IDR1                   0x4
#define IDR1_TABLES_PRESET              (1 << 30)
#define IDR1_QUEUES_PRESET              (1 << 29)
#define IDR1_REL                        (1 << 28)
#define IDR1_CMDQS                      GENMASK(25, 21)
#define IDR1_EVTQS                      GENMASK(20, 16)
#define IDR1_PRIQS                      GENMASK(15, 11)
#define IDR1_SSIDSIZE                   GENMASK(10, 6)
#define IDR1_SIDSIZE                    GENMASK(5, 0)

#define ARM_SMMU_IDR5                   0x14
#define IDR5_STALL_MAX                  GENMASK(31, 16)
#define IDR5_GRAN64K                    (1 << 6)
#define IDR5_GRAN16K                    (1 << 5)
#define IDR5_GRAN4K                     (1 << 4)
#define IDR5_OAS                        GENMASK(2, 0)
#define IDR5_OAS_32_BIT                 0
#define IDR5_OAS_36_BIT                 1
#define IDR5_OAS_40_BIT                 2
#define IDR5_OAS_42_BIT                 3
#define IDR5_OAS_44_BIT                 4
#define IDR5_OAS_48_BIT                 5
#define IDR5_OAS_52_BIT                 6
#define IDR5_VAX                        GENMASK(11, 10)
#define IDR5_VAX_52_BIT                 1

#define ARM_SMMU_CR0                    0x20
#define CR0_CMDQEN                      (1 << 3)
#define CR0_EVTQEN                      (1 << 2)
#define CR0_PRIQEN                      (1 << 1)
#define CR0_SMMUEN                      (1 << 0)

#define ARM_SMMU_CR0ACK                 0x24

#define ARM_SMMU_CR1                    0x28
#define CR1_TABLE_SH                    GENMASK(11, 10)
#define CR1_TABLE_OC                    GENMASK(9, 8)
#define CR1_TABLE_IC                    GENMASK(7, 6)
#define CR1_QUEUE_SH                    GENMASK(5, 4)
#define CR1_QUEUE_OC                    GENMASK(3, 2)
#define CR1_QUEUE_IC                    GENMASK(1, 0)
/* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
#define CR1_CACHE_NC                    0
#define CR1_CACHE_WB                    1
#define CR1_CACHE_WT                    2

#define ARM_SMMU_CR2                    0x2c
#define CR2_PTM                         (1 << 2)
#define CR2_RECINVSID                   (1 << 1)
#define CR2_E2H                         (1 << 0)

#define ARM_SMMU_GBPA                   0x44
#define GBPA_UPDATE                     (1 << 31)
#define GBPA_ABORT                      (1 << 20)

#define ARM_SMMU_IRQ_CTRL               0x50
#define IRQ_CTRL_EVTQ_IRQEN             (1 << 2)
#define IRQ_CTRL_PRIQ_IRQEN             (1 << 1)
#define IRQ_CTRL_GERROR_IRQEN           (1 << 0)

#define ARM_SMMU_IRQ_CTRLACK            0x54

#define ARM_SMMU_GERROR                 0x60
#define GERROR_SFM_ERR                  (1 << 8)
#define GERROR_MSI_GERROR_ABT_ERR       (1 << 7)
#define GERROR_MSI_PRIQ_ABT_ERR         (1 << 6)
#define GERROR_MSI_EVTQ_ABT_ERR         (1 << 5)
#define GERROR_MSI_CMDQ_ABT_ERR         (1 << 4)
#define GERROR_PRIQ_ABT_ERR             (1 << 3)
#define GERROR_EVTQ_ABT_ERR             (1 << 2)
#define GERROR_CMDQ_ERR                 (1 << 0)
#define GERROR_ERR_MASK                 0xfd

#define ARM_SMMU_GERRORN                0x64

#define ARM_SMMU_GERROR_IRQ_CFG0        0x68
#define ARM_SMMU_GERROR_IRQ_CFG1        0x70
#define ARM_SMMU_GERROR_IRQ_CFG2        0x74

#define ARM_SMMU_STRTAB_BASE            0x80
#define STRTAB_BASE_RA                  (1UL << 62)
#define STRTAB_BASE_ADDR_MASK           GENMASK_ULL(51, 6)

#define ARM_SMMU_STRTAB_BASE_CFG        0x88
#define STRTAB_BASE_CFG_FMT             GENMASK(17, 16)
#define STRTAB_BASE_CFG_FMT_LINEAR      0
#define STRTAB_BASE_CFG_FMT_2LVL        1
#define STRTAB_BASE_CFG_SPLIT           GENMASK(10, 6)
#define STRTAB_BASE_CFG_LOG2SIZE        GENMASK(5, 0)

#define ARM_SMMU_CMDQ_BASE              0x90
#define ARM_SMMU_CMDQ_PROD              0x98
#define ARM_SMMU_CMDQ_CONS              0x9c

#define ARM_SMMU_EVTQ_BASE              0xa0
#define ARM_SMMU_EVTQ_PROD              0x100a8
#define ARM_SMMU_EVTQ_CONS              0x100ac
#define ARM_SMMU_EVTQ_IRQ_CFG0          0xb0
#define ARM_SMMU_EVTQ_IRQ_CFG1          0xb8
#define ARM_SMMU_EVTQ_IRQ_CFG2          0xbc

#define ARM_SMMU_PRIQ_BASE              0xc0
#define ARM_SMMU_PRIQ_PROD              0x100c8
#define ARM_SMMU_PRIQ_CONS              0x100cc
#define ARM_SMMU_PRIQ_IRQ_CFG0          0xd0
#define ARM_SMMU_PRIQ_IRQ_CFG1          0xd8
#define ARM_SMMU_PRIQ_IRQ_CFG2          0xdc

/* Common MSI config fields */
#define MSI_CFG0_ADDR_MASK              GENMASK_ULL(51, 2)
#define MSI_CFG2_SH                     GENMASK(5, 4)
#define MSI_CFG2_MEMATTR                GENMASK(3, 0)

/* Common memory attribute values */
#define ARM_SMMU_SH_NSH                 0
#define ARM_SMMU_SH_OSH                 2
#define ARM_SMMU_SH_ISH                 3
#define ARM_SMMU_MEMATTR_DEVICE_nGnRE   0x1
#define ARM_SMMU_MEMATTR_OIWB           0xf

#define Q_IDX(q, p)                     ((p) & ((1 << (q)->max_n_shift) - 1))
#define Q_WRP(q, p)                     ((p) & (1 << (q)->max_n_shift))
#define Q_OVERFLOW_FLAG                 (1 << 31)
#define Q_OVF(q, p)                     ((p) & Q_OVERFLOW_FLAG)
#define Q_ENT(q, p)                     ((q)->base +                    \
                                         Q_IDX(q, p) * (q)->ent_dwords)
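
/*
 * prod/cons layout assumed by the macros above: bits
 * [max_n_shift - 1:0] index into the ring, bit max_n_shift is the wrap
 * flag and bit 31 is the overflow flag. For example, with
 * max_n_shift == 8, Q_IDX(q, 0x1ff) == 0xff and
 * Q_WRP(q, 0x1ff) == 0x100.
 */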

#define Q_BASE_RWA                      (1UL << 62)
#define Q_BASE_ADDR_MASK                GENMASK_ULL(51, 5)
#define Q_BASE_LOG2SIZE                 GENMASK(4, 0)

/*
 * Stream table.
 *
 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
 * 2lvl: 128k L1 entries,
 *       256 lazy entries per table (each table covers a PCI bus)
 */
#define STRTAB_L1_SZ_SHIFT              20
#define STRTAB_SPLIT                    8
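
/*
 * With STRTAB_SPLIT == 8 and STRTAB_STE_DWORDS == 8, each lazily
 * allocated level-2 table is 1 << (8 + ilog2(8) + 3) == 16KB and covers
 * the 256 SIDs of one PCI bus (see arm_smmu_init_l2_strtab()).
 */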

#define STRTAB_L1_DESC_DWORDS           1
#define STRTAB_L1_DESC_SPAN             GENMASK_ULL(4, 0)
#define STRTAB_L1_DESC_L2PTR_MASK       GENMASK_ULL(51, 6)

#define STRTAB_STE_DWORDS               8
#define STRTAB_STE_0_V                  (1UL << 0)
#define STRTAB_STE_0_CFG                GENMASK_ULL(3, 1)
#define STRTAB_STE_0_CFG_ABORT          0
#define STRTAB_STE_0_CFG_BYPASS         4
#define STRTAB_STE_0_CFG_S1_TRANS       5
#define STRTAB_STE_0_CFG_S2_TRANS       6

#define STRTAB_STE_0_S1FMT              GENMASK_ULL(5, 4)
#define STRTAB_STE_0_S1FMT_LINEAR       0
#define STRTAB_STE_0_S1CTXPTR_MASK      GENMASK_ULL(51, 6)
#define STRTAB_STE_0_S1CDMAX            GENMASK_ULL(63, 59)

#define STRTAB_STE_1_S1C_CACHE_NC       0UL
#define STRTAB_STE_1_S1C_CACHE_WBRA     1UL
#define STRTAB_STE_1_S1C_CACHE_WT       2UL
#define STRTAB_STE_1_S1C_CACHE_WB       3UL
#define STRTAB_STE_1_S1CIR              GENMASK_ULL(3, 2)
#define STRTAB_STE_1_S1COR              GENMASK_ULL(5, 4)
#define STRTAB_STE_1_S1CSH              GENMASK_ULL(7, 6)

#define STRTAB_STE_1_S1STALLD           (1UL << 27)

#define STRTAB_STE_1_EATS               GENMASK_ULL(29, 28)
#define STRTAB_STE_1_EATS_ABT           0UL
#define STRTAB_STE_1_EATS_TRANS         1UL
#define STRTAB_STE_1_EATS_S1CHK         2UL

#define STRTAB_STE_1_STRW               GENMASK_ULL(31, 30)
#define STRTAB_STE_1_STRW_NSEL1         0UL
#define STRTAB_STE_1_STRW_EL2           2UL

#define STRTAB_STE_1_SHCFG              GENMASK_ULL(45, 44)
#define STRTAB_STE_1_SHCFG_INCOMING     1UL

#define STRTAB_STE_2_S2VMID             GENMASK_ULL(15, 0)
#define STRTAB_STE_2_VTCR               GENMASK_ULL(50, 32)
#define STRTAB_STE_2_S2AA64             (1UL << 51)
#define STRTAB_STE_2_S2ENDI             (1UL << 52)
#define STRTAB_STE_2_S2PTW              (1UL << 54)
#define STRTAB_STE_2_S2R                (1UL << 58)

#define STRTAB_STE_3_S2TTB_MASK         GENMASK_ULL(51, 4)

/* Context descriptor (stage-1 only) */
#define CTXDESC_CD_DWORDS               8
#define CTXDESC_CD_0_TCR_T0SZ           GENMASK_ULL(5, 0)
#define ARM64_TCR_T0SZ                  GENMASK_ULL(5, 0)
#define CTXDESC_CD_0_TCR_TG0            GENMASK_ULL(7, 6)
#define ARM64_TCR_TG0                   GENMASK_ULL(15, 14)
#define CTXDESC_CD_0_TCR_IRGN0          GENMASK_ULL(9, 8)
#define ARM64_TCR_IRGN0                 GENMASK_ULL(9, 8)
#define CTXDESC_CD_0_TCR_ORGN0          GENMASK_ULL(11, 10)
#define ARM64_TCR_ORGN0                 GENMASK_ULL(11, 10)
#define CTXDESC_CD_0_TCR_SH0            GENMASK_ULL(13, 12)
#define ARM64_TCR_SH0                   GENMASK_ULL(13, 12)
#define CTXDESC_CD_0_TCR_EPD0           (1ULL << 14)
#define ARM64_TCR_EPD0                  (1ULL << 7)
#define CTXDESC_CD_0_TCR_EPD1           (1ULL << 30)
#define ARM64_TCR_EPD1                  (1ULL << 23)

#define CTXDESC_CD_0_ENDI               (1UL << 15)
#define CTXDESC_CD_0_V                  (1UL << 31)

#define CTXDESC_CD_0_TCR_IPS            GENMASK_ULL(34, 32)
#define ARM64_TCR_IPS                   GENMASK_ULL(34, 32)
#define CTXDESC_CD_0_TCR_TBI0           (1ULL << 38)
#define ARM64_TCR_TBI0                  (1ULL << 37)

#define CTXDESC_CD_0_AA64               (1UL << 41)
#define CTXDESC_CD_0_S                  (1UL << 44)
#define CTXDESC_CD_0_R                  (1UL << 45)
#define CTXDESC_CD_0_A                  (1UL << 46)
#define CTXDESC_CD_0_ASET               (1UL << 47)
#define CTXDESC_CD_0_ASID               GENMASK_ULL(63, 48)

#define CTXDESC_CD_1_TTB0_MASK          GENMASK_ULL(51, 4)

/* Convert between AArch64 (CPU) TCR format and SMMU CD format */
#define ARM_SMMU_TCR2CD(tcr, fld)       FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
                                        FIELD_GET(ARM64_TCR_##fld, tcr))
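
/*
 * For example, ARM_SMMU_TCR2CD(tcr, TG0) extracts TCR bits [15:14]
 * (ARM64_TCR_TG0) and re-packs them at CD bits [7:6]
 * (CTXDESC_CD_0_TCR_TG0); fields whose two masks coincide, such as
 * T0SZ, pass through unchanged.
 */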

/* Command queue */
#define CMDQ_ENT_DWORDS                 2
#define CMDQ_MAX_SZ_SHIFT               8

#define CMDQ_CONS_ERR                   GENMASK(30, 24)
#define CMDQ_ERR_CERROR_NONE_IDX        0
#define CMDQ_ERR_CERROR_ILL_IDX         1
#define CMDQ_ERR_CERROR_ABT_IDX         2

#define CMDQ_0_OP                       GENMASK_ULL(7, 0)
#define CMDQ_0_SSV                      (1UL << 11)

#define CMDQ_PREFETCH_0_SID             GENMASK_ULL(63, 32)
#define CMDQ_PREFETCH_1_SIZE            GENMASK_ULL(4, 0)
#define CMDQ_PREFETCH_1_ADDR_MASK       GENMASK_ULL(63, 12)

#define CMDQ_CFGI_0_SID                 GENMASK_ULL(63, 32)
#define CMDQ_CFGI_1_LEAF                (1UL << 0)
#define CMDQ_CFGI_1_RANGE               GENMASK_ULL(4, 0)

#define CMDQ_TLBI_0_VMID                GENMASK_ULL(47, 32)
#define CMDQ_TLBI_0_ASID                GENMASK_ULL(63, 48)
#define CMDQ_TLBI_1_LEAF                (1UL << 0)
#define CMDQ_TLBI_1_VA_MASK             GENMASK_ULL(63, 12)
#define CMDQ_TLBI_1_IPA_MASK            GENMASK_ULL(51, 12)

#define CMDQ_PRI_0_SSID                 GENMASK_ULL(31, 12)
#define CMDQ_PRI_0_SID                  GENMASK_ULL(63, 32)
#define CMDQ_PRI_1_GRPID                GENMASK_ULL(8, 0)
#define CMDQ_PRI_1_RESP                 GENMASK_ULL(13, 12)

#define CMDQ_SYNC_0_CS                  GENMASK_ULL(13, 12)
#define CMDQ_SYNC_0_CS_NONE             0
#define CMDQ_SYNC_0_CS_IRQ              1
#define CMDQ_SYNC_0_CS_SEV              2
#define CMDQ_SYNC_0_MSH                 GENMASK_ULL(23, 22)
#define CMDQ_SYNC_0_MSIATTR             GENMASK_ULL(27, 24)
#define CMDQ_SYNC_0_MSIDATA             GENMASK_ULL(63, 32)
#define CMDQ_SYNC_1_MSIADDR_MASK        GENMASK_ULL(51, 2)

/* Event queue */
#define EVTQ_ENT_DWORDS                 4
#define EVTQ_MAX_SZ_SHIFT               7

#define EVTQ_0_ID                       GENMASK_ULL(7, 0)

/* PRI queue */
#define PRIQ_ENT_DWORDS                 2
#define PRIQ_MAX_SZ_SHIFT               8

#define PRIQ_0_SID                      GENMASK_ULL(31, 0)
#define PRIQ_0_SSID                     GENMASK_ULL(51, 32)
#define PRIQ_0_PERM_PRIV                (1UL << 58)
#define PRIQ_0_PERM_EXEC                (1UL << 59)
#define PRIQ_0_PERM_READ                (1UL << 60)
#define PRIQ_0_PERM_WRITE               (1UL << 61)
#define PRIQ_0_PRG_LAST                 (1UL << 62)
#define PRIQ_0_SSID_V                   (1UL << 63)

#define PRIQ_1_PRG_IDX                  GENMASK_ULL(8, 0)
#define PRIQ_1_ADDR_MASK                GENMASK_ULL(63, 12)

/* High-level queue structures */
#define ARM_SMMU_POLL_TIMEOUT_US        100
#define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US   1000000 /* 1s! */
#define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT   10

#define MSI_IOVA_BASE                   0x8000000
#define MSI_IOVA_LENGTH                 0x100000

/*
 * not really modular, but the easiest way to keep compat with existing
 * bootargs behaviour is to continue using module_param_named here.
 */
static bool disable_bypass = true;
module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
        "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

enum pri_resp {
        PRI_RESP_DENY = 0,
        PRI_RESP_FAIL = 1,
        PRI_RESP_SUCC = 2,
};

enum arm_smmu_msi_index {
        EVTQ_MSI_INDEX,
        GERROR_MSI_INDEX,
        PRIQ_MSI_INDEX,
        ARM_SMMU_MAX_MSIS,
};

static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
        [EVTQ_MSI_INDEX] = {
                ARM_SMMU_EVTQ_IRQ_CFG0,
                ARM_SMMU_EVTQ_IRQ_CFG1,
                ARM_SMMU_EVTQ_IRQ_CFG2,
        },
        [GERROR_MSI_INDEX] = {
                ARM_SMMU_GERROR_IRQ_CFG0,
                ARM_SMMU_GERROR_IRQ_CFG1,
                ARM_SMMU_GERROR_IRQ_CFG2,
        },
        [PRIQ_MSI_INDEX] = {
                ARM_SMMU_PRIQ_IRQ_CFG0,
                ARM_SMMU_PRIQ_IRQ_CFG1,
                ARM_SMMU_PRIQ_IRQ_CFG2,
        },
};

struct arm_smmu_cmdq_ent {
        /* Common fields */
        u8                              opcode;
        bool                            substream_valid;

        /* Command-specific fields */
        union {
                #define CMDQ_OP_PREFETCH_CFG    0x1
                struct {
                        u32                     sid;
                        u8                      size;
                        u64                     addr;
                } prefetch;

                #define CMDQ_OP_CFGI_STE        0x3
                #define CMDQ_OP_CFGI_ALL        0x4
                struct {
                        u32                     sid;
                        union {
                                bool            leaf;
                                u8              span;
                        };
                } cfgi;

                #define CMDQ_OP_TLBI_NH_ASID    0x11
                #define CMDQ_OP_TLBI_NH_VA      0x12
                #define CMDQ_OP_TLBI_EL2_ALL    0x20
                #define CMDQ_OP_TLBI_S12_VMALL  0x28
                #define CMDQ_OP_TLBI_S2_IPA     0x2a
                #define CMDQ_OP_TLBI_NSNH_ALL   0x30
                struct {
                        u16                     asid;
                        u16                     vmid;
                        bool                    leaf;
                        u64                     addr;
                } tlbi;

                #define CMDQ_OP_PRI_RESP        0x41
                struct {
                        u32                     sid;
                        u32                     ssid;
                        u16                     grpid;
                        enum pri_resp           resp;
                } pri;

                #define CMDQ_OP_CMD_SYNC        0x46
                struct {
                        u32                     msidata;
                        u64                     msiaddr;
                } sync;
        };
};
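
/*
 * Example (illustrative): a stage-1 VA invalidation for a single leaf
 * entry is expressed as
 *
 *        struct arm_smmu_cmdq_ent ent = {
 *                .opcode = CMDQ_OP_TLBI_NH_VA,
 *                .tlbi   = {
 *                        .asid   = asid,
 *                        .leaf   = true,
 *                        .addr   = iova,
 *                },
 *        };
 *
 * as done by arm_smmu_tlb_inv_range_nosync() below.
 */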

struct arm_smmu_queue {
        int                             irq; /* Wired interrupt */

        __le64                          *base;
        dma_addr_t                      base_dma;
        u64                             q_base;

        size_t                          ent_dwords;
        u32                             max_n_shift;
        u32                             prod;
        u32                             cons;

        u32 __iomem                     *prod_reg;
        u32 __iomem                     *cons_reg;
};

struct arm_smmu_cmdq {
        struct arm_smmu_queue           q;
        spinlock_t                      lock;
};

struct arm_smmu_evtq {
        struct arm_smmu_queue           q;
        u32                             max_stalls;
};

struct arm_smmu_priq {
        struct arm_smmu_queue           q;
};

/* High-level stream table and context descriptor structures */
struct arm_smmu_strtab_l1_desc {
        u8                              span;

        __le64                          *l2ptr;
        dma_addr_t                      l2ptr_dma;
};

struct arm_smmu_s1_cfg {
        __le64                          *cdptr;
        dma_addr_t                      cdptr_dma;

        struct arm_smmu_ctx_desc {
                u16     asid;
                u64     ttbr;
                u64     tcr;
                u64     mair;
        }                               cd;
};

struct arm_smmu_s2_cfg {
        u16                             vmid;
        u64                             vttbr;
        u64                             vtcr;
};

struct arm_smmu_strtab_ent {
        /*
         * An STE is "assigned" if the master emitting the corresponding SID
         * is attached to a domain. The behaviour of an unassigned STE is
         * determined by the disable_bypass parameter, whereas an assigned
         * STE behaves according to s1_cfg/s2_cfg, which themselves are
         * configured according to the domain type.
         */
        bool                            assigned;
        struct arm_smmu_s1_cfg          *s1_cfg;
        struct arm_smmu_s2_cfg          *s2_cfg;
};

struct arm_smmu_strtab_cfg {
        __le64                          *strtab;
        dma_addr_t                      strtab_dma;
        struct arm_smmu_strtab_l1_desc  *l1_desc;
        unsigned int                    num_l1_ents;

        u64                             strtab_base;
        u32                             strtab_base_cfg;
};

/* An SMMUv3 instance */
struct arm_smmu_device {
        struct device                   *dev;
        void __iomem                    *base;

#define ARM_SMMU_FEAT_2_LVL_STRTAB      (1 << 0)
#define ARM_SMMU_FEAT_2_LVL_CDTAB       (1 << 1)
#define ARM_SMMU_FEAT_TT_LE             (1 << 2)
#define ARM_SMMU_FEAT_TT_BE             (1 << 3)
#define ARM_SMMU_FEAT_PRI               (1 << 4)
#define ARM_SMMU_FEAT_ATS               (1 << 5)
#define ARM_SMMU_FEAT_SEV               (1 << 6)
#define ARM_SMMU_FEAT_MSI               (1 << 7)
#define ARM_SMMU_FEAT_COHERENCY         (1 << 8)
#define ARM_SMMU_FEAT_TRANS_S1          (1 << 9)
#define ARM_SMMU_FEAT_TRANS_S2          (1 << 10)
#define ARM_SMMU_FEAT_STALLS            (1 << 11)
#define ARM_SMMU_FEAT_HYP               (1 << 12)
#define ARM_SMMU_FEAT_STALL_FORCE       (1 << 13)
#define ARM_SMMU_FEAT_VAX               (1 << 14)
        u32                             features;

#define ARM_SMMU_OPT_SKIP_PREFETCH      (1 << 0)
#define ARM_SMMU_OPT_PAGE0_REGS_ONLY    (1 << 1)
        u32                             options;

        struct arm_smmu_cmdq            cmdq;
        struct arm_smmu_evtq            evtq;
        struct arm_smmu_priq            priq;

        int                             gerr_irq;
        int                             combined_irq;
        u32                             sync_nr;
        u8                              prev_cmd_opcode;

        unsigned long                   ias; /* IPA */
        unsigned long                   oas; /* PA */
        unsigned long                   pgsize_bitmap;

#define ARM_SMMU_MAX_ASIDS              (1 << 16)
        unsigned int                    asid_bits;
        DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);

#define ARM_SMMU_MAX_VMIDS              (1 << 16)
        unsigned int                    vmid_bits;
        DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);

        unsigned int                    ssid_bits;
        unsigned int                    sid_bits;

        struct arm_smmu_strtab_cfg      strtab_cfg;

        /* Hi16xx adds an extra 32 bits of goodness to its MSI payload */
        union {
                u32                     sync_count;
                u64                     padding;
        };

        /* IOMMU core code handle */
        struct iommu_device             iommu;
};

/* SMMU private data for each master */
struct arm_smmu_master_data {
        struct arm_smmu_device          *smmu;
        struct arm_smmu_strtab_ent      ste;
};

/* SMMU private data for an IOMMU domain */
enum arm_smmu_domain_stage {
        ARM_SMMU_DOMAIN_S1 = 0,
        ARM_SMMU_DOMAIN_S2,
        ARM_SMMU_DOMAIN_NESTED,
        ARM_SMMU_DOMAIN_BYPASS,
};

struct arm_smmu_domain {
        struct arm_smmu_device          *smmu;
        struct mutex                    init_mutex; /* Protects smmu pointer */

        struct io_pgtable_ops           *pgtbl_ops;
        bool                            non_strict;

        enum arm_smmu_domain_stage      stage;
        union {
                struct arm_smmu_s1_cfg  s1_cfg;
                struct arm_smmu_s2_cfg  s2_cfg;
        };

        struct iommu_domain             domain;
};

struct arm_smmu_option_prop {
        u32 opt;
        const char *prop;
};

static struct arm_smmu_option_prop arm_smmu_options[] = {
        { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
        { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
        { 0, NULL},
};

static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
                                                 struct arm_smmu_device *smmu)
{
        if ((offset > SZ_64K) &&
            (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
                offset -= SZ_64K;

        return smmu->base + offset;
}
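
/*
 * Page 1 registers (the EVTQ/PRIQ prod/cons pointers at 0x100a8 and
 * above) live 64K beyond page 0. On implementations with the Cavium
 * CN99xx register-space erratum, which expose page 0 only, the fixup
 * above folds the access back into page 0.
 */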

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
        return container_of(dom, struct arm_smmu_domain, domain);
}

static void parse_driver_options(struct arm_smmu_device *smmu)
{
        int i = 0;

        do {
                if (of_property_read_bool(smmu->dev->of_node,
                                                arm_smmu_options[i].prop)) {
                        smmu->options |= arm_smmu_options[i].opt;
                        dev_notice(smmu->dev, "option %s\n",
                                arm_smmu_options[i].prop);
                }
        } while (arm_smmu_options[++i].opt);
}
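
/*
 * A sketch of how an option is enabled from the device tree (assuming a
 * typical SMMUv3 node; the property names come from arm_smmu_options[]
 * above):
 *
 *        smmu@2b400000 {
 *                compatible = "arm,smmu-v3";
 *                ...
 *                hisilicon,broken-prefetch-cmd;
 *        };
 */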

/* Low-level queue manipulation functions */
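/*
 * Because prod and cons carry a wrap bit above the index bits, matching
 * indices mean "empty" when the wrap bits agree and "full" when they
 * differ, without sacrificing a ring slot to tell the two apart.
 */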
static bool queue_full(struct arm_smmu_queue *q)
{
        return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
               Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
}

static bool queue_empty(struct arm_smmu_queue *q)
{
        return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
               Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
}

static void queue_sync_cons(struct arm_smmu_queue *q)
{
        q->cons = readl_relaxed(q->cons_reg);
}

static void queue_inc_cons(struct arm_smmu_queue *q)
{
        u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;

        q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);

        /*
         * Ensure that all CPU accesses (reads and writes) to the queue
         * are complete before we update the cons pointer.
         */
        mb();
        writel_relaxed(q->cons, q->cons_reg);
}

static int queue_sync_prod(struct arm_smmu_queue *q)
{
        int ret = 0;
        u32 prod = readl_relaxed(q->prod_reg);

        if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
                ret = -EOVERFLOW;

        q->prod = prod;
        return ret;
}

static void queue_inc_prod(struct arm_smmu_queue *q)
{
        u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;

        q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
        writel(q->prod, q->prod_reg);
}

/*
 * Wait for the SMMU to consume items. If sync is true, wait until the queue
 * is empty. Otherwise, wait until there is at least one free slot.
 */
static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
{
        ktime_t timeout;
        unsigned int delay = 1, spin_cnt = 0;

        /* Wait longer if it's a CMD_SYNC */
        timeout = ktime_add_us(ktime_get(), sync ?
                                            ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
                                            ARM_SMMU_POLL_TIMEOUT_US);

        while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
                if (ktime_compare(ktime_get(), timeout) > 0)
                        return -ETIMEDOUT;

                if (wfe) {
                        wfe();
                } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
                        cpu_relax();
                        continue;
                } else {
                        udelay(delay);
                        delay *= 2;
                        spin_cnt = 0;
                }
        }

        return 0;
}
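
/*
 * Note the three-step back-off above: wait-for-event when the SMMU can
 * issue SEV, otherwise spin ARM_SMMU_CMDQ_SYNC_SPIN_COUNT times with
 * cpu_relax() before falling back to an exponentially growing udelay().
 */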

static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
{
        int i;

        for (i = 0; i < n_dwords; ++i)
                *dst++ = cpu_to_le64(*src++);
}

static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
{
        if (queue_full(q))
                return -ENOSPC;

        queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
        queue_inc_prod(q);
        return 0;
}

static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
{
        int i;

        for (i = 0; i < n_dwords; ++i)
                *dst++ = le64_to_cpu(*src++);
}

static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
        if (queue_empty(q))
                return -EAGAIN;

        queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
        queue_inc_cons(q);
        return 0;
}

/* High-level queue accessors */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
        memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
        cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);

        switch (ent->opcode) {
        case CMDQ_OP_TLBI_EL2_ALL:
        case CMDQ_OP_TLBI_NSNH_ALL:
                break;
        case CMDQ_OP_PREFETCH_CFG:
                cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
                cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
                cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
                break;
        case CMDQ_OP_CFGI_STE:
                cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
                cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
                break;
        case CMDQ_OP_CFGI_ALL:
                /* Cover the entire SID range */
                cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
                break;
        case CMDQ_OP_TLBI_NH_VA:
                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
                cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
                break;
        case CMDQ_OP_TLBI_S2_IPA:
                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
                cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
                cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
                break;
        case CMDQ_OP_TLBI_NH_ASID:
                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
                /* Fallthrough */
        case CMDQ_OP_TLBI_S12_VMALL:
                cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
                break;
        case CMDQ_OP_PRI_RESP:
                cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
                cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
                cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
                cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
                switch (ent->pri.resp) {
                case PRI_RESP_DENY:
                case PRI_RESP_FAIL:
                case PRI_RESP_SUCC:
                        break;
                default:
                        return -EINVAL;
                }
                cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
                break;
        case CMDQ_OP_CMD_SYNC:
                if (ent->sync.msiaddr)
                        cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
                else
                        cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
                cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
                cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
                /*
                 * Commands are written little-endian, but we want the SMMU to
                 * receive MSIData, and thus write it back to memory, in CPU
                 * byte order, so big-endian needs an extra byteswap here.
                 */
                cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA,
                                     cpu_to_le32(ent->sync.msidata));
                cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
                break;
        default:
                return -ENOENT;
        }

        return 0;
}

static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
        static const char *cerror_str[] = {
                [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
                [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
                [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
        };

        int i;
        u64 cmd[CMDQ_ENT_DWORDS];
        struct arm_smmu_queue *q = &smmu->cmdq.q;
        u32 cons = readl_relaxed(q->cons_reg);
        u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
        struct arm_smmu_cmdq_ent cmd_sync = {
                .opcode = CMDQ_OP_CMD_SYNC,
        };

        dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
                idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");

        switch (idx) {
        case CMDQ_ERR_CERROR_ABT_IDX:
                dev_err(smmu->dev, "retrying command fetch\n");
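                /* Fallthrough */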
        case CMDQ_ERR_CERROR_NONE_IDX:
                return;
        case CMDQ_ERR_CERROR_ILL_IDX:
                /* Fallthrough */
        default:
                break;
        }

        /*
         * We may have concurrent producers, so we need to be careful
         * not to touch any of the shadow cmdq state.
         */
        queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
        dev_err(smmu->dev, "skipping command in error state:\n");
        for (i = 0; i < ARRAY_SIZE(cmd); ++i)
                dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);

        /* Convert the erroneous command into a CMD_SYNC */
        if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
                dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
                return;
        }

        queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}

static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
{
        struct arm_smmu_queue *q = &smmu->cmdq.q;
        bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);

        smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);

        while (queue_insert_raw(q, cmd) == -ENOSPC) {
                if (queue_poll_cons(q, false, wfe))
                        dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
        }
}

static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
                                    struct arm_smmu_cmdq_ent *ent)
{
        u64 cmd[CMDQ_ENT_DWORDS];
        unsigned long flags;

        if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
                dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
                         ent->opcode);
                return;
        }

        spin_lock_irqsave(&smmu->cmdq.lock, flags);
        arm_smmu_cmdq_insert_cmd(smmu, cmd);
        spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
}

/*
 * The difference between val and sync_idx is bounded by the maximum size of
 * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
 */
static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
{
        ktime_t timeout;
        u32 val;

        timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
        val = smp_cond_load_acquire(&smmu->sync_count,
                                    (int)(VAL - sync_idx) >= 0 ||
                                    !ktime_before(ktime_get(), timeout));

        return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
}
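
/*
 * The signed comparison above stays correct across 32-bit wrap: e.g.
 * with sync_idx == 0xfffffffe and a completed val of 0x00000002,
 * (int)(val - sync_idx) == 4 >= 0, so the wait still terminates.
 */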
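
/*
 * If the last command queued was also a CMD_SYNC, we can simply wait
 * for that sync's MSI write (smmu->sync_nr is not advanced) instead of
 * queueing a redundant back-to-back sync.
 */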
static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
{
        u64 cmd[CMDQ_ENT_DWORDS];
        unsigned long flags;
        struct arm_smmu_cmdq_ent ent = {
                .opcode = CMDQ_OP_CMD_SYNC,
                .sync   = {
                        .msiaddr = virt_to_phys(&smmu->sync_count),
                },
        };

        spin_lock_irqsave(&smmu->cmdq.lock, flags);

        /* Piggy-back on the previous command if it's a SYNC */
        if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) {
                ent.sync.msidata = smmu->sync_nr;
        } else {
                ent.sync.msidata = ++smmu->sync_nr;
                arm_smmu_cmdq_build_cmd(cmd, &ent);
                arm_smmu_cmdq_insert_cmd(smmu, cmd);
        }

        spin_unlock_irqrestore(&smmu->cmdq.lock, flags);

        return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
}

static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
{
        u64 cmd[CMDQ_ENT_DWORDS];
        unsigned long flags;
        bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
        struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
        int ret;

        arm_smmu_cmdq_build_cmd(cmd, &ent);

        spin_lock_irqsave(&smmu->cmdq.lock, flags);
        arm_smmu_cmdq_insert_cmd(smmu, cmd);
        ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
        spin_unlock_irqrestore(&smmu->cmdq.lock, flags);

        return ret;
}

static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
{
        int ret;
        bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
                   (smmu->features & ARM_SMMU_FEAT_COHERENCY);

        ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
                  : __arm_smmu_cmdq_issue_sync(smmu);
        if (ret)
                dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
}

/* Context descriptor manipulation functions */
static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
{
        u64 val = 0;

        /* Repack the TCR. Just care about TTBR0 for now */
        val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
        val |= ARM_SMMU_TCR2CD(tcr, TG0);
        val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
        val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
        val |= ARM_SMMU_TCR2CD(tcr, SH0);
        val |= ARM_SMMU_TCR2CD(tcr, EPD0);
        val |= ARM_SMMU_TCR2CD(tcr, EPD1);
        val |= ARM_SMMU_TCR2CD(tcr, IPS);
        val |= ARM_SMMU_TCR2CD(tcr, TBI0);

        return val;
}

static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
                                    struct arm_smmu_s1_cfg *cfg)
{
        u64 val;

        /*
         * We don't need to issue any invalidation here, as we'll invalidate
         * the STE when installing the new entry anyway.
         */
        val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
#ifdef __BIG_ENDIAN
              CTXDESC_CD_0_ENDI |
#endif
              CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
              CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
              CTXDESC_CD_0_V;

        /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
        if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
                val |= CTXDESC_CD_0_S;

        cfg->cdptr[0] = cpu_to_le64(val);

        val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
        cfg->cdptr[1] = cpu_to_le64(val);

        cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
}

/* Stream table manipulation functions */
static void
arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
{
        u64 val = 0;

        val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
        val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;

        *dst = cpu_to_le64(val);
}

static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
{
        struct arm_smmu_cmdq_ent cmd = {
                .opcode = CMDQ_OP_CFGI_STE,
                .cfgi   = {
                        .sid    = sid,
                        .leaf   = true,
                },
        };

        arm_smmu_cmdq_issue_cmd(smmu, &cmd);
        arm_smmu_cmdq_issue_sync(smmu);
}

static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
                                      __le64 *dst, struct arm_smmu_strtab_ent *ste)
{
        /*
         * This is hideously complicated, but we only really care about
         * three cases at the moment:
         *
         * 1. Invalid (all zero) -> bypass/fault (init)
         * 2. Bypass/fault -> translation/bypass (attach)
         * 3. Translation/bypass -> bypass/fault (detach)
         *
         * Given that we can't update the STE atomically and the SMMU
         * doesn't read the thing in a defined order, that leaves us
         * with the following maintenance requirements:
         *
         * 1. Update Config, return (init time STEs aren't live)
         * 2. Write everything apart from dword 0, sync, write dword 0, sync
         * 3. Update Config, sync
         */
        u64 val = le64_to_cpu(dst[0]);
        bool ste_live = false;
        struct arm_smmu_cmdq_ent prefetch_cmd = {
                .opcode         = CMDQ_OP_PREFETCH_CFG,
                .prefetch       = {
                        .sid    = sid,
                },
        };

        if (val & STRTAB_STE_0_V) {
                switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
                case STRTAB_STE_0_CFG_BYPASS:
                        break;
                case STRTAB_STE_0_CFG_S1_TRANS:
                case STRTAB_STE_0_CFG_S2_TRANS:
                        ste_live = true;
                        break;
                case STRTAB_STE_0_CFG_ABORT:
                        if (disable_bypass)
                                break;
                default:
                        BUG(); /* STE corruption */
                }
        }

        /* Nuke the existing STE_0 value, as we're going to rewrite it */
        val = STRTAB_STE_0_V;

        /* Bypass/fault */
        if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
                if (!ste->assigned && disable_bypass)
                        val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
                else
                        val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);

                dst[0] = cpu_to_le64(val);
                dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
                                                STRTAB_STE_1_SHCFG_INCOMING));
                dst[2] = 0; /* Nuke the VMID */
                /*
                 * The SMMU can perform negative caching, so we must sync
                 * the STE regardless of whether the old value was live.
                 */
                if (smmu)
                        arm_smmu_sync_ste_for_sid(smmu, sid);
                return;
        }

        if (ste->s1_cfg) {
                BUG_ON(ste_live);
                dst[1] = cpu_to_le64(
                         FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
                         FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
                         FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
#ifdef CONFIG_PCI_ATS
                         FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
#endif
                         FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));

                if (smmu->features & ARM_SMMU_FEAT_STALLS &&
                   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
                        dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);

                val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
                        FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
        }

        if (ste->s2_cfg) {
                BUG_ON(ste_live);
                dst[2] = cpu_to_le64(
                         FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
                         FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
#ifdef __BIG_ENDIAN
                         STRTAB_STE_2_S2ENDI |
#endif
                         STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
                         STRTAB_STE_2_S2R);

                dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);

                val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
        }

        arm_smmu_sync_ste_for_sid(smmu, sid);
        dst[0] = cpu_to_le64(val);
        arm_smmu_sync_ste_for_sid(smmu, sid);

        /* It's likely that we'll want to use the new STE soon */
        if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
                arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
}

static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
{
        unsigned int i;
        struct arm_smmu_strtab_ent ste = { .assigned = false };

        for (i = 0; i < nent; ++i) {
                arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
                strtab += STRTAB_STE_DWORDS;
        }
}

static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
{
        size_t size;
        void *strtab;
        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
        struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];

        if (desc->l2ptr)
                return 0;

        size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
        strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];

        desc->span = STRTAB_SPLIT + 1;
        desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
                                          GFP_KERNEL | __GFP_ZERO);
        if (!desc->l2ptr) {
                dev_err(smmu->dev,
                        "failed to allocate l2 stream table for SID %u\n",
                        sid);
                return -ENOMEM;
        }

        arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
        arm_smmu_write_strtab_l1_desc(strtab, desc);
        return 0;
}

/* IRQ and event handlers */
static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
{
        int i;
        struct arm_smmu_device *smmu = dev;
        struct arm_smmu_queue *q = &smmu->evtq.q;
        u64 evt[EVTQ_ENT_DWORDS];

        do {
                while (!queue_remove_raw(q, evt)) {
                        u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);

                        dev_info(smmu->dev, "event 0x%02x received:\n", id);
                        for (i = 0; i < ARRAY_SIZE(evt); ++i)
                                dev_info(smmu->dev, "\t0x%016llx\n",
                                         (unsigned long long)evt[i]);
                }

                /*
                 * Not much we can do on overflow, so scream and pretend we're
                 * trying harder.
                 */
                if (queue_sync_prod(q) == -EOVERFLOW)
                        dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
        } while (!queue_empty(q));

        /* Sync our overflow flag, as we believe we're up to speed */
        q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
        return IRQ_HANDLED;
}

static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
{
        u32 sid, ssid;
        u16 grpid;
        bool ssv, last;

        sid = FIELD_GET(PRIQ_0_SID, evt[0]);
        ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
        ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
        last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
        grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);

        dev_info(smmu->dev, "unexpected PRI request received:\n");
        dev_info(smmu->dev,
                 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
                 sid, ssid, grpid, last ? "L" : "",
                 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
                 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
                 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
                 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
                 evt[1] & PRIQ_1_ADDR_MASK);

        if (last) {
                struct arm_smmu_cmdq_ent cmd = {
                        .opcode                 = CMDQ_OP_PRI_RESP,
                        .substream_valid        = ssv,
                        .pri                    = {
                                .sid    = sid,
                                .ssid   = ssid,
                                .grpid  = grpid,
                                .resp   = PRI_RESP_DENY,
                        },
                };

                arm_smmu_cmdq_issue_cmd(smmu, &cmd);
        }
}

static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
{
        struct arm_smmu_device *smmu = dev;
        struct arm_smmu_queue *q = &smmu->priq.q;
        u64 evt[PRIQ_ENT_DWORDS];

        do {
                while (!queue_remove_raw(q, evt))
                        arm_smmu_handle_ppr(smmu, evt);

                if (queue_sync_prod(q) == -EOVERFLOW)
                        dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
        } while (!queue_empty(q));

        /* Sync our overflow flag, as we believe we're up to speed */
        q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
        writel(q->cons, q->cons_reg);
        return IRQ_HANDLED;
}

static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1330
1331 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1332 {
1333         u32 gerror, gerrorn, active;
1334         struct arm_smmu_device *smmu = dev;
1335
1336         gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1337         gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1338
1339         active = gerror ^ gerrorn;
1340         if (!(active & GERROR_ERR_MASK))
1341                 return IRQ_NONE; /* No errors pending */
1342
1343         dev_warn(smmu->dev,
1344                  "unexpected global error reported (0x%08x), this could be serious\n",
1345                  active);
1346
1347         if (active & GERROR_SFM_ERR) {
1348                 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1349                 arm_smmu_device_disable(smmu);
1350         }
1351
1352         if (active & GERROR_MSI_GERROR_ABT_ERR)
1353                 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1354
1355         if (active & GERROR_MSI_PRIQ_ABT_ERR)
1356                 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1357
1358         if (active & GERROR_MSI_EVTQ_ABT_ERR)
1359                 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1360
1361         if (active & GERROR_MSI_CMDQ_ABT_ERR)
1362                 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1363
1364         if (active & GERROR_PRIQ_ABT_ERR)
1365                 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1366
1367         if (active & GERROR_EVTQ_ABT_ERR)
1368                 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1369
1370         if (active & GERROR_CMDQ_ERR)
1371                 arm_smmu_cmdq_skip_err(smmu);
1372
1373         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1374         return IRQ_HANDLED;
1375 }
1376
1377 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1378 {
1379         struct arm_smmu_device *smmu = dev;
1380
1381         arm_smmu_evtq_thread(irq, dev);
1382         if (smmu->features & ARM_SMMU_FEAT_PRI)
1383                 arm_smmu_priq_thread(irq, dev);
1384
1385         return IRQ_HANDLED;
1386 }
1387
1388 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1389 {
1390         arm_smmu_gerror_handler(irq, dev);
1391         return IRQ_WAKE_THREAD;
1392 }
1393
1394 /* IO_PGTABLE API */
1395 static void arm_smmu_tlb_sync(void *cookie)
1396 {
1397         struct arm_smmu_domain *smmu_domain = cookie;
1398
1399         arm_smmu_cmdq_issue_sync(smmu_domain->smmu);
1400 }
1401
1402 static void arm_smmu_tlb_inv_context(void *cookie)
1403 {
1404         struct arm_smmu_domain *smmu_domain = cookie;
1405         struct arm_smmu_device *smmu = smmu_domain->smmu;
1406         struct arm_smmu_cmdq_ent cmd;
1407
1408         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1409                 cmd.opcode      = CMDQ_OP_TLBI_NH_ASID;
1410                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1411                 cmd.tlbi.vmid   = 0;
1412         } else {
1413                 cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1414                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1415         }
1416
1417         /*
1418          * NOTE: when io-pgtable is in non-strict mode, we may get here with
1419          * PTEs previously cleared by unmaps on the current CPU not yet visible
1420          * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()
1421          * to guarantee those are observed before the TLBI. Do be careful, 007.
1422          */
1423         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1424         arm_smmu_cmdq_issue_sync(smmu);
1425 }
1426
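/*
 * Issue one TLBI per granule across [iova, iova + size) without a
 * trailing CMD_SYNC; the io-pgtable layer calls back into
 * arm_smmu_tlb_sync() when the invalidations must have completed.
 */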
1427 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1428                                           size_t granule, bool leaf, void *cookie)
1429 {
1430         struct arm_smmu_domain *smmu_domain = cookie;
1431         struct arm_smmu_device *smmu = smmu_domain->smmu;
1432         struct arm_smmu_cmdq_ent cmd = {
1433                 .tlbi = {
1434                         .leaf   = leaf,
1435                         .addr   = iova,
1436                 },
1437         };
1438
1439         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1440                 cmd.opcode      = CMDQ_OP_TLBI_NH_VA;
1441                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1442         } else {
1443                 cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1444                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1445         }
1446
1447         do {
1448                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1449                 cmd.tlbi.addr += granule;
1450         } while (size -= granule);
1451 }
1452
1453 static const struct iommu_gather_ops arm_smmu_gather_ops = {
1454         .tlb_flush_all  = arm_smmu_tlb_inv_context,
1455         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
1456         .tlb_sync       = arm_smmu_tlb_sync,
1457 };
1458
1459 /* IOMMU API */
1460 static bool arm_smmu_capable(enum iommu_cap cap)
1461 {
1462         switch (cap) {
1463         case IOMMU_CAP_CACHE_COHERENCY:
1464                 return true;
1465         case IOMMU_CAP_NOEXEC:
1466                 return true;
1467         default:
1468                 return false;
1469         }
1470 }
1471
1472 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1473 {
1474         struct arm_smmu_domain *smmu_domain;
1475
1476         if (type != IOMMU_DOMAIN_UNMANAGED &&
1477             type != IOMMU_DOMAIN_DMA &&
1478             type != IOMMU_DOMAIN_IDENTITY)
1479                 return NULL;
1480
1481         /*
1482          * Allocate the domain and initialise some of its data structures.
1483          * We can't really do anything meaningful until we've added a
1484          * master.
1485          */
1486         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1487         if (!smmu_domain)
1488                 return NULL;
1489
1490         if (type == IOMMU_DOMAIN_DMA &&
1491             iommu_get_dma_cookie(&smmu_domain->domain)) {
1492                 kfree(smmu_domain);
1493                 return NULL;
1494         }
1495
1496         mutex_init(&smmu_domain->init_mutex);
1497         return &smmu_domain->domain;
1498 }
1499
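/*
 * Simple ASID/VMID allocator: find_first_zero_bit() is only a hint, so
 * keep looping until test_and_set_bit() confirms that we actually won
 * the bit, which makes the allocation safe against concurrent callers.
 */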
1500 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1501 {
1502         int idx, size = 1 << span;
1503
1504         do {
1505                 idx = find_first_zero_bit(map, size);
1506                 if (idx == size)
1507                         return -ENOSPC;
1508         } while (test_and_set_bit(idx, map));
1509
1510         return idx;
1511 }
1512
1513 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1514 {
1515         clear_bit(idx, map);
1516 }
1517
1518 static void arm_smmu_domain_free(struct iommu_domain *domain)
1519 {
1520         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1521         struct arm_smmu_device *smmu = smmu_domain->smmu;
1522
1523         iommu_put_dma_cookie(domain);
1524         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1525
1526         /* Free the CD and ASID, if we allocated them */
1527         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1528                 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1529
1530                 if (cfg->cdptr) {
1531                         dmam_free_coherent(smmu_domain->smmu->dev,
1532                                            CTXDESC_CD_DWORDS << 3,
1533                                            cfg->cdptr,
1534                                            cfg->cdptr_dma);
1535
1536                         arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1537                 }
1538         } else {
1539                 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1540                 if (cfg->vmid)
1541                         arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1542         }
1543
1544         kfree(smmu_domain);
1545 }
1546
1547 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1548                                        struct io_pgtable_cfg *pgtbl_cfg)
1549 {
1550         int ret;
1551         int asid;
1552         struct arm_smmu_device *smmu = smmu_domain->smmu;
1553         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1554
1555         asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1556         if (asid < 0)
1557                 return asid;
1558
1559         cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1560                                          &cfg->cdptr_dma,
1561                                          GFP_KERNEL | __GFP_ZERO);
1562         if (!cfg->cdptr) {
1563                 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1564                 ret = -ENOMEM;
1565                 goto out_free_asid;
1566         }
1567
1568         cfg->cd.asid    = (u16)asid;
1569         cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1570         cfg->cd.tcr     = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1571         cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1572         return 0;
1573
1574 out_free_asid:
1575         arm_smmu_bitmap_free(smmu->asid_map, asid);
1576         return ret;
1577 }
1578
1579 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1580                                        struct io_pgtable_cfg *pgtbl_cfg)
1581 {
1582         int vmid;
1583         struct arm_smmu_device *smmu = smmu_domain->smmu;
1584         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1585
1586         vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1587         if (vmid < 0)
1588                 return vmid;
1589
1590         cfg->vmid       = (u16)vmid;
1591         cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1592         cfg->vtcr       = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1593         return 0;
1594 }
1595
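/*
 * Bind the domain to this SMMU instance on first attach: clamp the
 * requested stage to what the hardware supports, derive the input and
 * output address sizes, allocate the io-pgtable, and let the
 * stage-specific finaliser claim an ASID/VMID and build its config.
 */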
1596 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1597 {
1598         int ret;
1599         unsigned long ias, oas;
1600         enum io_pgtable_fmt fmt;
1601         struct io_pgtable_cfg pgtbl_cfg;
1602         struct io_pgtable_ops *pgtbl_ops;
1603         int (*finalise_stage_fn)(struct arm_smmu_domain *,
1604                                  struct io_pgtable_cfg *);
1605         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1606         struct arm_smmu_device *smmu = smmu_domain->smmu;
1607
1608         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1609                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1610                 return 0;
1611         }
1612
1613         /* Restrict the stage to what we can actually support */
1614         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1615                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1616         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1617                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1618
1619         switch (smmu_domain->stage) {
1620         case ARM_SMMU_DOMAIN_S1:
1621                 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1622                 ias = min_t(unsigned long, ias, VA_BITS);
1623                 oas = smmu->ias;
1624                 fmt = ARM_64_LPAE_S1;
1625                 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1626                 break;
1627         case ARM_SMMU_DOMAIN_NESTED:
1628         case ARM_SMMU_DOMAIN_S2:
1629                 ias = smmu->ias;
1630                 oas = smmu->oas;
1631                 fmt = ARM_64_LPAE_S2;
1632                 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1633                 break;
1634         default:
1635                 return -EINVAL;
1636         }
1637
1638         pgtbl_cfg = (struct io_pgtable_cfg) {
1639                 .pgsize_bitmap  = smmu->pgsize_bitmap,
1640                 .ias            = ias,
1641                 .oas            = oas,
1642                 .tlb            = &arm_smmu_gather_ops,
1643                 .iommu_dev      = smmu->dev,
1644         };
1645
1646         if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
1647                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
1648
1649         if (smmu_domain->non_strict)
1650                 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1651
1652         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1653         if (!pgtbl_ops)
1654                 return -ENOMEM;
1655
1656         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1657         domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1658         domain->geometry.force_aperture = true;
1659
1660         ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1661         if (ret < 0) {
1662                 free_io_pgtable_ops(pgtbl_ops);
1663                 return ret;
1664         }
1665
1666         smmu_domain->pgtbl_ops = pgtbl_ops;
1667         return 0;
1668 }
1669
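/*
 * Return a pointer to the STE for @sid. With a 2-level stream table
 * the SID decomposes as:
 *
 *	l1_idx = sid >> STRTAB_SPLIT;
 *	l2_idx = sid & ((1 << STRTAB_SPLIT) - 1);
 *
 * otherwise the stream table is a flat array indexed directly by SID.
 */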
1670 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1671 {
1672         __le64 *step;
1673         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1674
1675         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1676                 struct arm_smmu_strtab_l1_desc *l1_desc;
1677                 int idx;
1678
1679                 /* Two-level walk */
1680                 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1681                 l1_desc = &cfg->l1_desc[idx];
1682                 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1683                 step = &l1_desc->l2ptr[idx];
1684         } else {
1685                 /* Simple linear lookup */
1686                 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1687         }
1688
1689         return step;
1690 }
1691
1692 static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1693 {
1694         int i, j;
1695         struct arm_smmu_master_data *master = fwspec->iommu_priv;
1696         struct arm_smmu_device *smmu = master->smmu;
1697
1698         for (i = 0; i < fwspec->num_ids; ++i) {
1699                 u32 sid = fwspec->ids[i];
1700                 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1701
1702                 /* Bridged PCI devices may end up with duplicated IDs */
1703                 for (j = 0; j < i; j++)
1704                         if (fwspec->ids[j] == sid)
1705                                 break;
1706                 if (j < i)
1707                         continue;
1708
1709                 arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
1710         }
1711 }
1712
1713 static void arm_smmu_detach_dev(struct device *dev)
1714 {
1715         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1716         struct arm_smmu_master_data *master = fwspec->iommu_priv;
1717
1718         master->ste.assigned = false;
1719         arm_smmu_install_ste_for_dev(fwspec);
1720 }
1721
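/*
 * A master already attached elsewhere is detached first; the domain is
 * finalised against this SMMU on first use, and the master's STE is
 * then rewritten to reference the domain's stage-1 or stage-2
 * configuration (or neither, for bypass domains).
 */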
1722 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1723 {
1724         int ret = 0;
1725         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1726         struct arm_smmu_device *smmu;
1727         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1728         struct arm_smmu_master_data *master;
1729         struct arm_smmu_strtab_ent *ste;
1730
1731         if (!fwspec)
1732                 return -ENOENT;
1733
1734         master = fwspec->iommu_priv;
1735         smmu = master->smmu;
1736         ste = &master->ste;
1737
1738         /* Already attached to a different domain? */
1739         if (ste->assigned)
1740                 arm_smmu_detach_dev(dev);
1741
1742         mutex_lock(&smmu_domain->init_mutex);
1743
1744         if (!smmu_domain->smmu) {
1745                 smmu_domain->smmu = smmu;
1746                 ret = arm_smmu_domain_finalise(domain);
1747                 if (ret) {
1748                         smmu_domain->smmu = NULL;
1749                         goto out_unlock;
1750                 }
1751         } else if (smmu_domain->smmu != smmu) {
1752                 dev_err(dev,
1753                         "cannot attach to SMMU %s (upstream of %s)\n",
1754                         dev_name(smmu_domain->smmu->dev),
1755                         dev_name(smmu->dev));
1756                 ret = -ENXIO;
1757                 goto out_unlock;
1758         }
1759
1760         ste->assigned = true;
1761
1762         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
1763                 ste->s1_cfg = NULL;
1764                 ste->s2_cfg = NULL;
1765         } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1766                 ste->s1_cfg = &smmu_domain->s1_cfg;
1767                 ste->s2_cfg = NULL;
1768                 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1769         } else {
1770                 ste->s1_cfg = NULL;
1771                 ste->s2_cfg = &smmu_domain->s2_cfg;
1772         }
1773
1774         arm_smmu_install_ste_for_dev(fwspec);
1775 out_unlock:
1776         mutex_unlock(&smmu_domain->init_mutex);
1777         return ret;
1778 }
1779
1780 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1781                         phys_addr_t paddr, size_t size, int prot)
1782 {
1783         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1784
1785         if (!ops)
1786                 return -ENODEV;
1787
1788         return ops->map(ops, iova, paddr, size, prot);
1789 }
1790
1791 static size_t
1792 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1793 {
1794         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1795
1796         if (!ops)
1797                 return 0;
1798
1799         return ops->unmap(ops, iova, size);
1800 }
1801
1802 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1803 {
1804         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1805
1806         if (smmu_domain->smmu)
1807                 arm_smmu_tlb_inv_context(smmu_domain);
1808 }
1809
1810 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1811 {
1812         struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1813
1814         if (smmu)
1815                 arm_smmu_cmdq_issue_sync(smmu);
1816 }
1817
1818 static phys_addr_t
1819 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1820 {
1821         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1822
1823         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1824                 return iova;
1825
1826         if (!ops)
1827                 return 0;
1828
1829         return ops->iova_to_phys(ops, iova);
1830 }
1831
1832 static struct platform_driver arm_smmu_driver;
1833
1834 static int arm_smmu_match_node(struct device *dev, void *data)
1835 {
1836         return dev->fwnode == data;
1837 }
1838
1839 static
1840 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1841 {
1842         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1843                                                 fwnode, arm_smmu_match_node);
1844         put_device(dev);
1845         return dev ? dev_get_drvdata(dev) : NULL;
1846 }
1847
1848 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1849 {
1850         unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1851
1852         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1853                 limit *= 1UL << STRTAB_SPLIT;
1854
1855         return sid < limit;
1856 }
1857
1858 static struct iommu_ops arm_smmu_ops;
1859
1860 static int arm_smmu_add_device(struct device *dev)
1861 {
1862         int i, ret;
1863         struct arm_smmu_device *smmu;
1864         struct arm_smmu_master_data *master;
1865         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1866         struct iommu_group *group;
1867
1868         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1869                 return -ENODEV;
1870         /*
1871          * We _can_ actually withstand dodgy bus code re-calling add_device()
1872          * without an intervening remove_device()/of_xlate() sequence, but
1873          * we're not going to do so quietly...
1874          */
1875         if (WARN_ON_ONCE(fwspec->iommu_priv)) {
1876                 master = fwspec->iommu_priv;
1877                 smmu = master->smmu;
1878         } else {
1879                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1880                 if (!smmu)
1881                         return -ENODEV;
1882                 master = NULL;
1883         }
1884         /* Check the SIDs are in range of the SMMU and our stream table */
1885         for (i = 0; i < fwspec->num_ids; i++) {
1886                 u32 sid = fwspec->ids[i];
1887                 if (!arm_smmu_sid_in_range(smmu, sid))
1888                         return -ERANGE;
1889                 /* Ensure l2 strtab is initialised */
1890                 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1891                         ret = arm_smmu_init_l2_strtab(smmu, sid);
1892                         if (ret)
1893                                 return ret;
1894                 }
1895         }
1896
1897         /* Allocate the master only once the SIDs have been validated */
1898         if (!master) {
1899                 master = kzalloc(sizeof(*master), GFP_KERNEL);
1900                 if (!master)
1901                         return -ENOMEM;
1902                 master->smmu = smmu;
1903                 fwspec->iommu_priv = master;
1904         }
1905
1906         group = iommu_group_get_for_dev(dev);
1907         if (!IS_ERR(group)) {
1908                 iommu_group_put(group);
1909                 iommu_device_link(&smmu->iommu, dev);
1910         }
1911         return PTR_ERR_OR_ZERO(group);
1912 }
1913
1914 static void arm_smmu_remove_device(struct device *dev)
1915 {
1916         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1917         struct arm_smmu_master_data *master;
1918
1919         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1920                 return;
1921
1922         master = fwspec->iommu_priv;
1923         if (master) {   /* NULL if add_device() failed */
1924                 if (master->ste.assigned)
1925                         arm_smmu_detach_dev(dev);
1926                 iommu_device_unlink(&master->smmu->iommu, dev);
1927         }
1928         iommu_group_remove_device(dev);
1929         kfree(master);
1930         iommu_fwspec_free(dev);
1931 }
1932
1933 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1934 {
1935         struct iommu_group *group;
1936
1937         /*
1938          * We don't support devices sharing stream IDs other than PCI RID
1939          * aliases, since the necessary ID-to-device lookup becomes rather
1940          * impractical given a potentially sparse 32-bit stream ID space.
1941          */
1942         if (dev_is_pci(dev))
1943                 group = pci_device_group(dev);
1944         else
1945                 group = generic_device_group(dev);
1946
1947         return group;
1948 }
1949
1950 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1951                                     enum iommu_attr attr, void *data)
1952 {
1953         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1954
1955         switch (domain->type) {
1956         case IOMMU_DOMAIN_UNMANAGED:
1957                 switch (attr) {
1958                 case DOMAIN_ATTR_NESTING:
1959                         *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1960                         return 0;
1961                 default:
1962                         return -ENODEV;
1963                 }
1964                 break;
1965         case IOMMU_DOMAIN_DMA:
1966                 switch (attr) {
1967                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1968                         *(int *)data = smmu_domain->non_strict;
1969                         return 0;
1970                 default:
1971                         return -ENODEV;
1972                 }
1973                 break;
1974         default:
1975                 return -EINVAL;
1976         }
1977 }
1978
1979 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1980                                     enum iommu_attr attr, void *data)
1981 {
1982         int ret = 0;
1983         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1984
1985         mutex_lock(&smmu_domain->init_mutex);
1986
1987         switch (domain->type) {
1988         case IOMMU_DOMAIN_UNMANAGED:
1989                 switch (attr) {
1990                 case DOMAIN_ATTR_NESTING:
1991                         if (smmu_domain->smmu) {
1992                                 ret = -EPERM;
1993                                 goto out_unlock;
1994                         }
1995
1996                         if (*(int *)data)
1997                                 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1998                         else
1999                                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2000                         break;
2001                 default:
2002                         ret = -ENODEV;
2003                 }
2004                 break;
2005         case IOMMU_DOMAIN_DMA:
2006                 switch (attr) {
2007                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2008                         smmu_domain->non_strict = *(int *)data;
2009                         break;
2010                 default:
2011                         ret = -ENODEV;
2012                 }
2013                 break;
2014         default:
2015                 ret = -EINVAL;
2016         }
2017
2018 out_unlock:
2019         mutex_unlock(&smmu_domain->init_mutex);
2020         return ret;
2021 }
2022
2023 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2024 {
2025         return iommu_fwspec_add_ids(dev, args->args, 1);
2026 }
2027
2028 static void arm_smmu_get_resv_regions(struct device *dev,
2029                                       struct list_head *head)
2030 {
2031         struct iommu_resv_region *region;
2032         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2033
2034         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2035                                          prot, IOMMU_RESV_SW_MSI);
2036         if (!region)
2037                 return;
2038
2039         list_add_tail(&region->list, head);
2040
2041         iommu_dma_get_resv_regions(dev, head);
2042 }
2043
2044 static void arm_smmu_put_resv_regions(struct device *dev,
2045                                       struct list_head *head)
2046 {
2047         struct iommu_resv_region *entry, *next;
2048
2049         list_for_each_entry_safe(entry, next, head, list)
2050                 kfree(entry);
2051 }
2052
2053 static struct iommu_ops arm_smmu_ops = {
2054         .capable                = arm_smmu_capable,
2055         .domain_alloc           = arm_smmu_domain_alloc,
2056         .domain_free            = arm_smmu_domain_free,
2057         .attach_dev             = arm_smmu_attach_dev,
2058         .map                    = arm_smmu_map,
2059         .unmap                  = arm_smmu_unmap,
2060         .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
2061         .iotlb_sync             = arm_smmu_iotlb_sync,
2062         .iova_to_phys           = arm_smmu_iova_to_phys,
2063         .add_device             = arm_smmu_add_device,
2064         .remove_device          = arm_smmu_remove_device,
2065         .device_group           = arm_smmu_device_group,
2066         .domain_get_attr        = arm_smmu_domain_get_attr,
2067         .domain_set_attr        = arm_smmu_domain_set_attr,
2068         .of_xlate               = arm_smmu_of_xlate,
2069         .get_resv_regions       = arm_smmu_get_resv_regions,
2070         .put_resv_regions       = arm_smmu_put_resv_regions,
2071         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
2072 };
2073
2074 /* Probing and initialisation functions */
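/*
 * Allocate the DMA-coherent backing for one queue: (1 << max_n_shift)
 * entries of @dwords 64-bit words each, i.e. ((1 << max_n_shift) *
 * dwords) << 3 bytes. Also precompute the Q_BASE register value, which
 * encodes the base address together with log2 of the queue size.
 */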
2075 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2076                                    struct arm_smmu_queue *q,
2077                                    unsigned long prod_off,
2078                                    unsigned long cons_off,
2079                                    size_t dwords)
2080 {
2081         size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
2082
2083         q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
2084         if (!q->base) {
2085                 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
2086                         qsz);
2087                 return -ENOMEM;
2088         }
2089
2090         q->prod_reg     = arm_smmu_page1_fixup(prod_off, smmu);
2091         q->cons_reg     = arm_smmu_page1_fixup(cons_off, smmu);
2092         q->ent_dwords   = dwords;
2093
2094         q->q_base  = Q_BASE_RWA;
2095         q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2096         q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
2097
2098         q->prod = q->cons = 0;
2099         return 0;
2100 }
2101
2102 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2103 {
2104         int ret;
2105
2106         /* cmdq */
2107         spin_lock_init(&smmu->cmdq.lock);
2108         ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2109                                       ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
2110         if (ret)
2111                 return ret;
2112
2113         /* evtq */
2114         ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2115                                       ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
2116         if (ret)
2117                 return ret;
2118
2119         /* priq */
2120         if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2121                 return 0;
2122
2123         return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2124                                        ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2125 }
2126
2127 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2128 {
2129         unsigned int i;
2130         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2131         size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2132         void *strtab = smmu->strtab_cfg.strtab;
2133
2134         cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2135         if (!cfg->l1_desc) {
2136                 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2137                 return -ENOMEM;
2138         }
2139
2140         for (i = 0; i < cfg->num_l1_ents; ++i) {
2141                 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2142                 strtab += STRTAB_L1_DESC_DWORDS << 3;
2143         }
2144
2145         return 0;
2146 }
2147
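/*
 * Two-level stream table: the L1 table is sized so that, together with
 * the STRTAB_SPLIT low SID bits covered by each L2 table, it spans the
 * SID space (capped by STRTAB_L1_SZ_SHIFT); the L2 tables themselves
 * are allocated lazily, per SID range, by arm_smmu_init_l2_strtab().
 */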
2148 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2149 {
2150         void *strtab;
2151         u64 reg;
2152         u32 size, l1size;
2153         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2154
2155         /* Calculate the L1 size, capped to the SIDSIZE. */
2156         size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2157         size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2158         cfg->num_l1_ents = 1 << size;
2159
2160         size += STRTAB_SPLIT;
2161         if (size < smmu->sid_bits)
2162                 dev_warn(smmu->dev,
2163                          "2-level strtab only covers %u/%u bits of SID\n",
2164                          size, smmu->sid_bits);
2165
2166         l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2167         strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2168                                      GFP_KERNEL | __GFP_ZERO);
2169         if (!strtab) {
2170                 dev_err(smmu->dev,
2171                         "failed to allocate l1 stream table (%u bytes)\n",
2172                         l1size);
2173                 return -ENOMEM;
2174         }
2175         cfg->strtab = strtab;
2176
2177         /* Configure strtab_base_cfg for 2 levels */
2178         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2179         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2180         reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2181         cfg->strtab_base_cfg = reg;
2182
2183         return arm_smmu_init_l1_strtab(smmu);
2184 }
2185
2186 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2187 {
2188         void *strtab;
2189         u64 reg;
2190         u32 size;
2191         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2192
2193         size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2194         strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2195                                      GFP_KERNEL | __GFP_ZERO);
2196         if (!strtab) {
2197                 dev_err(smmu->dev,
2198                         "failed to allocate linear stream table (%u bytes)\n",
2199                         size);
2200                 return -ENOMEM;
2201         }
2202         cfg->strtab = strtab;
2203         cfg->num_l1_ents = 1 << smmu->sid_bits;
2204
2205         /* Configure strtab_base_cfg for a linear table covering all SIDs */
2206         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2207         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2208         cfg->strtab_base_cfg = reg;
2209
2210         arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2211         return 0;
2212 }
2213
2214 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2215 {
2216         u64 reg;
2217         int ret;
2218
2219         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2220                 ret = arm_smmu_init_strtab_2lvl(smmu);
2221         else
2222                 ret = arm_smmu_init_strtab_linear(smmu);
2223
2224         if (ret)
2225                 return ret;
2226
2227         /* Set the strtab base address */
2228         reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2229         reg |= STRTAB_BASE_RA;
2230         smmu->strtab_cfg.strtab_base = reg;
2231
2232         /* Allocate the first VMID for stage-2 bypass STEs */
2233         set_bit(0, smmu->vmid_map);
2234         return 0;
2235 }
2236
2237 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2238 {
2239         int ret;
2240
2241         ret = arm_smmu_init_queues(smmu);
2242         if (ret)
2243                 return ret;
2244
2245         return arm_smmu_init_strtab(smmu);
2246 }
2247
2248 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2249                                    unsigned int reg_off, unsigned int ack_off)
2250 {
2251         u32 reg;
2252
2253         writel_relaxed(val, smmu->base + reg_off);
2254         return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2255                                           1, ARM_SMMU_POLL_TIMEOUT_US);
2256 }
2257
2258 /* GBPA is "special": poll GBPA_UPDATE clear both before and after a write */
2259 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2260 {
2261         int ret;
2262         u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2263
2264         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2265                                          1, ARM_SMMU_POLL_TIMEOUT_US);
2266         if (ret)
2267                 return ret;
2268
2269         reg &= ~clr;
2270         reg |= set;
2271         writel_relaxed(reg | GBPA_UPDATE, gbpa);
2272         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2273                                          1, ARM_SMMU_POLL_TIMEOUT_US);
2274
2275         if (ret)
2276                 dev_err(smmu->dev, "GBPA not responding to update\n");
2277         return ret;
2278 }
2279
2280 static void arm_smmu_free_msis(void *data)
2281 {
2282         struct device *dev = data;
2283         platform_msi_domain_free_irqs(dev);
2284 }
2285
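/*
 * platform-MSI write callback: steer the composed message into the
 * doorbell address, data and memory-attribute registers selected by
 * the MSI index, masking the doorbell to the architected address bits.
 */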
2286 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2287 {
2288         phys_addr_t doorbell;
2289         struct device *dev = msi_desc_to_dev(desc);
2290         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2291         phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2292
2293         doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2294         doorbell &= MSI_CFG0_ADDR_MASK;
2295
2296         writeq_relaxed(doorbell, smmu->base + cfg[0]);
2297         writel_relaxed(msg->data, smmu->base + cfg[1]);
2298         writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2299 }
2300
2301 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2302 {
2303         struct msi_desc *desc;
2304         int ret, nvec = ARM_SMMU_MAX_MSIS;
2305         struct device *dev = smmu->dev;
2306
2307         /* Clear the MSI address regs */
2308         writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2309         writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2310
2311         if (smmu->features & ARM_SMMU_FEAT_PRI)
2312                 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2313         else
2314                 nvec--;
2315
2316         if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2317                 return;
2318
2319         if (!dev->msi_domain) {
2320                 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2321                 return;
2322         }
2323
2324         /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2325         ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2326         if (ret) {
2327                 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2328                 return;
2329         }
2330
2331         for_each_msi_entry(desc, dev) {
2332                 switch (desc->platform.msi_index) {
2333                 case EVTQ_MSI_INDEX:
2334                         smmu->evtq.q.irq = desc->irq;
2335                         break;
2336                 case GERROR_MSI_INDEX:
2337                         smmu->gerr_irq = desc->irq;
2338                         break;
2339                 case PRIQ_MSI_INDEX:
2340                         smmu->priq.q.irq = desc->irq;
2341                         break;
2342                 default:        /* Unknown */
2343                         continue;
2344                 }
2345         }
2346
2347         /* Add callback to free MSIs on teardown */
2348         devm_add_action(dev, arm_smmu_free_msis, dev);
2349 }
2350
2351 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2352 {
2353         int irq, ret;
2354
2355         arm_smmu_setup_msis(smmu);
2356
2357         /* Request interrupt lines */
2358         irq = smmu->evtq.q.irq;
2359         if (irq) {
2360                 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2361                                                 arm_smmu_evtq_thread,
2362                                                 IRQF_ONESHOT,
2363                                                 "arm-smmu-v3-evtq", smmu);
2364                 if (ret < 0)
2365                         dev_warn(smmu->dev, "failed to enable evtq irq\n");
2366         } else {
2367                 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2368         }
2369
2370         irq = smmu->gerr_irq;
2371         if (irq) {
2372                 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2373                                        0, "arm-smmu-v3-gerror", smmu);
2374                 if (ret < 0)
2375                         dev_warn(smmu->dev, "failed to enable gerror irq\n");
2376         } else {
2377                 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2378         }
2379
2380         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2381                 irq = smmu->priq.q.irq;
2382                 if (irq) {
2383                         ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2384                                                         arm_smmu_priq_thread,
2385                                                         IRQF_ONESHOT,
2386                                                         "arm-smmu-v3-priq",
2387                                                         smmu);
2388                         if (ret < 0)
2389                                 dev_warn(smmu->dev,
2390                                          "failed to enable priq irq\n");
2391                 } else {
2392                         dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2393                 }
2394         }
2395 }
2396
2397 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2398 {
2399         int ret, irq;
2400         u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2401
2402         /* Disable IRQs first */
2403         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2404                                       ARM_SMMU_IRQ_CTRLACK);
2405         if (ret) {
2406                 dev_err(smmu->dev, "failed to disable irqs\n");
2407                 return ret;
2408         }
2409
2410         irq = smmu->combined_irq;
2411         if (irq) {
2412                 /*
2413                  * Cavium ThunderX2 implementation doesn't support unique irq
2414                  * lines. Use a single irq line for all the SMMUv3 interrupts.
2415                  */
2416                 ret = devm_request_threaded_irq(smmu->dev, irq,
2417                                         arm_smmu_combined_irq_handler,
2418                                         arm_smmu_combined_irq_thread,
2419                                         IRQF_ONESHOT,
2420                                         "arm-smmu-v3-combined-irq", smmu);
2421                 if (ret < 0)
2422                         dev_warn(smmu->dev, "failed to enable combined irq\n");
2423         } else {
2424                 arm_smmu_setup_unique_irqs(smmu);
2425         }
2426         if (smmu->features & ARM_SMMU_FEAT_PRI)
2427                 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2428
2429         /* Enable interrupt generation on the SMMU */
2430         ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2431                                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2432         if (ret)
2433                 dev_warn(smmu->dev, "failed to enable irqs\n");
2434
2435         return 0;
2436 }
2437
2438 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2439 {
2440         int ret;
2441
2442         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2443         if (ret)
2444                 dev_err(smmu->dev, "failed to clear cr0\n");
2445
2446         return ret;
2447 }
2448
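/*
 * Bring the SMMU up from an unknown state: disable it, program the
 * table/queue attributes and the stream table and queue base registers,
 * invalidate any cached configuration and TLB state, then enable the
 * queues and finally the SMMU itself (or global bypass, as requested).
 */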
2449 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
2450 {
2451         int ret;
2452         u32 reg, enables;
2453         struct arm_smmu_cmdq_ent cmd;
2454
2455         /* Clear CR0 and sync (disables SMMU and queue processing) */
2456         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2457         if (reg & CR0_SMMUEN) {
2458                 if (is_kdump_kernel()) {
2459                         arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
2460                         arm_smmu_device_disable(smmu);
2461                         return -EBUSY;
2462                 }
2463
2464                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2465         }
2466
2467         ret = arm_smmu_device_disable(smmu);
2468         if (ret)
2469                 return ret;
2470
2471         /* CR1 (table and queue memory attributes) */
2472         reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
2473               FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
2474               FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
2475               FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
2476               FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
2477               FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
2478         writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2479
2480         /* CR2 (random crap) */
2481         reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2482         writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2483
2484         /* Stream table */
2485         writeq_relaxed(smmu->strtab_cfg.strtab_base,
2486                        smmu->base + ARM_SMMU_STRTAB_BASE);
2487         writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2488                        smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2489
2490         /* Command queue */
2491         writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2492         writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2493         writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2494
2495         enables = CR0_CMDQEN;
2496         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2497                                       ARM_SMMU_CR0ACK);
2498         if (ret) {
2499                 dev_err(smmu->dev, "failed to enable command queue\n");
2500                 return ret;
2501         }
2502
2503         /* Invalidate any cached configuration */
2504         cmd.opcode = CMDQ_OP_CFGI_ALL;
2505         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2506         arm_smmu_cmdq_issue_sync(smmu);
2507
2508         /* Invalidate any stale TLB entries */
2509         if (smmu->features & ARM_SMMU_FEAT_HYP) {
2510                 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2511                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2512         }
2513
2514         cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2515         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2516         arm_smmu_cmdq_issue_sync(smmu);
2517
2518         /* Event queue */
2519         writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2520         writel_relaxed(smmu->evtq.q.prod,
2521                        arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
2522         writel_relaxed(smmu->evtq.q.cons,
2523                        arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
2524
2525         enables |= CR0_EVTQEN;
2526         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2527                                       ARM_SMMU_CR0ACK);
2528         if (ret) {
2529                 dev_err(smmu->dev, "failed to enable event queue\n");
2530                 return ret;
2531         }
2532
2533         /* PRI queue */
2534         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2535                 writeq_relaxed(smmu->priq.q.q_base,
2536                                smmu->base + ARM_SMMU_PRIQ_BASE);
2537                 writel_relaxed(smmu->priq.q.prod,
2538                                arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
2539                 writel_relaxed(smmu->priq.q.cons,
2540                                arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
2541
2542                 enables |= CR0_PRIQEN;
2543                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2544                                               ARM_SMMU_CR0ACK);
2545                 if (ret) {
2546                         dev_err(smmu->dev, "failed to enable PRI queue\n");
2547                         return ret;
2548                 }
2549         }
2550
2551         ret = arm_smmu_setup_irqs(smmu);
2552         if (ret) {
2553                 dev_err(smmu->dev, "failed to setup irqs\n");
2554                 return ret;
2555         }
2556
2557
2559         if (!bypass || disable_bypass) {
2560                 enables |= CR0_SMMUEN;
2561         } else {
2562                 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
2563                 if (ret)
2564                         return ret;
2565         }
2566         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2567                                       ARM_SMMU_CR0ACK);
2568         if (ret) {
2569                 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2570                 return ret;
2571         }
2572
2573         return 0;
2574 }
2575
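/*
 * Probe the ID registers: features, translation stages and table
 * formats from IDR0; queue sizes and StreamID/SubstreamID widths from
 * IDR1; supported page sizes and input/output address sizes from IDR5.
 */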
2576 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
2577 {
2578         u32 reg;
2579         bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
2580
2581         /* IDR0 */
2582         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2583
2584         /* 2-level structures */
2585         if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
2586                 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2587
2588         if (reg & IDR0_CD2L)
2589                 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2590
2591         /*
2592          * Translation table endianness.
2593          * We currently require the same endianness as the CPU, but this
2594          * could be changed later by adding a new IO_PGTABLE_QUIRK.
2595          */
2596         switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
2597         case IDR0_TTENDIAN_MIXED:
2598                 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2599                 break;
2600 #ifdef __BIG_ENDIAN
2601         case IDR0_TTENDIAN_BE:
2602                 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2603                 break;
2604 #else
2605         case IDR0_TTENDIAN_LE:
2606                 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2607                 break;
2608 #endif
2609         default:
2610                 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2611                 return -ENXIO;
2612         }
2613
2614         /* Boolean feature flags */
2615         if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2616                 smmu->features |= ARM_SMMU_FEAT_PRI;
2617
2618         if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2619                 smmu->features |= ARM_SMMU_FEAT_ATS;
2620
2621         if (reg & IDR0_SEV)
2622                 smmu->features |= ARM_SMMU_FEAT_SEV;
2623
2624         if (reg & IDR0_MSI)
2625                 smmu->features |= ARM_SMMU_FEAT_MSI;
2626
2627         if (reg & IDR0_HYP)
2628                 smmu->features |= ARM_SMMU_FEAT_HYP;
2629
2630         /*
2631          * The coherency feature as set by FW is used in preference to the ID
2632          * register, but warn on mismatch.
2633          */
2634         if (!!(reg & IDR0_COHACC) != coherent)
2635                 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
2636                          coherent ? "true" : "false");
2637
2638         switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
2639         case IDR0_STALL_MODEL_FORCE:
2640                 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
2641                 /* Fallthrough */
2642         case IDR0_STALL_MODEL_STALL:
2643                 smmu->features |= ARM_SMMU_FEAT_STALLS;
2644         }
2645
2646         if (reg & IDR0_S1P)
2647                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2648
2649         if (reg & IDR0_S2P)
2650                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2651
2652         if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2653                 dev_err(smmu->dev, "no translation support!\n");
2654                 return -ENXIO;
2655         }
2656
2657         /* We only support the AArch64 table format at present */
2658         switch (FIELD_GET(IDR0_TTF, reg)) {
2659         case IDR0_TTF_AARCH32_64:
2660                 smmu->ias = 40;
2661                 /* Fallthrough */
2662         case IDR0_TTF_AARCH64:
2663                 break;
2664         default:
2665                 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2666                 return -ENXIO;
2667         }
2668
2669         /* ASID/VMID sizes */
2670         smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2671         smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2672
2673         /* IDR1 */
2674         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2675         if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2676                 dev_err(smmu->dev, "embedded implementation not supported\n");
2677                 return -ENXIO;
2678         }
2679
2680         /* Queue sizes, capped at 4k */
2681         smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
2682                                          FIELD_GET(IDR1_CMDQS, reg));
2683         if (!smmu->cmdq.q.max_n_shift) {
2684                 /* Odd alignment restrictions on the base, so ignore for now */
2685                 dev_err(smmu->dev, "unit-length command queue not supported\n");
2686                 return -ENXIO;
2687         }
2688
2689         smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
2690                                          FIELD_GET(IDR1_EVTQS, reg));
2691         smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
2692                                          FIELD_GET(IDR1_PRIQS, reg));
2693
2694         /* SID/SSID sizes */
2695         smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
2696         smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
2697
2698         /*
2699          * If the SMMU supports fewer bits than would fill a single L2 stream
2700          * table, use a linear table instead.
2701          */
2702         if (smmu->sid_bits <= STRTAB_SPLIT)
2703                 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
2704
2705         /* IDR5 */
2706         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2707
2708         /* Maximum number of outstanding stalls */
2709         smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
2710
2711         /* Page sizes */
2712         if (reg & IDR5_GRAN64K)
2713                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2714         if (reg & IDR5_GRAN16K)
2715                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2716         if (reg & IDR5_GRAN4K)
2717                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2718
2719         /* Input address size */
2720         if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
2721                 smmu->features |= ARM_SMMU_FEAT_VAX;
2722
2723         /* Output address size */
2724         switch (FIELD_GET(IDR5_OAS, reg)) {
2725         case IDR5_OAS_32_BIT:
2726                 smmu->oas = 32;
2727                 break;
2728         case IDR5_OAS_36_BIT:
2729                 smmu->oas = 36;
2730                 break;
2731         case IDR5_OAS_40_BIT:
2732                 smmu->oas = 40;
2733                 break;
2734         case IDR5_OAS_42_BIT:
2735                 smmu->oas = 42;
2736                 break;
2737         case IDR5_OAS_44_BIT:
2738                 smmu->oas = 44;
2739                 break;
2740         case IDR5_OAS_52_BIT:
2741                 smmu->oas = 52;
2742                 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
2743                 break;
2744         default:
2745                 dev_info(smmu->dev,
2746                         "unknown output address size. Truncating to 48-bit\n");
2747                 /* Fallthrough */
2748         case IDR5_OAS_48_BIT:
2749                 smmu->oas = 48;
2750         }
2751
2752         if (arm_smmu_ops.pgsize_bitmap == -1UL)
2753                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2754         else
2755                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2756
2757         /* Set the DMA mask for our table walker */
2758         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2759                 dev_warn(smmu->dev,
2760                          "failed to set DMA mask for table walker\n");
2761
2762         smmu->ias = max(smmu->ias, smmu->oas);
2763
2764         dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2765                  smmu->ias, smmu->oas, smmu->features);
2766         return 0;
2767 }
2768
2769 #ifdef CONFIG_ACPI
2770 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
2771 {
2772         switch (model) {
2773         case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
2774                 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
2775                 break;
2776         case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
2777                 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
2778                 break;
2779         }
2780
2781         dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
2782 }
2783
2784 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2785                                       struct arm_smmu_device *smmu)
2786 {
2787         struct acpi_iort_smmu_v3 *iort_smmu;
2788         struct device *dev = smmu->dev;
2789         struct acpi_iort_node *node;
2790
2791         node = *(struct acpi_iort_node **)dev_get_platdata(dev);
2792
2793         /* Retrieve SMMUv3 specific data */
2794         iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
2795
2796         acpi_smmu_get_options(iort_smmu->model, smmu);
2797
2798         if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
2799                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2800
2801         return 0;
2802 }
2803 #else
2804 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2805                                              struct arm_smmu_device *smmu)
2806 {
2807         return -ENODEV;
2808 }
2809 #endif
2810
2811 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2812                                     struct arm_smmu_device *smmu)
2813 {
2814         struct device *dev = &pdev->dev;
2815         u32 cells;
2816         int ret = -EINVAL;
2817
2818         if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
2819                 dev_err(dev, "missing #iommu-cells property\n");
2820         else if (cells != 1)
2821                 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
2822         else
2823                 ret = 0;
2824
2825         parse_driver_options(smmu);
2826
2827         if (of_dma_is_coherent(dev->of_node))
2828                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2829
2830         return ret;
2831 }
2832
2833 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
2834 {
2835         if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
2836                 return SZ_64K;
2837         else
2838                 return SZ_128K;
2839 }
2840
2841 static int arm_smmu_device_probe(struct platform_device *pdev)
2842 {
2843         int irq, ret;
2844         struct resource *res;
2845         resource_size_t ioaddr;
2846         struct arm_smmu_device *smmu;
2847         struct device *dev = &pdev->dev;
2848         bool bypass;
2849
2850         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2851         if (!smmu) {
2852                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2853                 return -ENOMEM;
2854         }
2855         smmu->dev = dev;
2856
2857         if (dev->of_node) {
2858                 ret = arm_smmu_device_dt_probe(pdev, smmu);
2859         } else {
2860                 ret = arm_smmu_device_acpi_probe(pdev, smmu);
2861                 if (ret == -ENODEV)
2862                         return ret;
2863         }
2864
2865         /* Set bypass mode according to firmware probing result */
2866         bypass = !!ret;
2867
2868         /* Base address */
2869         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2870         if (!res || resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
2871                 dev_err(dev, "MMIO region too small (%pr)\n", res);
2872                 return -EINVAL;
2873         }
2874         ioaddr = res->start;
2875
2876         smmu->base = devm_ioremap_resource(dev, res);
2877         if (IS_ERR(smmu->base))
2878                 return PTR_ERR(smmu->base);
2879
2880         /* Interrupt lines */
2881
2882         irq = platform_get_irq_byname(pdev, "combined");
2883         if (irq > 0)
2884                 smmu->combined_irq = irq;
2885         else {
2886                 irq = platform_get_irq_byname(pdev, "eventq");
2887                 if (irq > 0)
2888                         smmu->evtq.q.irq = irq;
2889
2890                 irq = platform_get_irq_byname(pdev, "priq");
2891                 if (irq > 0)
2892                         smmu->priq.q.irq = irq;
2893
2894                 irq = platform_get_irq_byname(pdev, "gerror");
2895                 if (irq > 0)
2896                         smmu->gerr_irq = irq;
2897         }
2898         /* Probe the h/w */
2899         ret = arm_smmu_device_hw_probe(smmu);
2900         if (ret)
2901                 return ret;
2902
2903         /* Initialise in-memory data structures */
2904         ret = arm_smmu_init_structures(smmu);
2905         if (ret)
2906                 return ret;
2907
2908         /* Record our private device structure */
2909         platform_set_drvdata(pdev, smmu);
2910
2911         /* Reset the device */
2912         ret = arm_smmu_device_reset(smmu, bypass);
2913         if (ret)
2914                 return ret;
2915
2916         /* And we're up. Go go go! */
2917         ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
2918                                      "smmu3.%pa", &ioaddr);
2919         if (ret)
2920                 return ret;
2921
2922         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2923         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2924
2925         ret = iommu_device_register(&smmu->iommu);
2926         if (ret) {
2927                 dev_err(dev, "failed to register iommu\n");
2928                 return ret;
2929         }
2930
2931 #ifdef CONFIG_PCI
2932         if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
2933                 pci_request_acs();
2934                 ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2935                 if (ret)
2936                         return ret;
2937         }
2938 #endif
2939 #ifdef CONFIG_ARM_AMBA
2940         if (amba_bustype.iommu_ops != &arm_smmu_ops) {
2941                 ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2942                 if (ret)
2943                         return ret;
2944         }
2945 #endif
2946         if (platform_bus_type.iommu_ops != &arm_smmu_ops) {
2947                 ret = bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2948                 if (ret)
2949                         return ret;
2950         }
2951         return 0;
2952 }
2953
2954 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2955 {
2956         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2957
2958         arm_smmu_device_disable(smmu);
2959 }
2960
2961 static const struct of_device_id arm_smmu_of_match[] = {
2962         { .compatible = "arm,smmu-v3", },
2963         { },
2964 };
2965
2966 static struct platform_driver arm_smmu_driver = {
2967         .driver = {
2968                 .name           = "arm-smmu-v3",
2969                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2970                 .suppress_bind_attrs = true,
2971         },
2972         .probe  = arm_smmu_device_probe,
2973         .shutdown = arm_smmu_device_shutdown,
2974 };
2975 builtin_platform_driver(arm_smmu_driver);