// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitfield.h>
15 #include <linux/bitops.h>
16 #include <linux/crash_dump.h>
17 #include <linux/delay.h>
18 #include <linux/dma-iommu.h>
19 #include <linux/err.h>
20 #include <linux/interrupt.h>
21 #include <linux/io-pgtable.h>
22 #include <linux/iommu.h>
23 #include <linux/iopoll.h>
24 #include <linux/init.h>
25 #include <linux/moduleparam.h>
26 #include <linux/msi.h>
28 #include <linux/of_address.h>
29 #include <linux/of_iommu.h>
30 #include <linux/of_platform.h>
31 #include <linux/pci.h>
32 #include <linux/platform_device.h>
34 #include <linux/amba/bus.h>
37 #define ARM_SMMU_IDR0 0x0
38 #define IDR0_ST_LVL GENMASK(28, 27)
39 #define IDR0_ST_LVL_2LVL 1
40 #define IDR0_STALL_MODEL GENMASK(25, 24)
41 #define IDR0_STALL_MODEL_STALL 0
42 #define IDR0_STALL_MODEL_FORCE 2
43 #define IDR0_TTENDIAN GENMASK(22, 21)
44 #define IDR0_TTENDIAN_MIXED 0
45 #define IDR0_TTENDIAN_LE 2
46 #define IDR0_TTENDIAN_BE 3
47 #define IDR0_CD2L (1 << 19)
48 #define IDR0_VMID16 (1 << 18)
49 #define IDR0_PRI (1 << 16)
50 #define IDR0_SEV (1 << 14)
51 #define IDR0_MSI (1 << 13)
52 #define IDR0_ASID16 (1 << 12)
53 #define IDR0_ATS (1 << 10)
54 #define IDR0_HYP (1 << 9)
55 #define IDR0_COHACC (1 << 4)
56 #define IDR0_TTF GENMASK(3, 2)
57 #define IDR0_TTF_AARCH64 2
58 #define IDR0_TTF_AARCH32_64 3
59 #define IDR0_S1P (1 << 1)
60 #define IDR0_S2P (1 << 0)
62 #define ARM_SMMU_IDR1 0x4
63 #define IDR1_TABLES_PRESET (1 << 30)
64 #define IDR1_QUEUES_PRESET (1 << 29)
65 #define IDR1_REL (1 << 28)
66 #define IDR1_CMDQS GENMASK(25, 21)
67 #define IDR1_EVTQS GENMASK(20, 16)
68 #define IDR1_PRIQS GENMASK(15, 11)
69 #define IDR1_SSIDSIZE GENMASK(10, 6)
70 #define IDR1_SIDSIZE GENMASK(5, 0)
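/*
 * Illustrative sketch (not part of the driver): these ID register fields are
 * decoded with the bitfield helpers from <linux/bitfield.h>, e.g. reading
 * IDR1 and pulling out the StreamID/SubstreamID widths. The real decoding
 * happens in the probe path later in this file.
 *
 *	u32 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
 *	unsigned int sid_bits  = FIELD_GET(IDR1_SIDSIZE, reg);
 *	unsigned int ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
 */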
72 #define ARM_SMMU_IDR5 0x14
73 #define IDR5_STALL_MAX GENMASK(31, 16)
74 #define IDR5_GRAN64K (1 << 6)
75 #define IDR5_GRAN16K (1 << 5)
76 #define IDR5_GRAN4K (1 << 4)
77 #define IDR5_OAS GENMASK(2, 0)
78 #define IDR5_OAS_32_BIT 0
79 #define IDR5_OAS_36_BIT 1
80 #define IDR5_OAS_40_BIT 2
81 #define IDR5_OAS_42_BIT 3
82 #define IDR5_OAS_44_BIT 4
83 #define IDR5_OAS_48_BIT 5
84 #define IDR5_OAS_52_BIT 6
85 #define IDR5_VAX GENMASK(11, 10)
86 #define IDR5_VAX_52_BIT 1
88 #define ARM_SMMU_CR0 0x20
89 #define CR0_CMDQEN (1 << 3)
90 #define CR0_EVTQEN (1 << 2)
91 #define CR0_PRIQEN (1 << 1)
92 #define CR0_SMMUEN (1 << 0)
94 #define ARM_SMMU_CR0ACK 0x24
96 #define ARM_SMMU_CR1 0x28
97 #define CR1_TABLE_SH GENMASK(11, 10)
98 #define CR1_TABLE_OC GENMASK(9, 8)
99 #define CR1_TABLE_IC GENMASK(7, 6)
100 #define CR1_QUEUE_SH GENMASK(5, 4)
101 #define CR1_QUEUE_OC GENMASK(3, 2)
102 #define CR1_QUEUE_IC GENMASK(1, 0)
103 /* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
104 #define CR1_CACHE_NC 0
105 #define CR1_CACHE_WB 1
106 #define CR1_CACHE_WT 2
108 #define ARM_SMMU_CR2 0x2c
109 #define CR2_PTM (1 << 2)
110 #define CR2_RECINVSID (1 << 1)
111 #define CR2_E2H (1 << 0)
113 #define ARM_SMMU_GBPA 0x44
114 #define GBPA_UPDATE (1 << 31)
115 #define GBPA_ABORT (1 << 20)
117 #define ARM_SMMU_IRQ_CTRL 0x50
118 #define IRQ_CTRL_EVTQ_IRQEN (1 << 2)
119 #define IRQ_CTRL_PRIQ_IRQEN (1 << 1)
120 #define IRQ_CTRL_GERROR_IRQEN (1 << 0)
122 #define ARM_SMMU_IRQ_CTRLACK 0x54
124 #define ARM_SMMU_GERROR 0x60
125 #define GERROR_SFM_ERR (1 << 8)
126 #define GERROR_MSI_GERROR_ABT_ERR (1 << 7)
127 #define GERROR_MSI_PRIQ_ABT_ERR (1 << 6)
128 #define GERROR_MSI_EVTQ_ABT_ERR (1 << 5)
129 #define GERROR_MSI_CMDQ_ABT_ERR (1 << 4)
130 #define GERROR_PRIQ_ABT_ERR (1 << 3)
131 #define GERROR_EVTQ_ABT_ERR (1 << 2)
132 #define GERROR_CMDQ_ERR (1 << 0)
133 #define GERROR_ERR_MASK 0xfd
135 #define ARM_SMMU_GERRORN 0x64
137 #define ARM_SMMU_GERROR_IRQ_CFG0 0x68
138 #define ARM_SMMU_GERROR_IRQ_CFG1 0x70
139 #define ARM_SMMU_GERROR_IRQ_CFG2 0x74
141 #define ARM_SMMU_STRTAB_BASE 0x80
142 #define STRTAB_BASE_RA (1UL << 62)
143 #define STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6)
145 #define ARM_SMMU_STRTAB_BASE_CFG 0x88
146 #define STRTAB_BASE_CFG_FMT GENMASK(17, 16)
147 #define STRTAB_BASE_CFG_FMT_LINEAR 0
148 #define STRTAB_BASE_CFG_FMT_2LVL 1
149 #define STRTAB_BASE_CFG_SPLIT GENMASK(10, 6)
150 #define STRTAB_BASE_CFG_LOG2SIZE GENMASK(5, 0)
152 #define ARM_SMMU_CMDQ_BASE 0x90
153 #define ARM_SMMU_CMDQ_PROD 0x98
154 #define ARM_SMMU_CMDQ_CONS 0x9c
156 #define ARM_SMMU_EVTQ_BASE 0xa0
157 #define ARM_SMMU_EVTQ_PROD 0x100a8
158 #define ARM_SMMU_EVTQ_CONS 0x100ac
159 #define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
160 #define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
161 #define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
163 #define ARM_SMMU_PRIQ_BASE 0xc0
164 #define ARM_SMMU_PRIQ_PROD 0x100c8
165 #define ARM_SMMU_PRIQ_CONS 0x100cc
166 #define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
167 #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
168 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
170 /* Common MSI config fields */
171 #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2)
172 #define MSI_CFG2_SH GENMASK(5, 4)
173 #define MSI_CFG2_MEMATTR GENMASK(3, 0)
175 /* Common memory attribute values */
176 #define ARM_SMMU_SH_NSH 0
177 #define ARM_SMMU_SH_OSH 2
178 #define ARM_SMMU_SH_ISH 3
179 #define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
180 #define ARM_SMMU_MEMATTR_OIWB 0xf
182 #define Q_IDX(q, p) ((p) & ((1 << (q)->max_n_shift) - 1))
183 #define Q_WRP(q, p) ((p) & (1 << (q)->max_n_shift))
184 #define Q_OVERFLOW_FLAG (1 << 31)
185 #define Q_OVF(q, p) ((p) & Q_OVERFLOW_FLAG)
186 #define Q_ENT(q, p) ((q)->base + \
187 Q_IDX(q, p) * (q)->ent_dwords)
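/*
 * Illustrative sketch (not part of the driver): prod/cons carry an index in
 * the low max_n_shift bits plus a wrap bit immediately above it, so "full"
 * is equal indices with different wrap bits and "empty" is equal indices
 * with equal wrap bits, exactly as queue_full()/queue_empty() below compute:
 *
 *	bool full  = Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
 *		     Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
 *	bool empty = Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
 *		     Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
 */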
189 #define Q_BASE_RWA (1UL << 62)
190 #define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5)
191 #define Q_BASE_LOG2SIZE GENMASK(4, 0)
/*
 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
 * 2lvl: 128k L1 entries,
 *       256 lazy entries per table (each table covers a PCI bus)
 */
200 #define STRTAB_L1_SZ_SHIFT 20
201 #define STRTAB_SPLIT 8
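/*
 * Illustrative sketch (not part of the driver): with STRTAB_SPLIT == 8, a
 * StreamID indexes the two-level stream table as
 *
 *	l1_idx = sid >> STRTAB_SPLIT;			// selects an L1 descriptor
 *	l2_idx = sid & ((1 << STRTAB_SPLIT) - 1);	// selects an STE within that L2 table
 *
 * i.e. each lazily-allocated L2 table holds 256 STEs, matching one PCI bus.
 */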
203 #define STRTAB_L1_DESC_DWORDS 1
204 #define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0)
205 #define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6)
207 #define STRTAB_STE_DWORDS 8
208 #define STRTAB_STE_0_V (1UL << 0)
209 #define STRTAB_STE_0_CFG GENMASK_ULL(3, 1)
210 #define STRTAB_STE_0_CFG_ABORT 0
211 #define STRTAB_STE_0_CFG_BYPASS 4
212 #define STRTAB_STE_0_CFG_S1_TRANS 5
213 #define STRTAB_STE_0_CFG_S2_TRANS 6
215 #define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4)
216 #define STRTAB_STE_0_S1FMT_LINEAR 0
217 #define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6)
218 #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
220 #define STRTAB_STE_1_S1C_CACHE_NC 0UL
221 #define STRTAB_STE_1_S1C_CACHE_WBRA 1UL
222 #define STRTAB_STE_1_S1C_CACHE_WT 2UL
223 #define STRTAB_STE_1_S1C_CACHE_WB 3UL
224 #define STRTAB_STE_1_S1CIR GENMASK_ULL(3, 2)
225 #define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
226 #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
228 #define STRTAB_STE_1_S1STALLD (1UL << 27)
230 #define STRTAB_STE_1_EATS GENMASK_ULL(29, 28)
231 #define STRTAB_STE_1_EATS_ABT 0UL
232 #define STRTAB_STE_1_EATS_TRANS 1UL
233 #define STRTAB_STE_1_EATS_S1CHK 2UL
235 #define STRTAB_STE_1_STRW GENMASK_ULL(31, 30)
236 #define STRTAB_STE_1_STRW_NSEL1 0UL
237 #define STRTAB_STE_1_STRW_EL2 2UL
239 #define STRTAB_STE_1_SHCFG GENMASK_ULL(45, 44)
240 #define STRTAB_STE_1_SHCFG_INCOMING 1UL
242 #define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0)
243 #define STRTAB_STE_2_VTCR GENMASK_ULL(50, 32)
244 #define STRTAB_STE_2_S2AA64 (1UL << 51)
245 #define STRTAB_STE_2_S2ENDI (1UL << 52)
246 #define STRTAB_STE_2_S2PTW (1UL << 54)
247 #define STRTAB_STE_2_S2R (1UL << 58)
249 #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
251 /* Context descriptor (stage-1 only) */
252 #define CTXDESC_CD_DWORDS 8
253 #define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
254 #define ARM64_TCR_T0SZ GENMASK_ULL(5, 0)
255 #define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
256 #define ARM64_TCR_TG0 GENMASK_ULL(15, 14)
257 #define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)
258 #define ARM64_TCR_IRGN0 GENMASK_ULL(9, 8)
259 #define CTXDESC_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10)
260 #define ARM64_TCR_ORGN0 GENMASK_ULL(11, 10)
261 #define CTXDESC_CD_0_TCR_SH0 GENMASK_ULL(13, 12)
262 #define ARM64_TCR_SH0 GENMASK_ULL(13, 12)
263 #define CTXDESC_CD_0_TCR_EPD0 (1ULL << 14)
264 #define ARM64_TCR_EPD0 (1ULL << 7)
265 #define CTXDESC_CD_0_TCR_EPD1 (1ULL << 30)
266 #define ARM64_TCR_EPD1 (1ULL << 23)
268 #define CTXDESC_CD_0_ENDI (1UL << 15)
269 #define CTXDESC_CD_0_V (1UL << 31)
271 #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32)
272 #define ARM64_TCR_IPS GENMASK_ULL(34, 32)
273 #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38)
274 #define ARM64_TCR_TBI0 (1ULL << 37)
276 #define CTXDESC_CD_0_AA64 (1UL << 41)
277 #define CTXDESC_CD_0_S (1UL << 44)
278 #define CTXDESC_CD_0_R (1UL << 45)
279 #define CTXDESC_CD_0_A (1UL << 46)
280 #define CTXDESC_CD_0_ASET (1UL << 47)
281 #define CTXDESC_CD_0_ASID GENMASK_ULL(63, 48)
283 #define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4)
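/*
 * For reference (matching arm_smmu_write_ctx_desc() below): CD dword 0 holds
 * the repacked TCR fields, the V/AA64/R/A/ASET/S flags and the ASID, dword 1
 * holds TTB0, and dword 3 holds the MAIR value taken unchanged from the CPU
 * page table configuration.
 */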
285 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
286 #define ARM_SMMU_TCR2CD(tcr, fld) FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
287 FIELD_GET(ARM64_TCR_##fld, tcr))
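/*
 * Illustrative sketch (not part of the driver): the macro above moves a
 * field from its CPU TCR position to its CD position, e.g. for TG0:
 *
 *	cd_tg0 = FIELD_PREP(CTXDESC_CD_0_TCR_TG0,		// bits [7:6] in the CD
 *			    FIELD_GET(ARM64_TCR_TG0, tcr));	// bits [15:14] in the TCR
 *
 * which is exactly what arm_smmu_cpu_tcr_to_cd() below does for each field.
 */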
290 #define CMDQ_ENT_DWORDS 2
291 #define CMDQ_MAX_SZ_SHIFT 8
293 #define CMDQ_CONS_ERR GENMASK(30, 24)
294 #define CMDQ_ERR_CERROR_NONE_IDX 0
295 #define CMDQ_ERR_CERROR_ILL_IDX 1
296 #define CMDQ_ERR_CERROR_ABT_IDX 2
298 #define CMDQ_0_OP GENMASK_ULL(7, 0)
299 #define CMDQ_0_SSV (1UL << 11)
301 #define CMDQ_PREFETCH_0_SID GENMASK_ULL(63, 32)
302 #define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0)
303 #define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12)
305 #define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32)
306 #define CMDQ_CFGI_1_LEAF (1UL << 0)
307 #define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0)
309 #define CMDQ_TLBI_0_VMID GENMASK_ULL(47, 32)
310 #define CMDQ_TLBI_0_ASID GENMASK_ULL(63, 48)
311 #define CMDQ_TLBI_1_LEAF (1UL << 0)
312 #define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12)
313 #define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12)
315 #define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12)
316 #define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
317 #define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
318 #define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)
320 #define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12)
321 #define CMDQ_SYNC_0_CS_NONE 0
322 #define CMDQ_SYNC_0_CS_IRQ 1
323 #define CMDQ_SYNC_0_CS_SEV 2
324 #define CMDQ_SYNC_0_MSH GENMASK_ULL(23, 22)
325 #define CMDQ_SYNC_0_MSIATTR GENMASK_ULL(27, 24)
326 #define CMDQ_SYNC_0_MSIDATA GENMASK_ULL(63, 32)
327 #define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)
330 #define EVTQ_ENT_DWORDS 4
331 #define EVTQ_MAX_SZ_SHIFT 7
333 #define EVTQ_0_ID GENMASK_ULL(7, 0)
336 #define PRIQ_ENT_DWORDS 2
337 #define PRIQ_MAX_SZ_SHIFT 8
339 #define PRIQ_0_SID GENMASK_ULL(31, 0)
340 #define PRIQ_0_SSID GENMASK_ULL(51, 32)
341 #define PRIQ_0_PERM_PRIV (1UL << 58)
342 #define PRIQ_0_PERM_EXEC (1UL << 59)
343 #define PRIQ_0_PERM_READ (1UL << 60)
344 #define PRIQ_0_PERM_WRITE (1UL << 61)
345 #define PRIQ_0_PRG_LAST (1UL << 62)
346 #define PRIQ_0_SSID_V (1UL << 63)
348 #define PRIQ_1_PRG_IDX GENMASK_ULL(8, 0)
349 #define PRIQ_1_ADDR_MASK GENMASK_ULL(63, 12)
351 /* High-level queue structures */
352 #define ARM_SMMU_POLL_TIMEOUT_US 100
353 #define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US 1000000 /* 1s! */
354 #define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT 10
356 #define MSI_IOVA_BASE 0x8000000
357 #define MSI_IOVA_LENGTH 0x100000
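/*
 * i.e. IOVAs 0x08000000 - 0x080fffff are reserved as a software-managed MSI
 * window (see arm_smmu_get_resv_regions() below).
 */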
/*
 * not really modular, but the easiest way to keep compat with existing
 * bootargs behaviour is to continue using module_param_named here.
 */
363 static bool disable_bypass = 1;
364 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
365 MODULE_PARM_DESC(disable_bypass,
366 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
374 enum arm_smmu_msi_index {
381 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
383 ARM_SMMU_EVTQ_IRQ_CFG0,
384 ARM_SMMU_EVTQ_IRQ_CFG1,
385 ARM_SMMU_EVTQ_IRQ_CFG2,
387 [GERROR_MSI_INDEX] = {
388 ARM_SMMU_GERROR_IRQ_CFG0,
389 ARM_SMMU_GERROR_IRQ_CFG1,
390 ARM_SMMU_GERROR_IRQ_CFG2,
393 ARM_SMMU_PRIQ_IRQ_CFG0,
394 ARM_SMMU_PRIQ_IRQ_CFG1,
395 ARM_SMMU_PRIQ_IRQ_CFG2,
399 struct arm_smmu_cmdq_ent {
402 bool substream_valid;
404 /* Command-specific fields */
406 #define CMDQ_OP_PREFETCH_CFG 0x1
413 #define CMDQ_OP_CFGI_STE 0x3
414 #define CMDQ_OP_CFGI_ALL 0x4
423 #define CMDQ_OP_TLBI_NH_ASID 0x11
424 #define CMDQ_OP_TLBI_NH_VA 0x12
425 #define CMDQ_OP_TLBI_EL2_ALL 0x20
426 #define CMDQ_OP_TLBI_S12_VMALL 0x28
427 #define CMDQ_OP_TLBI_S2_IPA 0x2a
428 #define CMDQ_OP_TLBI_NSNH_ALL 0x30
436 #define CMDQ_OP_PRI_RESP 0x41
444 #define CMDQ_OP_CMD_SYNC 0x46
452 struct arm_smmu_queue {
453 int irq; /* Wired interrupt */
464 u32 __iomem *prod_reg;
465 u32 __iomem *cons_reg;
468 struct arm_smmu_cmdq {
469 struct arm_smmu_queue q;
473 struct arm_smmu_evtq {
474 struct arm_smmu_queue q;
478 struct arm_smmu_priq {
479 struct arm_smmu_queue q;
482 /* High-level stream table and context descriptor structures */
483 struct arm_smmu_strtab_l1_desc {
487 dma_addr_t l2ptr_dma;
490 struct arm_smmu_s1_cfg {
492 dma_addr_t cdptr_dma;
494 struct arm_smmu_ctx_desc {
502 struct arm_smmu_s2_cfg {
508 struct arm_smmu_strtab_ent {
	/*
	 * An STE is "assigned" if the master emitting the corresponding SID
	 * is attached to a domain. The behaviour of an unassigned STE is
	 * determined by the disable_bypass parameter, whereas an assigned
	 * STE behaves according to s1_cfg/s2_cfg, which themselves are
	 * configured according to the domain type.
	 */
517 struct arm_smmu_s1_cfg *s1_cfg;
518 struct arm_smmu_s2_cfg *s2_cfg;
521 struct arm_smmu_strtab_cfg {
523 dma_addr_t strtab_dma;
524 struct arm_smmu_strtab_l1_desc *l1_desc;
525 unsigned int num_l1_ents;
531 /* An SMMUv3 instance */
532 struct arm_smmu_device {
536 #define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0)
537 #define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1)
538 #define ARM_SMMU_FEAT_TT_LE (1 << 2)
539 #define ARM_SMMU_FEAT_TT_BE (1 << 3)
540 #define ARM_SMMU_FEAT_PRI (1 << 4)
541 #define ARM_SMMU_FEAT_ATS (1 << 5)
542 #define ARM_SMMU_FEAT_SEV (1 << 6)
543 #define ARM_SMMU_FEAT_MSI (1 << 7)
544 #define ARM_SMMU_FEAT_COHERENCY (1 << 8)
545 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 9)
546 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
547 #define ARM_SMMU_FEAT_STALLS (1 << 11)
548 #define ARM_SMMU_FEAT_HYP (1 << 12)
549 #define ARM_SMMU_FEAT_STALL_FORCE (1 << 13)
550 #define ARM_SMMU_FEAT_VAX (1 << 14)
553 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
554 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
557 struct arm_smmu_cmdq cmdq;
558 struct arm_smmu_evtq evtq;
559 struct arm_smmu_priq priq;
566 unsigned long ias; /* IPA */
567 unsigned long oas; /* PA */
568 unsigned long pgsize_bitmap;
570 #define ARM_SMMU_MAX_ASIDS (1 << 16)
571 unsigned int asid_bits;
572 DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
574 #define ARM_SMMU_MAX_VMIDS (1 << 16)
575 unsigned int vmid_bits;
576 DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
578 unsigned int ssid_bits;
579 unsigned int sid_bits;
581 struct arm_smmu_strtab_cfg strtab_cfg;
583 /* Hi16xx adds an extra 32 bits of goodness to its MSI payload */
589 /* IOMMU core code handle */
590 struct iommu_device iommu;
593 /* SMMU private data for each master */
594 struct arm_smmu_master_data {
595 struct arm_smmu_device *smmu;
596 struct arm_smmu_strtab_ent ste;
599 /* SMMU private data for an IOMMU domain */
600 enum arm_smmu_domain_stage {
601 ARM_SMMU_DOMAIN_S1 = 0,
603 ARM_SMMU_DOMAIN_NESTED,
604 ARM_SMMU_DOMAIN_BYPASS,
607 struct arm_smmu_domain {
608 struct arm_smmu_device *smmu;
609 struct mutex init_mutex; /* Protects smmu pointer */
611 struct io_pgtable_ops *pgtbl_ops;
614 enum arm_smmu_domain_stage stage;
616 struct arm_smmu_s1_cfg s1_cfg;
617 struct arm_smmu_s2_cfg s2_cfg;
620 struct iommu_domain domain;
623 struct arm_smmu_option_prop {
628 static struct arm_smmu_option_prop arm_smmu_options[] = {
629 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
630 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
634 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
635 struct arm_smmu_device *smmu)
	if ((offset > SZ_64K) &&
	    (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
		offset -= SZ_64K;

	return smmu->base + offset;
644 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
646 return container_of(dom, struct arm_smmu_domain, domain);
649 static void parse_driver_options(struct arm_smmu_device *smmu)
654 if (of_property_read_bool(smmu->dev->of_node,
655 arm_smmu_options[i].prop)) {
656 smmu->options |= arm_smmu_options[i].opt;
657 dev_notice(smmu->dev, "option %s\n",
658 arm_smmu_options[i].prop);
660 } while (arm_smmu_options[++i].opt);
663 /* Low-level queue manipulation functions */
664 static bool queue_full(struct arm_smmu_queue *q)
666 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
667 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
670 static bool queue_empty(struct arm_smmu_queue *q)
672 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
673 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
676 static void queue_sync_cons(struct arm_smmu_queue *q)
678 q->cons = readl_relaxed(q->cons_reg);
681 static void queue_inc_cons(struct arm_smmu_queue *q)
683 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
685 q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	mb();
	writel_relaxed(q->cons, q->cons_reg);
695 static int queue_sync_prod(struct arm_smmu_queue *q)
698 u32 prod = readl_relaxed(q->prod_reg);
700 if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
707 static void queue_inc_prod(struct arm_smmu_queue *q)
709 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
711 q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
712 writel(q->prod, q->prod_reg);
/*
 * Wait for the SMMU to consume items. If sync is true, wait until the queue
 * is empty. Otherwise, wait until there is at least one free slot.
 */
719 static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
722 unsigned int delay = 1, spin_cnt = 0;
724 /* Wait longer if it's a CMD_SYNC */
725 timeout = ktime_add_us(ktime_get(), sync ?
726 ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
727 ARM_SMMU_POLL_TIMEOUT_US);
729 while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
730 if (ktime_compare(ktime_get(), timeout) > 0)
735 } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
748 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
752 for (i = 0; i < n_dwords; ++i)
753 *dst++ = cpu_to_le64(*src++);
756 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
761 queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
766 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
770 for (i = 0; i < n_dwords; ++i)
771 *dst++ = le64_to_cpu(*src++);
774 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
779 queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
784 /* High-level queue accessors */
785 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
787 memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
788 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
790 switch (ent->opcode) {
791 case CMDQ_OP_TLBI_EL2_ALL:
792 case CMDQ_OP_TLBI_NSNH_ALL:
794 case CMDQ_OP_PREFETCH_CFG:
795 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
796 cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
797 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
799 case CMDQ_OP_CFGI_STE:
800 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
801 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
803 case CMDQ_OP_CFGI_ALL:
804 /* Cover the entire SID range */
805 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
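		/* Range == 31 encodes 2^32 StreamIDs, i.e. the whole SID space */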
807 case CMDQ_OP_TLBI_NH_VA:
808 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
809 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
810 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
812 case CMDQ_OP_TLBI_S2_IPA:
813 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
814 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
815 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
817 case CMDQ_OP_TLBI_NH_ASID:
818 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
820 case CMDQ_OP_TLBI_S12_VMALL:
821 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
823 case CMDQ_OP_PRI_RESP:
824 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
825 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
826 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
827 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
828 switch (ent->pri.resp) {
836 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
838 case CMDQ_OP_CMD_SYNC:
		if (ent->sync.msiaddr)
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
		else
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
843 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
844 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
		/*
		 * Commands are written little-endian, but we want the SMMU to
		 * receive MSIData, and thus write it back to memory, in CPU
		 * byte order, so big-endian needs an extra byteswap here.
		 */
850 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA,
851 cpu_to_le32(ent->sync.msidata));
852 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
861 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
863 static const char *cerror_str[] = {
864 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
865 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
866 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
870 u64 cmd[CMDQ_ENT_DWORDS];
871 struct arm_smmu_queue *q = &smmu->cmdq.q;
872 u32 cons = readl_relaxed(q->cons_reg);
873 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
874 struct arm_smmu_cmdq_ent cmd_sync = {
875 .opcode = CMDQ_OP_CMD_SYNC,
878 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
879 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
882 case CMDQ_ERR_CERROR_ABT_IDX:
883 dev_err(smmu->dev, "retrying command fetch\n");
884 case CMDQ_ERR_CERROR_NONE_IDX:
886 case CMDQ_ERR_CERROR_ILL_IDX:
	/*
	 * We may have concurrent producers, so we need to be careful
	 * not to touch any of the shadow cmdq state.
	 */
896 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
897 dev_err(smmu->dev, "skipping command in error state:\n");
898 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
899 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
901 /* Convert the erroneous command into a CMD_SYNC */
902 if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
903 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
907 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
910 static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
912 struct arm_smmu_queue *q = &smmu->cmdq.q;
913 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
915 smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
917 while (queue_insert_raw(q, cmd) == -ENOSPC) {
918 if (queue_poll_cons(q, false, wfe))
919 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
923 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
924 struct arm_smmu_cmdq_ent *ent)
926 u64 cmd[CMDQ_ENT_DWORDS];
929 if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
930 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
935 spin_lock_irqsave(&smmu->cmdq.lock, flags);
936 arm_smmu_cmdq_insert_cmd(smmu, cmd);
937 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
/*
 * The difference between val and sync_idx is bounded by the maximum size of
 * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
 */
944 static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
949 timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
950 val = smp_cond_load_acquire(&smmu->sync_count,
951 (int)(VAL - sync_idx) >= 0 ||
952 !ktime_before(ktime_get(), timeout));
954 return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
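/*
 * Illustrative sketch (not part of the driver): the signed 32-bit difference
 * above stays correct across a wrap of the sync counter, e.g.
 *
 *	sync_idx = 0xfffffffe, val = 0x00000003
 *	(int)(val - sync_idx) == 5 >= 0		// sync_idx has already been passed
 */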
957 static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
959 u64 cmd[CMDQ_ENT_DWORDS];
961 struct arm_smmu_cmdq_ent ent = {
962 .opcode = CMDQ_OP_CMD_SYNC,
964 .msiaddr = virt_to_phys(&smmu->sync_count),
968 spin_lock_irqsave(&smmu->cmdq.lock, flags);
970 /* Piggy-back on the previous command if it's a SYNC */
	if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) {
		ent.sync.msidata = smmu->sync_nr;
	} else {
		ent.sync.msidata = ++smmu->sync_nr;
		arm_smmu_cmdq_build_cmd(cmd, &ent);
		arm_smmu_cmdq_insert_cmd(smmu, cmd);
	}
979 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
981 return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
984 static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
986 u64 cmd[CMDQ_ENT_DWORDS];
988 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
989 struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
992 arm_smmu_cmdq_build_cmd(cmd, &ent);
994 spin_lock_irqsave(&smmu->cmdq.lock, flags);
995 arm_smmu_cmdq_insert_cmd(smmu, cmd);
996 ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
997 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
1002 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
1005 bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
1006 (smmu->features & ARM_SMMU_FEAT_COHERENCY);
1008 ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
1009 : __arm_smmu_cmdq_issue_sync(smmu);
1011 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
1014 /* Context descriptor manipulation functions */
1015 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
1019 /* Repack the TCR. Just care about TTBR0 for now */
1020 val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
1021 val |= ARM_SMMU_TCR2CD(tcr, TG0);
1022 val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
1023 val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
1024 val |= ARM_SMMU_TCR2CD(tcr, SH0);
1025 val |= ARM_SMMU_TCR2CD(tcr, EPD0);
1026 val |= ARM_SMMU_TCR2CD(tcr, EPD1);
1027 val |= ARM_SMMU_TCR2CD(tcr, IPS);
1028 val |= ARM_SMMU_TCR2CD(tcr, TBI0);
1033 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
1034 struct arm_smmu_s1_cfg *cfg)
	/*
	 * We don't need to issue any invalidation here, as we'll invalidate
	 * the STE when installing the new entry anyway.
	 */
1042 val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
1046 CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1047 CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
1050 /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1051 if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1052 val |= CTXDESC_CD_0_S;
1054 cfg->cdptr[0] = cpu_to_le64(val);
1056 val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
1057 cfg->cdptr[1] = cpu_to_le64(val);
1059 cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
1062 /* Stream table manipulation functions */
1064 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1068 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1069 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1071 *dst = cpu_to_le64(val);
1074 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1076 struct arm_smmu_cmdq_ent cmd = {
1077 .opcode = CMDQ_OP_CFGI_STE,
1084 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1085 arm_smmu_cmdq_issue_sync(smmu);
1088 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
1089 __le64 *dst, struct arm_smmu_strtab_ent *ste)
	/*
	 * This is hideously complicated, but we only really care about
	 * three cases at the moment:
	 *
	 * 1. Invalid (all zero) -> bypass/fault (init)
	 * 2. Bypass/fault -> translation/bypass (attach)
	 * 3. Translation/bypass -> bypass/fault (detach)
	 *
	 * Given that we can't update the STE atomically and the SMMU
	 * doesn't read the thing in a defined order, that leaves us
	 * with the following maintenance requirements:
	 *
	 * 1. Update Config, return (init time STEs aren't live)
	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
	 * 3. Update Config, sync
	 */
1107 u64 val = le64_to_cpu(dst[0]);
1108 bool ste_live = false;
1109 struct arm_smmu_cmdq_ent prefetch_cmd = {
1110 .opcode = CMDQ_OP_PREFETCH_CFG,
1116 if (val & STRTAB_STE_0_V) {
1117 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1118 case STRTAB_STE_0_CFG_BYPASS:
1120 case STRTAB_STE_0_CFG_S1_TRANS:
1121 case STRTAB_STE_0_CFG_S2_TRANS:
1124 case STRTAB_STE_0_CFG_ABORT:
1128 BUG(); /* STE corruption */
1132 /* Nuke the existing STE_0 value, as we're going to rewrite it */
1133 val = STRTAB_STE_0_V;
1136 if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
		if (!ste->assigned && disable_bypass)
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
		else
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1142 dst[0] = cpu_to_le64(val);
1143 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1144 STRTAB_STE_1_SHCFG_INCOMING));
1145 dst[2] = 0; /* Nuke the VMID */
		/*
		 * The SMMU can perform negative caching, so we must sync
		 * the STE regardless of whether the old value was live.
		 */
1151 arm_smmu_sync_ste_for_sid(smmu, sid);
		dst[1] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
#ifdef CONFIG_PCI_ATS
			 FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
#endif
			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1166 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1167 !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1168 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1170 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1171 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
		dst[2] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
			 FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
#ifdef __BIG_ENDIAN
			 STRTAB_STE_2_S2ENDI |
#endif
			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
			 STRTAB_STE_2_S2R);
1185 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1187 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1190 arm_smmu_sync_ste_for_sid(smmu, sid);
1191 dst[0] = cpu_to_le64(val);
1192 arm_smmu_sync_ste_for_sid(smmu, sid);
1194 /* It's likely that we'll want to use the new STE soon */
1195 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1196 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1199 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1202 struct arm_smmu_strtab_ent ste = { .assigned = false };
1204 for (i = 0; i < nent; ++i) {
1205 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1206 strtab += STRTAB_STE_DWORDS;
1210 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1214 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1215 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1220 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
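	/* i.e. 256 STEs x 64 bytes = 16KiB per lazily-allocated L2 table */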
1221 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1223 desc->span = STRTAB_SPLIT + 1;
1224 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1225 GFP_KERNEL | __GFP_ZERO);
1228 "failed to allocate l2 stream table for SID %u\n",
1233 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1234 arm_smmu_write_strtab_l1_desc(strtab, desc);
1238 /* IRQ and event handlers */
1239 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1242 struct arm_smmu_device *smmu = dev;
1243 struct arm_smmu_queue *q = &smmu->evtq.q;
1244 u64 evt[EVTQ_ENT_DWORDS];
1247 while (!queue_remove_raw(q, evt)) {
1248 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1250 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1251 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1252 dev_info(smmu->dev, "\t0x%016llx\n",
1253 (unsigned long long)evt[i]);
		/*
		 * Not much we can do on overflow, so scream and pretend we're
		 * up to speed.
		 */
1261 if (queue_sync_prod(q) == -EOVERFLOW)
1262 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1263 } while (!queue_empty(q));
1265 /* Sync our overflow flag, as we believe we're up to speed */
1266 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1270 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1276 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1277 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1278 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1279 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1280 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1282 dev_info(smmu->dev, "unexpected PRI request received:\n");
1284 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1285 sid, ssid, grpid, last ? "L" : "",
1286 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1287 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1288 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1289 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1290 evt[1] & PRIQ_1_ADDR_MASK);
1293 struct arm_smmu_cmdq_ent cmd = {
1294 .opcode = CMDQ_OP_PRI_RESP,
1295 .substream_valid = ssv,
1300 .resp = PRI_RESP_DENY,
1304 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1308 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1310 struct arm_smmu_device *smmu = dev;
1311 struct arm_smmu_queue *q = &smmu->priq.q;
1312 u64 evt[PRIQ_ENT_DWORDS];
1315 while (!queue_remove_raw(q, evt))
1316 arm_smmu_handle_ppr(smmu, evt);
1318 if (queue_sync_prod(q) == -EOVERFLOW)
1319 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1320 } while (!queue_empty(q));
1322 /* Sync our overflow flag, as we believe we're up to speed */
1323 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1324 writel(q->cons, q->cons_reg);
1328 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1330 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1332 u32 gerror, gerrorn, active;
1333 struct arm_smmu_device *smmu = dev;
1335 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1336 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1338 active = gerror ^ gerrorn;
1339 if (!(active & GERROR_ERR_MASK))
1340 return IRQ_NONE; /* No errors pending */
1343 "unexpected global error reported (0x%08x), this could be serious\n",
1346 if (active & GERROR_SFM_ERR) {
1347 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1348 arm_smmu_device_disable(smmu);
1351 if (active & GERROR_MSI_GERROR_ABT_ERR)
1352 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1354 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1355 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1357 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1358 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1360 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1361 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1363 if (active & GERROR_PRIQ_ABT_ERR)
1364 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1366 if (active & GERROR_EVTQ_ABT_ERR)
1367 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1369 if (active & GERROR_CMDQ_ERR)
1370 arm_smmu_cmdq_skip_err(smmu);
1372 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1376 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1378 struct arm_smmu_device *smmu = dev;
1380 arm_smmu_evtq_thread(irq, dev);
1381 if (smmu->features & ARM_SMMU_FEAT_PRI)
1382 arm_smmu_priq_thread(irq, dev);
1387 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1389 arm_smmu_gerror_handler(irq, dev);
1390 return IRQ_WAKE_THREAD;
1393 /* IO_PGTABLE API */
1394 static void arm_smmu_tlb_sync(void *cookie)
1396 struct arm_smmu_domain *smmu_domain = cookie;
1398 arm_smmu_cmdq_issue_sync(smmu_domain->smmu);
1401 static void arm_smmu_tlb_inv_context(void *cookie)
1403 struct arm_smmu_domain *smmu_domain = cookie;
1404 struct arm_smmu_device *smmu = smmu_domain->smmu;
1405 struct arm_smmu_cmdq_ent cmd;
1407 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1408 cmd.opcode = CMDQ_OP_TLBI_NH_ASID;
1409 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1412 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1413 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
	/*
	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
	 * PTEs previously cleared by unmaps on the current CPU not yet visible
	 * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()
	 * to guarantee those are observed before the TLBI. Do be careful, 007.
	 */
1422 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1423 arm_smmu_cmdq_issue_sync(smmu);
1426 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1427 size_t granule, bool leaf, void *cookie)
1429 struct arm_smmu_domain *smmu_domain = cookie;
1430 struct arm_smmu_device *smmu = smmu_domain->smmu;
1431 struct arm_smmu_cmdq_ent cmd = {
1438 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1439 cmd.opcode = CMDQ_OP_TLBI_NH_VA;
1440 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1442 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1443 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1447 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1448 cmd.tlbi.addr += granule;
1449 } while (size -= granule);
1452 static const struct iommu_gather_ops arm_smmu_gather_ops = {
1453 .tlb_flush_all = arm_smmu_tlb_inv_context,
1454 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
1455 .tlb_sync = arm_smmu_tlb_sync,
1459 static bool arm_smmu_capable(enum iommu_cap cap)
1462 case IOMMU_CAP_CACHE_COHERENCY:
1464 case IOMMU_CAP_NOEXEC:
1471 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1473 struct arm_smmu_domain *smmu_domain;
1475 if (type != IOMMU_DOMAIN_UNMANAGED &&
1476 type != IOMMU_DOMAIN_DMA &&
1477 type != IOMMU_DOMAIN_IDENTITY)
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
1485 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1489 if (type == IOMMU_DOMAIN_DMA &&
1490 iommu_get_dma_cookie(&smmu_domain->domain)) {
1495 mutex_init(&smmu_domain->init_mutex);
1496 return &smmu_domain->domain;
1499 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1501 int idx, size = 1 << span;
1504 idx = find_first_zero_bit(map, size);
1507 } while (test_and_set_bit(idx, map));
1512 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1514 clear_bit(idx, map);
1517 static void arm_smmu_domain_free(struct iommu_domain *domain)
1519 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1520 struct arm_smmu_device *smmu = smmu_domain->smmu;
1522 iommu_put_dma_cookie(domain);
1523 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1525 /* Free the CD and ASID, if we allocated them */
1526 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1527 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1530 dmam_free_coherent(smmu_domain->smmu->dev,
1531 CTXDESC_CD_DWORDS << 3,
1535 arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1538 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1540 arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1546 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1547 struct io_pgtable_cfg *pgtbl_cfg)
1551 struct arm_smmu_device *smmu = smmu_domain->smmu;
1552 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1554 asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1558 cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1560 GFP_KERNEL | __GFP_ZERO);
1562 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1567 cfg->cd.asid = (u16)asid;
1568 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1569 cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1570 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1574 arm_smmu_bitmap_free(smmu->asid_map, asid);
1578 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1579 struct io_pgtable_cfg *pgtbl_cfg)
1582 struct arm_smmu_device *smmu = smmu_domain->smmu;
1583 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1585 vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1589 cfg->vmid = (u16)vmid;
1590 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1591 cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1595 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1598 unsigned long ias, oas;
1599 enum io_pgtable_fmt fmt;
1600 struct io_pgtable_cfg pgtbl_cfg;
1601 struct io_pgtable_ops *pgtbl_ops;
1602 int (*finalise_stage_fn)(struct arm_smmu_domain *,
1603 struct io_pgtable_cfg *);
1604 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1605 struct arm_smmu_device *smmu = smmu_domain->smmu;
1607 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1608 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1612 /* Restrict the stage to what we can actually support */
1613 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1614 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1615 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1616 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1618 switch (smmu_domain->stage) {
1619 case ARM_SMMU_DOMAIN_S1:
1620 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1621 ias = min_t(unsigned long, ias, VA_BITS);
1623 fmt = ARM_64_LPAE_S1;
1624 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1626 case ARM_SMMU_DOMAIN_NESTED:
1627 case ARM_SMMU_DOMAIN_S2:
1630 fmt = ARM_64_LPAE_S2;
1631 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1637 pgtbl_cfg = (struct io_pgtable_cfg) {
1638 .pgsize_bitmap = smmu->pgsize_bitmap,
1641 .tlb = &arm_smmu_gather_ops,
1642 .iommu_dev = smmu->dev,
1645 if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
1646 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
1648 if (smmu_domain->non_strict)
1649 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1651 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1655 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1656 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1657 domain->geometry.force_aperture = true;
1659 ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1661 free_io_pgtable_ops(pgtbl_ops);
1665 smmu_domain->pgtbl_ops = pgtbl_ops;
1669 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1672 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1674 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1675 struct arm_smmu_strtab_l1_desc *l1_desc;
1678 /* Two-level walk */
1679 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1680 l1_desc = &cfg->l1_desc[idx];
1681 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1682 step = &l1_desc->l2ptr[idx];
1684 /* Simple linear lookup */
1685 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1691 static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1694 struct arm_smmu_master_data *master = fwspec->iommu_priv;
1695 struct arm_smmu_device *smmu = master->smmu;
1697 for (i = 0; i < fwspec->num_ids; ++i) {
1698 u32 sid = fwspec->ids[i];
1699 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1701 /* Bridged PCI devices may end up with duplicated IDs */
1702 for (j = 0; j < i; j++)
1703 if (fwspec->ids[j] == sid)
1708 arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
1712 static void arm_smmu_detach_dev(struct device *dev)
1714 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1715 struct arm_smmu_master_data *master = fwspec->iommu_priv;
1717 master->ste.assigned = false;
1718 arm_smmu_install_ste_for_dev(fwspec);
1721 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1724 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1725 struct arm_smmu_device *smmu;
1726 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1727 struct arm_smmu_master_data *master;
1728 struct arm_smmu_strtab_ent *ste;
1733 master = fwspec->iommu_priv;
1734 smmu = master->smmu;
1737 /* Already attached to a different domain? */
1739 arm_smmu_detach_dev(dev);
1741 mutex_lock(&smmu_domain->init_mutex);
1743 if (!smmu_domain->smmu) {
1744 smmu_domain->smmu = smmu;
1745 ret = arm_smmu_domain_finalise(domain);
1747 smmu_domain->smmu = NULL;
1750 } else if (smmu_domain->smmu != smmu) {
1752 "cannot attach to SMMU %s (upstream of %s)\n",
1753 dev_name(smmu_domain->smmu->dev),
1754 dev_name(smmu->dev));
1759 ste->assigned = true;
1761 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
1764 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1765 ste->s1_cfg = &smmu_domain->s1_cfg;
1767 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1770 ste->s2_cfg = &smmu_domain->s2_cfg;
1773 arm_smmu_install_ste_for_dev(fwspec);
1775 mutex_unlock(&smmu_domain->init_mutex);
1779 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1780 phys_addr_t paddr, size_t size, int prot)
1782 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1787 return ops->map(ops, iova, paddr, size, prot);
1791 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1793 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1798 return ops->unmap(ops, iova, size);
1801 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1803 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1805 if (smmu_domain->smmu)
1806 arm_smmu_tlb_inv_context(smmu_domain);
1809 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1811 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1814 arm_smmu_cmdq_issue_sync(smmu);
1818 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1820 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1822 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1828 return ops->iova_to_phys(ops, iova);
1831 static struct platform_driver arm_smmu_driver;
1833 static int arm_smmu_match_node(struct device *dev, void *data)
1835 return dev->fwnode == data;
1839 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1841 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1842 fwnode, arm_smmu_match_node);
1844 return dev ? dev_get_drvdata(dev) : NULL;
1847 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1849 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1851 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1852 limit *= 1UL << STRTAB_SPLIT;
1857 static struct iommu_ops arm_smmu_ops;
1859 static int arm_smmu_add_device(struct device *dev)
1862 struct arm_smmu_device *smmu;
1863 struct arm_smmu_master_data *master;
1864 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1865 struct iommu_group *group;
1867 if (!fwspec || fwspec->ops != &arm_smmu_ops)
	/*
	 * We _can_ actually withstand dodgy bus code re-calling add_device()
	 * without an intervening remove_device()/of_xlate() sequence, but
	 * we're not going to do so quietly...
	 */
1874 if (WARN_ON_ONCE(fwspec->iommu_priv)) {
1875 master = fwspec->iommu_priv;
1876 smmu = master->smmu;
1878 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1881 master = kzalloc(sizeof(*master), GFP_KERNEL);
1885 master->smmu = smmu;
1886 fwspec->iommu_priv = master;
1889 /* Check the SIDs are in range of the SMMU and our stream table */
1890 for (i = 0; i < fwspec->num_ids; i++) {
1891 u32 sid = fwspec->ids[i];
1893 if (!arm_smmu_sid_in_range(smmu, sid))
1896 /* Ensure l2 strtab is initialised */
1897 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1898 ret = arm_smmu_init_l2_strtab(smmu, sid);
1904 group = iommu_group_get_for_dev(dev);
1905 if (!IS_ERR(group)) {
1906 iommu_group_put(group);
1907 iommu_device_link(&smmu->iommu, dev);
1910 return PTR_ERR_OR_ZERO(group);
1913 static void arm_smmu_remove_device(struct device *dev)
1915 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1916 struct arm_smmu_master_data *master;
1917 struct arm_smmu_device *smmu;
1919 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1922 master = fwspec->iommu_priv;
1923 smmu = master->smmu;
1924 if (master && master->ste.assigned)
1925 arm_smmu_detach_dev(dev);
1926 iommu_group_remove_device(dev);
1927 iommu_device_unlink(&smmu->iommu, dev);
1929 iommu_fwspec_free(dev);
1932 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1934 struct iommu_group *group;
	/*
	 * We don't support devices sharing stream IDs other than PCI RID
	 * aliases, since the necessary ID-to-device lookup becomes rather
	 * impractical given a potential sparse 32-bit stream ID space.
	 */
1941 if (dev_is_pci(dev))
1942 group = pci_device_group(dev);
1944 group = generic_device_group(dev);
1949 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1950 enum iommu_attr attr, void *data)
1952 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1954 switch (domain->type) {
1955 case IOMMU_DOMAIN_UNMANAGED:
1957 case DOMAIN_ATTR_NESTING:
1958 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1964 case IOMMU_DOMAIN_DMA:
1966 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1967 *(int *)data = smmu_domain->non_strict;
1978 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1979 enum iommu_attr attr, void *data)
1982 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1984 mutex_lock(&smmu_domain->init_mutex);
1986 switch (domain->type) {
1987 case IOMMU_DOMAIN_UNMANAGED:
1989 case DOMAIN_ATTR_NESTING:
1990 if (smmu_domain->smmu) {
1996 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1998 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2004 case IOMMU_DOMAIN_DMA:
2006 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2007 smmu_domain->non_strict = *(int *)data;
2018 mutex_unlock(&smmu_domain->init_mutex);
2022 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2024 return iommu_fwspec_add_ids(dev, args->args, 1);
2027 static void arm_smmu_get_resv_regions(struct device *dev,
2028 struct list_head *head)
2030 struct iommu_resv_region *region;
2031 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2033 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2034 prot, IOMMU_RESV_SW_MSI);
	list_add_tail(&region->list, head);
2040 iommu_dma_get_resv_regions(dev, head);
2043 static void arm_smmu_put_resv_regions(struct device *dev,
2044 struct list_head *head)
2046 struct iommu_resv_region *entry, *next;
2048 list_for_each_entry_safe(entry, next, head, list)
2052 static struct iommu_ops arm_smmu_ops = {
2053 .capable = arm_smmu_capable,
2054 .domain_alloc = arm_smmu_domain_alloc,
2055 .domain_free = arm_smmu_domain_free,
2056 .attach_dev = arm_smmu_attach_dev,
2057 .map = arm_smmu_map,
2058 .unmap = arm_smmu_unmap,
2059 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
2060 .iotlb_sync = arm_smmu_iotlb_sync,
2061 .iova_to_phys = arm_smmu_iova_to_phys,
2062 .add_device = arm_smmu_add_device,
2063 .remove_device = arm_smmu_remove_device,
2064 .device_group = arm_smmu_device_group,
2065 .domain_get_attr = arm_smmu_domain_get_attr,
2066 .domain_set_attr = arm_smmu_domain_set_attr,
2067 .of_xlate = arm_smmu_of_xlate,
2068 .get_resv_regions = arm_smmu_get_resv_regions,
2069 .put_resv_regions = arm_smmu_put_resv_regions,
2070 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2073 /* Probing and initialisation functions */
2074 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2075 struct arm_smmu_queue *q,
2076 unsigned long prod_off,
2077 unsigned long cons_off,
2080 size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
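	/* i.e. (number of entries) x (dwords per entry) x 8 bytes per dword */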
2082 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
2084 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
2089 q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
2090 q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
2091 q->ent_dwords = dwords;
2093 q->q_base = Q_BASE_RWA;
2094 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2095 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
2097 q->prod = q->cons = 0;
2101 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2106 spin_lock_init(&smmu->cmdq.lock);
2107 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2108 ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
2113 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2114 ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
2119 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2122 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2123 ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2126 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2129 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2130 size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2131 void *strtab = smmu->strtab_cfg.strtab;
2133 cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2134 if (!cfg->l1_desc) {
2135 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2139 for (i = 0; i < cfg->num_l1_ents; ++i) {
2140 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2141 strtab += STRTAB_L1_DESC_DWORDS << 3;
2147 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2152 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2154 /* Calculate the L1 size, capped to the SIDSIZE. */
2155 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2156 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2157 cfg->num_l1_ents = 1 << size;
2159 size += STRTAB_SPLIT;
	if (size < smmu->sid_bits)
		dev_warn(smmu->dev,
			 "2-level strtab only covers %u/%u bits of SID\n",
			 size, smmu->sid_bits);
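	/*
	 * Worked example (illustrative, not part of the driver): with
	 * STRTAB_L1_SZ_SHIFT == 20 and STRTAB_L1_DESC_DWORDS == 1, the L1 is
	 * capped at 2^17 descriptors; for sid_bits == 32 that gives
	 * min(17, 32 - 8) == 17, i.e. 128k L1 entries covering 17 + 8 == 25
	 * bits of SID, so the warning above fires for the remaining 7 bits.
	 */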
2165 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2166 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2167 GFP_KERNEL | __GFP_ZERO);
2170 "failed to allocate l1 stream table (%u bytes)\n",
2174 cfg->strtab = strtab;
2176 /* Configure strtab_base_cfg for 2 levels */
2177 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2178 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2179 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2180 cfg->strtab_base_cfg = reg;
2182 return arm_smmu_init_l1_strtab(smmu);
2185 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2190 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2192 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2193 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2194 GFP_KERNEL | __GFP_ZERO);
2197 "failed to allocate linear stream table (%u bytes)\n",
2201 cfg->strtab = strtab;
2202 cfg->num_l1_ents = 1 << smmu->sid_bits;
2204 /* Configure strtab_base_cfg for a linear table covering all SIDs */
2205 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2206 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2207 cfg->strtab_base_cfg = reg;
2209 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2213 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2218 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2219 ret = arm_smmu_init_strtab_2lvl(smmu);
2221 ret = arm_smmu_init_strtab_linear(smmu);
2226 /* Set the strtab base address */
2227 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2228 reg |= STRTAB_BASE_RA;
2229 smmu->strtab_cfg.strtab_base = reg;
2231 /* Allocate the first VMID for stage-2 bypass STEs */
2232 set_bit(0, smmu->vmid_map);
2236 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2240 ret = arm_smmu_init_queues(smmu);
2244 return arm_smmu_init_strtab(smmu);
2247 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2248 unsigned int reg_off, unsigned int ack_off)
2252 writel_relaxed(val, smmu->base + reg_off);
2253 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2254 1, ARM_SMMU_POLL_TIMEOUT_US);
2257 /* GBPA is "special" */
2258 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2261 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2263 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2264 1, ARM_SMMU_POLL_TIMEOUT_US);
2270 writel_relaxed(reg | GBPA_UPDATE, gbpa);
2271 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2272 1, ARM_SMMU_POLL_TIMEOUT_US);
2275 dev_err(smmu->dev, "GBPA not responding to update\n");
2279 static void arm_smmu_free_msis(void *data)
2281 struct device *dev = data;
2282 platform_msi_domain_free_irqs(dev);
2285 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2287 phys_addr_t doorbell;
2288 struct device *dev = msi_desc_to_dev(desc);
2289 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2290 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2292 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2293 doorbell &= MSI_CFG0_ADDR_MASK;
2295 writeq_relaxed(doorbell, smmu->base + cfg[0]);
2296 writel_relaxed(msg->data, smmu->base + cfg[1]);
2297 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2300 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2302 struct msi_desc *desc;
2303 int ret, nvec = ARM_SMMU_MAX_MSIS;
2304 struct device *dev = smmu->dev;
2306 /* Clear the MSI address regs */
2307 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2308 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2310 if (smmu->features & ARM_SMMU_FEAT_PRI)
2311 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2315 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2318 if (!dev->msi_domain) {
2319 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2323 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2324 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2326 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2330 for_each_msi_entry(desc, dev) {
2331 switch (desc->platform.msi_index) {
2332 case EVTQ_MSI_INDEX:
2333 smmu->evtq.q.irq = desc->irq;
2335 case GERROR_MSI_INDEX:
2336 smmu->gerr_irq = desc->irq;
2338 case PRIQ_MSI_INDEX:
2339 smmu->priq.q.irq = desc->irq;
2341 default: /* Unknown */
2346 /* Add callback to free MSIs on teardown */
2347 devm_add_action(dev, arm_smmu_free_msis, dev);
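/* Request a separate IRQ line (MSI or wired) for each interrupt source */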
2350 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2354 arm_smmu_setup_msis(smmu);
2356 /* Request interrupt lines */
2357 irq = smmu->evtq.q.irq;
2359 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2360 arm_smmu_evtq_thread,
2362 "arm-smmu-v3-evtq", smmu);
2364 dev_warn(smmu->dev, "failed to enable evtq irq\n");
2366 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2369 irq = smmu->gerr_irq;
2371 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2372 0, "arm-smmu-v3-gerror", smmu);
2374 dev_warn(smmu->dev, "failed to enable gerror irq\n");
2376 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2379 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2380 irq = smmu->priq.q.irq;
2382 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2383 arm_smmu_priq_thread,
2389 "failed to enable priq irq\n");
2391 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
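/*
 * Top-level IRQ setup: quiesce interrupt generation, wire up either the
 * single combined line or the per-source lines, then re-enable only the
 * sources this implementation actually has via IRQ_CTRL.
 */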
2396 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2399 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2401 /* Disable IRQs first */
2402 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2403 ARM_SMMU_IRQ_CTRLACK);
2405 dev_err(smmu->dev, "failed to disable irqs\n");
2409 irq = smmu->combined_irq;
2412 * The Cavium ThunderX2 implementation doesn't support unique irq
2413 * lines, so use a single irq line for all the SMMUv3 interrupts.
2415 ret = devm_request_threaded_irq(smmu->dev, irq,
2416 arm_smmu_combined_irq_handler,
2417 arm_smmu_combined_irq_thread,
2419 "arm-smmu-v3-combined-irq", smmu);
2421 dev_warn(smmu->dev, "failed to enable combined irq\n");
2423 arm_smmu_setup_unique_irqs(smmu);
2425 if (smmu->features & ARM_SMMU_FEAT_PRI)
2426 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2428 /* Enable interrupt generation on the SMMU */
2429 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2430 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2432 dev_warn(smmu->dev, "failed to enable irqs\n");
2437 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2441 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2443 dev_err(smmu->dev, "failed to clear cr0\n");
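/*
 * Bring the SMMU to a known state: disable it, program the table/queue
 * memory attributes (CR1/CR2), the stream table base and the three queues,
 * invalidate any cached configuration and TLB entries, set up IRQs, and
 * finally enable translation (or bypass, depending on firmware probing).
 */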
2448 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
2452 struct arm_smmu_cmdq_ent cmd;
2454 /* Clear CR0 and sync (disables SMMU and queue processing) */
2455 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2456 if (reg & CR0_SMMUEN) {
2457 if (is_kdump_kernel()) {
2458 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
2459 arm_smmu_device_disable(smmu);
2463 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2466 ret = arm_smmu_device_disable(smmu);
2470 /* CR1 (table and queue memory attributes) */
2471 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
2472 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
2473 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
2474 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
2475 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
2476 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
2477 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2479 /* CR2 (private TLB maintenance, invalid StreamID recording, E2H) */
2480 reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2481 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2484 writeq_relaxed(smmu->strtab_cfg.strtab_base,
2485 smmu->base + ARM_SMMU_STRTAB_BASE);
2486 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2487 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2490 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2491 writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2492 writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2494 enables = CR0_CMDQEN;
2495 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2498 dev_err(smmu->dev, "failed to enable command queue\n");
2502 /* Invalidate any cached configuration */
2503 cmd.opcode = CMDQ_OP_CFGI_ALL;
2504 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2505 arm_smmu_cmdq_issue_sync(smmu);
2507 /* Invalidate any stale TLB entries */
2508 if (smmu->features & ARM_SMMU_FEAT_HYP) {
2509 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2510 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2513 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2514 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2515 arm_smmu_cmdq_issue_sync(smmu);
2518 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2519 writel_relaxed(smmu->evtq.q.prod,
2520 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
2521 writel_relaxed(smmu->evtq.q.cons,
2522 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
2524 enables |= CR0_EVTQEN;
2525 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2528 dev_err(smmu->dev, "failed to enable event queue\n");
2533 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2534 writeq_relaxed(smmu->priq.q.q_base,
2535 smmu->base + ARM_SMMU_PRIQ_BASE);
2536 writel_relaxed(smmu->priq.q.prod,
2537 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
2538 writel_relaxed(smmu->priq.q.cons,
2539 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
2541 enables |= CR0_PRIQEN;
2542 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2545 dev_err(smmu->dev, "failed to enable PRI queue\n");
2550 ret = arm_smmu_setup_irqs(smmu);
2552 dev_err(smmu->dev, "failed to setup irqs\n");
2557 /* Enable the SMMU interface, or ensure bypass */
2558 if (!bypass || disable_bypass) {
2559 enables |= CR0_SMMUEN;
2561 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
2565 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2568 dev_err(smmu->dev, "failed to enable SMMU interface\n");
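/*
 * Read the ID registers (IDR0/IDR1/IDR5) to discover optional features,
 * queue sizes, SID/SSID widths, supported page sizes and address sizes,
 * and sanity-check them against what the driver and firmware can support.
 */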
2575 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
2578 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
2581 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2583 /* 2-level structures */
2584 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
2585 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2587 if (reg & IDR0_CD2L)
2588 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2591 * Translation table endianness.
2592 * We currently require the same endianness as the CPU, but this
2593 * could be changed later by adding a new IO_PGTABLE_QUIRK.
2595 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
2596 case IDR0_TTENDIAN_MIXED:
2597 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2600 case IDR0_TTENDIAN_BE:
2601 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2604 case IDR0_TTENDIAN_LE:
2605 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2609 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2613 /* Boolean feature flags */
2614 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2615 smmu->features |= ARM_SMMU_FEAT_PRI;
2617 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2618 smmu->features |= ARM_SMMU_FEAT_ATS;
2621 smmu->features |= ARM_SMMU_FEAT_SEV;
2624 smmu->features |= ARM_SMMU_FEAT_MSI;
2627 smmu->features |= ARM_SMMU_FEAT_HYP;
2630 * The coherency feature as set by FW is used in preference to the ID
2631 * register, but warn on mismatch.
2633 if (!!(reg & IDR0_COHACC) != coherent)
2634 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
2635 coherent ? "true" : "false");
2637 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
2638 case IDR0_STALL_MODEL_FORCE:
2639 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
2641 case IDR0_STALL_MODEL_STALL:
2642 smmu->features |= ARM_SMMU_FEAT_STALLS;
2646 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2649 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2651 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2652 dev_err(smmu->dev, "no translation support!\n");
2656 /* We only support the AArch64 table format at present */
2657 switch (FIELD_GET(IDR0_TTF, reg)) {
2658 case IDR0_TTF_AARCH32_64:
2661 case IDR0_TTF_AARCH64:
2664 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2668 /* ASID/VMID sizes */
2669 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2670 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2673 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2674 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2675 dev_err(smmu->dev, "embedded implementation not supported\n");
2679 /* Queue sizes, capped at 4k */
2680 smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
2681 FIELD_GET(IDR1_CMDQS, reg));
2682 if (!smmu->cmdq.q.max_n_shift) {
2683 /* Odd alignment restrictions on the base, so ignore for now */
2684 dev_err(smmu->dev, "unit-length command queue not supported\n");
2688 smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
2689 FIELD_GET(IDR1_EVTQS, reg));
2690 smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
2691 FIELD_GET(IDR1_PRIQS, reg));
2693 /* SID/SSID sizes */
2694 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
2695 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
2698 * If the SMMU supports fewer bits than would fill a single L2 stream
2699 * table, use a linear table instead.
2701 if (smmu->sid_bits <= STRTAB_SPLIT)
2702 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
2705 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2707 /* Maximum number of outstanding stalls */
2708 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
2711 if (reg & IDR5_GRAN64K)
2712 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2713 if (reg & IDR5_GRAN16K)
2714 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2715 if (reg & IDR5_GRAN4K)
2716 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2718 /* Input address size */
2719 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
2720 smmu->features |= ARM_SMMU_FEAT_VAX;
2722 /* Output address size */
2723 switch (FIELD_GET(IDR5_OAS, reg)) {
2724 case IDR5_OAS_32_BIT:
2727 case IDR5_OAS_36_BIT:
2730 case IDR5_OAS_40_BIT:
2733 case IDR5_OAS_42_BIT:
2736 case IDR5_OAS_44_BIT:
2739 case IDR5_OAS_52_BIT:
2741 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
2745 "unknown output address size. Truncating to 48-bit\n");
2747 case IDR5_OAS_48_BIT:
2751 if (arm_smmu_ops.pgsize_bitmap == -1UL)
2752 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2754 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2756 /* Set the DMA mask for our table walker */
2757 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2759 "failed to set DMA mask for table walker\n");
2761 smmu->ias = max(smmu->ias, smmu->oas);
2763 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2764 smmu->ias, smmu->oas, smmu->features);
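/* Apply implementation-specific workarounds keyed off the IORT model field */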
2769 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
2772 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
2773 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
2775 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
2776 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
2780 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
2783 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2784 struct arm_smmu_device *smmu)
2786 struct acpi_iort_smmu_v3 *iort_smmu;
2787 struct device *dev = smmu->dev;
2788 struct acpi_iort_node *node;
2790 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
2792 /* Retrieve SMMUv3 specific data */
2793 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
2795 acpi_smmu_get_options(iort_smmu->model, smmu);
2797 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
2798 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2803 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2804 struct arm_smmu_device *smmu)
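/*
 * Illustrative only (not taken from this file): a device-tree node of the
 * shape this probe path expects. Addresses, interrupt numbers and the
 * presence of "priq"/"combined" lines are platform-specific; the
 * authoritative format is the arm,smmu-v3 binding document.
 *
 *	smmu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		reg = <0x0 0x2b400000 0x0 0x20000>;
 *		interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>;
 *		interrupt-names = "eventq", "gerror", "priq";
 *		#iommu-cells = <1>;
 *		dma-coherent;
 *	};
 */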
2810 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2811 struct arm_smmu_device *smmu)
2813 struct device *dev = &pdev->dev;
2817 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
2818 dev_err(dev, "missing #iommu-cells property\n");
2819 else if (cells != 1)
2820 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
2824 parse_driver_options(smmu);
2826 if (of_dma_is_coherent(dev->of_node))
2827 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
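/*
 * Implementations with ARM_SMMU_OPT_PAGE0_REGS_ONLY (e.g. Cavium CN99xx)
 * only provide the first 64K register page, so don't claim or map the
 * full 128K (page 0 + page 1) region on those.
 */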
2832 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
2834 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
2840 static int arm_smmu_device_probe(struct platform_device *pdev)
2843 struct resource *res;
2844 resource_size_t ioaddr;
2845 struct arm_smmu_device *smmu;
2846 struct device *dev = &pdev->dev;
2849 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2851 dev_err(dev, "failed to allocate arm_smmu_device\n");
2857 ret = arm_smmu_device_dt_probe(pdev, smmu);
2859 ret = arm_smmu_device_acpi_probe(pdev, smmu);
2864 /* Set bypass mode according to firmware probing result */
2868 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2869 if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
2870 dev_err(dev, "MMIO region too small (%pr)\n", res);
2873 ioaddr = res->start;
2875 smmu->base = devm_ioremap_resource(dev, res);
2876 if (IS_ERR(smmu->base))
2877 return PTR_ERR(smmu->base);
2879 /* Interrupt lines */
2881 irq = platform_get_irq_byname(pdev, "combined");
2883 smmu->combined_irq = irq;
2885 irq = platform_get_irq_byname(pdev, "eventq");
2887 smmu->evtq.q.irq = irq;
2889 irq = platform_get_irq_byname(pdev, "priq");
2891 smmu->priq.q.irq = irq;
2893 irq = platform_get_irq_byname(pdev, "gerror");
2895 smmu->gerr_irq = irq;
2898 ret = arm_smmu_device_hw_probe(smmu);
2902 /* Initialise in-memory data structures */
2903 ret = arm_smmu_init_structures(smmu);
2907 /* Record our private device structure */
2908 platform_set_drvdata(pdev, smmu);
2910 /* Reset the device */
2911 ret = arm_smmu_device_reset(smmu, bypass);
2915 /* And we're up. Go go go! */
2916 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
2917 "smmu3.%pa", &ioaddr);
2921 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2922 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2924 ret = iommu_device_register(&smmu->iommu);
2926 dev_err(dev, "Failed to register iommu\n");
2931 if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
2933 ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2938 #ifdef CONFIG_ARM_AMBA
2939 if (amba_bustype.iommu_ops != &arm_smmu_ops) {
2940 ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2945 if (platform_bus_type.iommu_ops != &arm_smmu_ops) {
2946 ret = bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2953 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2955 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2957 arm_smmu_device_disable(smmu);
2960 static const struct of_device_id arm_smmu_of_match[] = {
2961 { .compatible = "arm,smmu-v3", },
2965 static struct platform_driver arm_smmu_driver = {
2967 .name = "arm-smmu-v3",
2968 .of_match_table = of_match_ptr(arm_smmu_of_match),
2969 .suppress_bind_attrs = true,
2971 .probe = arm_smmu_device_probe,
2972 .shutdown = arm_smmu_device_shutdown,
2974 builtin_platform_driver(arm_smmu_driver);