1 // SPDX-License-Identifier: GPL-2.0
3 * IOMMU API for ARM architected SMMUv3 implementations.
5 * Copyright (C) 2015 ARM Limited
7 * Author: Will Deacon <will.deacon@arm.com>
9 * This driver is powered by bad coffee and bombay mix.
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitfield.h>
15 #include <linux/bitops.h>
16 #include <linux/crash_dump.h>
17 #include <linux/delay.h>
18 #include <linux/dma-iommu.h>
19 #include <linux/err.h>
20 #include <linux/interrupt.h>
21 #include <linux/iommu.h>
22 #include <linux/iopoll.h>
23 #include <linux/init.h>
24 #include <linux/moduleparam.h>
25 #include <linux/msi.h>
27 #include <linux/of_address.h>
28 #include <linux/of_iommu.h>
29 #include <linux/of_platform.h>
30 #include <linux/pci.h>
31 #include <linux/platform_device.h>
33 #include <linux/amba/bus.h>
35 #include "io-pgtable.h"
38 #define ARM_SMMU_IDR0 0x0
39 #define IDR0_ST_LVL GENMASK(28, 27)
40 #define IDR0_ST_LVL_2LVL 1
41 #define IDR0_STALL_MODEL GENMASK(25, 24)
42 #define IDR0_STALL_MODEL_STALL 0
43 #define IDR0_STALL_MODEL_FORCE 2
44 #define IDR0_TTENDIAN GENMASK(22, 21)
45 #define IDR0_TTENDIAN_MIXED 0
46 #define IDR0_TTENDIAN_LE 2
47 #define IDR0_TTENDIAN_BE 3
48 #define IDR0_CD2L (1 << 19)
49 #define IDR0_VMID16 (1 << 18)
50 #define IDR0_PRI (1 << 16)
51 #define IDR0_SEV (1 << 14)
52 #define IDR0_MSI (1 << 13)
53 #define IDR0_ASID16 (1 << 12)
54 #define IDR0_ATS (1 << 10)
55 #define IDR0_HYP (1 << 9)
56 #define IDR0_COHACC (1 << 4)
57 #define IDR0_TTF GENMASK(3, 2)
58 #define IDR0_TTF_AARCH64 2
59 #define IDR0_TTF_AARCH32_64 3
60 #define IDR0_S1P (1 << 1)
61 #define IDR0_S2P (1 << 0)
63 #define ARM_SMMU_IDR1 0x4
64 #define IDR1_TABLES_PRESET (1 << 30)
65 #define IDR1_QUEUES_PRESET (1 << 29)
66 #define IDR1_REL (1 << 28)
67 #define IDR1_CMDQS GENMASK(25, 21)
68 #define IDR1_EVTQS GENMASK(20, 16)
69 #define IDR1_PRIQS GENMASK(15, 11)
70 #define IDR1_SSIDSIZE GENMASK(10, 6)
71 #define IDR1_SIDSIZE GENMASK(5, 0)
73 #define ARM_SMMU_IDR5 0x14
74 #define IDR5_STALL_MAX GENMASK(31, 16)
75 #define IDR5_GRAN64K (1 << 6)
76 #define IDR5_GRAN16K (1 << 5)
77 #define IDR5_GRAN4K (1 << 4)
78 #define IDR5_OAS GENMASK(2, 0)
79 #define IDR5_OAS_32_BIT 0
80 #define IDR5_OAS_36_BIT 1
81 #define IDR5_OAS_40_BIT 2
82 #define IDR5_OAS_42_BIT 3
83 #define IDR5_OAS_44_BIT 4
84 #define IDR5_OAS_48_BIT 5
85 #define IDR5_OAS_52_BIT 6
86 #define IDR5_VAX GENMASK(11, 10)
87 #define IDR5_VAX_52_BIT 1
89 #define ARM_SMMU_CR0 0x20
90 #define CR0_CMDQEN (1 << 3)
91 #define CR0_EVTQEN (1 << 2)
92 #define CR0_PRIQEN (1 << 1)
93 #define CR0_SMMUEN (1 << 0)
95 #define ARM_SMMU_CR0ACK 0x24
97 #define ARM_SMMU_CR1 0x28
98 #define CR1_TABLE_SH GENMASK(11, 10)
99 #define CR1_TABLE_OC GENMASK(9, 8)
100 #define CR1_TABLE_IC GENMASK(7, 6)
101 #define CR1_QUEUE_SH GENMASK(5, 4)
102 #define CR1_QUEUE_OC GENMASK(3, 2)
103 #define CR1_QUEUE_IC GENMASK(1, 0)
104 /* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
105 #define CR1_CACHE_NC 0
106 #define CR1_CACHE_WB 1
107 #define CR1_CACHE_WT 2
109 #define ARM_SMMU_CR2 0x2c
110 #define CR2_PTM (1 << 2)
111 #define CR2_RECINVSID (1 << 1)
112 #define CR2_E2H (1 << 0)
114 #define ARM_SMMU_GBPA 0x44
115 #define GBPA_UPDATE (1 << 31)
116 #define GBPA_ABORT (1 << 20)
118 #define ARM_SMMU_IRQ_CTRL 0x50
119 #define IRQ_CTRL_EVTQ_IRQEN (1 << 2)
120 #define IRQ_CTRL_PRIQ_IRQEN (1 << 1)
121 #define IRQ_CTRL_GERROR_IRQEN (1 << 0)
123 #define ARM_SMMU_IRQ_CTRLACK 0x54
125 #define ARM_SMMU_GERROR 0x60
126 #define GERROR_SFM_ERR (1 << 8)
127 #define GERROR_MSI_GERROR_ABT_ERR (1 << 7)
128 #define GERROR_MSI_PRIQ_ABT_ERR (1 << 6)
129 #define GERROR_MSI_EVTQ_ABT_ERR (1 << 5)
130 #define GERROR_MSI_CMDQ_ABT_ERR (1 << 4)
131 #define GERROR_PRIQ_ABT_ERR (1 << 3)
132 #define GERROR_EVTQ_ABT_ERR (1 << 2)
133 #define GERROR_CMDQ_ERR (1 << 0)
134 #define GERROR_ERR_MASK 0xfd
136 #define ARM_SMMU_GERRORN 0x64
138 #define ARM_SMMU_GERROR_IRQ_CFG0 0x68
139 #define ARM_SMMU_GERROR_IRQ_CFG1 0x70
140 #define ARM_SMMU_GERROR_IRQ_CFG2 0x74
142 #define ARM_SMMU_STRTAB_BASE 0x80
143 #define STRTAB_BASE_RA (1UL << 62)
144 #define STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6)
146 #define ARM_SMMU_STRTAB_BASE_CFG 0x88
147 #define STRTAB_BASE_CFG_FMT GENMASK(17, 16)
148 #define STRTAB_BASE_CFG_FMT_LINEAR 0
149 #define STRTAB_BASE_CFG_FMT_2LVL 1
150 #define STRTAB_BASE_CFG_SPLIT GENMASK(10, 6)
151 #define STRTAB_BASE_CFG_LOG2SIZE GENMASK(5, 0)
153 #define ARM_SMMU_CMDQ_BASE 0x90
154 #define ARM_SMMU_CMDQ_PROD 0x98
155 #define ARM_SMMU_CMDQ_CONS 0x9c
157 #define ARM_SMMU_EVTQ_BASE 0xa0
158 #define ARM_SMMU_EVTQ_PROD 0x100a8
159 #define ARM_SMMU_EVTQ_CONS 0x100ac
160 #define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
161 #define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
162 #define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
164 #define ARM_SMMU_PRIQ_BASE 0xc0
165 #define ARM_SMMU_PRIQ_PROD 0x100c8
166 #define ARM_SMMU_PRIQ_CONS 0x100cc
167 #define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
168 #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
169 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
171 /* Common MSI config fields */
172 #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2)
173 #define MSI_CFG2_SH GENMASK(5, 4)
174 #define MSI_CFG2_MEMATTR GENMASK(3, 0)
176 /* Common memory attribute values */
177 #define ARM_SMMU_SH_NSH 0
178 #define ARM_SMMU_SH_OSH 2
179 #define ARM_SMMU_SH_ISH 3
180 #define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
181 #define ARM_SMMU_MEMATTR_OIWB 0xf
183 #define Q_IDX(q, p) ((p) & ((1 << (q)->max_n_shift) - 1))
184 #define Q_WRP(q, p) ((p) & (1 << (q)->max_n_shift))
185 #define Q_OVERFLOW_FLAG (1 << 31)
186 #define Q_OVF(q, p) ((p) & Q_OVERFLOW_FLAG)
187 #define Q_ENT(q, p) ((q)->base + \
188 Q_IDX(q, p) * (q)->ent_dwords)
190 #define Q_BASE_RWA (1UL << 62)
191 #define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5)
192 #define Q_BASE_LOG2SIZE GENMASK(4, 0)
197 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
198 * 2lvl: 128k L1 entries,
199 * 256 lazy entries per table (each table covers a PCI bus)
201 #define STRTAB_L1_SZ_SHIFT 20
202 #define STRTAB_SPLIT 8
204 #define STRTAB_L1_DESC_DWORDS 1
205 #define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0)
206 #define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6)
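/*
 * Editorial worked example (not in the original source): with the values
 * above, arm_smmu_init_strtab_2lvl() sizes the L1 table as
 * 1 << (STRTAB_L1_SZ_SHIFT - log2(8-byte descriptor)) = 1 << 17 = 128k
 * descriptors, and each lazily-allocated L2 table holds
 * 1 << STRTAB_SPLIT = 256 STEs (one PCI bus), covering at most a 25-bit
 * StreamID space.
 */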
208 #define STRTAB_STE_DWORDS 8
209 #define STRTAB_STE_0_V (1UL << 0)
210 #define STRTAB_STE_0_CFG GENMASK_ULL(3, 1)
211 #define STRTAB_STE_0_CFG_ABORT 0
212 #define STRTAB_STE_0_CFG_BYPASS 4
213 #define STRTAB_STE_0_CFG_S1_TRANS 5
214 #define STRTAB_STE_0_CFG_S2_TRANS 6
216 #define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4)
217 #define STRTAB_STE_0_S1FMT_LINEAR 0
218 #define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6)
219 #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
221 #define STRTAB_STE_1_S1C_CACHE_NC 0UL
222 #define STRTAB_STE_1_S1C_CACHE_WBRA 1UL
223 #define STRTAB_STE_1_S1C_CACHE_WT 2UL
224 #define STRTAB_STE_1_S1C_CACHE_WB 3UL
225 #define STRTAB_STE_1_S1CIR GENMASK_ULL(3, 2)
226 #define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
227 #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
229 #define STRTAB_STE_1_S1STALLD (1UL << 27)
231 #define STRTAB_STE_1_EATS GENMASK_ULL(29, 28)
232 #define STRTAB_STE_1_EATS_ABT 0UL
233 #define STRTAB_STE_1_EATS_TRANS 1UL
234 #define STRTAB_STE_1_EATS_S1CHK 2UL
236 #define STRTAB_STE_1_STRW GENMASK_ULL(31, 30)
237 #define STRTAB_STE_1_STRW_NSEL1 0UL
238 #define STRTAB_STE_1_STRW_EL2 2UL
240 #define STRTAB_STE_1_SHCFG GENMASK_ULL(45, 44)
241 #define STRTAB_STE_1_SHCFG_INCOMING 1UL
243 #define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0)
244 #define STRTAB_STE_2_VTCR GENMASK_ULL(50, 32)
245 #define STRTAB_STE_2_S2AA64 (1UL << 51)
246 #define STRTAB_STE_2_S2ENDI (1UL << 52)
247 #define STRTAB_STE_2_S2PTW (1UL << 54)
248 #define STRTAB_STE_2_S2R (1UL << 58)
250 #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
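/*
 * Editorial summary, derived from the field definitions above: of the
 * eight STE dwords, dword 0 carries V, Config, S1Fmt and the stage-1
 * context-descriptor pointer; dword 1 the stage-1 cacheability/
 * shareability attributes plus EATS, STRW and SHCFG; dword 2 the stage-2
 * VMID, VTCR and control bits; and dword 3 the stage-2 translation table
 * base. The remaining dwords are not touched by this driver.
 */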
252 /* Context descriptor (stage-1 only) */
253 #define CTXDESC_CD_DWORDS 8
254 #define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
255 #define ARM64_TCR_T0SZ GENMASK_ULL(5, 0)
256 #define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
257 #define ARM64_TCR_TG0 GENMASK_ULL(15, 14)
258 #define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)
259 #define ARM64_TCR_IRGN0 GENMASK_ULL(9, 8)
260 #define CTXDESC_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10)
261 #define ARM64_TCR_ORGN0 GENMASK_ULL(11, 10)
262 #define CTXDESC_CD_0_TCR_SH0 GENMASK_ULL(13, 12)
263 #define ARM64_TCR_SH0 GENMASK_ULL(13, 12)
264 #define CTXDESC_CD_0_TCR_EPD0 (1ULL << 14)
265 #define ARM64_TCR_EPD0 (1ULL << 7)
266 #define CTXDESC_CD_0_TCR_EPD1 (1ULL << 30)
267 #define ARM64_TCR_EPD1 (1ULL << 23)
269 #define CTXDESC_CD_0_ENDI (1UL << 15)
270 #define CTXDESC_CD_0_V (1UL << 31)
272 #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32)
273 #define ARM64_TCR_IPS GENMASK_ULL(34, 32)
274 #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38)
275 #define ARM64_TCR_TBI0 (1ULL << 37)
277 #define CTXDESC_CD_0_AA64 (1UL << 41)
278 #define CTXDESC_CD_0_S (1UL << 44)
279 #define CTXDESC_CD_0_R (1UL << 45)
280 #define CTXDESC_CD_0_A (1UL << 46)
281 #define CTXDESC_CD_0_ASET (1UL << 47)
282 #define CTXDESC_CD_0_ASID GENMASK_ULL(63, 48)
284 #define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4)
286 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
287 #define ARM_SMMU_TCR2CD(tcr, fld) FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
288 FIELD_GET(ARM64_TCR_##fld, tcr))
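/*
 * Editorial example (not part of the driver): expanding the helper above
 * for a single field. TG0 lives at TCR[15:14] on the CPU side but at
 * CD[7:6] in the context descriptor, so the macro extracts the value and
 * re-inserts it at the new position; arm_smmu_cpu_tcr_to_cd() below just
 * repeats this for each field of interest.
 */
static inline u64 arm_smmu_tcr2cd_tg0_example(u64 tcr)
{
	/* Equivalent to ARM_SMMU_TCR2CD(tcr, TG0) */
	return FIELD_PREP(CTXDESC_CD_0_TCR_TG0,
			  FIELD_GET(ARM64_TCR_TG0, tcr));
}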
291 #define CMDQ_ENT_DWORDS 2
292 #define CMDQ_MAX_SZ_SHIFT 8
294 #define CMDQ_CONS_ERR GENMASK(30, 24)
295 #define CMDQ_ERR_CERROR_NONE_IDX 0
296 #define CMDQ_ERR_CERROR_ILL_IDX 1
297 #define CMDQ_ERR_CERROR_ABT_IDX 2
299 #define CMDQ_0_OP GENMASK_ULL(7, 0)
300 #define CMDQ_0_SSV (1UL << 11)
302 #define CMDQ_PREFETCH_0_SID GENMASK_ULL(63, 32)
303 #define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0)
304 #define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12)
306 #define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32)
307 #define CMDQ_CFGI_1_LEAF (1UL << 0)
308 #define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0)
310 #define CMDQ_TLBI_0_VMID GENMASK_ULL(47, 32)
311 #define CMDQ_TLBI_0_ASID GENMASK_ULL(63, 48)
312 #define CMDQ_TLBI_1_LEAF (1UL << 0)
313 #define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12)
314 #define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12)
316 #define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12)
317 #define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
318 #define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
319 #define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)
321 #define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12)
322 #define CMDQ_SYNC_0_CS_NONE 0
323 #define CMDQ_SYNC_0_CS_IRQ 1
324 #define CMDQ_SYNC_0_CS_SEV 2
325 #define CMDQ_SYNC_0_MSH GENMASK_ULL(23, 22)
326 #define CMDQ_SYNC_0_MSIATTR GENMASK_ULL(27, 24)
327 #define CMDQ_SYNC_0_MSIDATA GENMASK_ULL(63, 32)
328 #define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)
331 #define EVTQ_ENT_DWORDS 4
332 #define EVTQ_MAX_SZ_SHIFT 7
334 #define EVTQ_0_ID GENMASK_ULL(7, 0)
337 #define PRIQ_ENT_DWORDS 2
338 #define PRIQ_MAX_SZ_SHIFT 8
340 #define PRIQ_0_SID GENMASK_ULL(31, 0)
341 #define PRIQ_0_SSID GENMASK_ULL(51, 32)
342 #define PRIQ_0_PERM_PRIV (1UL << 58)
343 #define PRIQ_0_PERM_EXEC (1UL << 59)
344 #define PRIQ_0_PERM_READ (1UL << 60)
345 #define PRIQ_0_PERM_WRITE (1UL << 61)
346 #define PRIQ_0_PRG_LAST (1UL << 62)
347 #define PRIQ_0_SSID_V (1UL << 63)
349 #define PRIQ_1_PRG_IDX GENMASK_ULL(8, 0)
350 #define PRIQ_1_ADDR_MASK GENMASK_ULL(63, 12)
352 /* High-level queue structures */
353 #define ARM_SMMU_POLL_TIMEOUT_US 100
354 #define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US 1000000 /* 1s! */
355 #define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT 10
357 #define MSI_IOVA_BASE 0x8000000
358 #define MSI_IOVA_LENGTH 0x100000
361 * not really modular, but the easiest way to keep compat with existing
362 * bootargs behaviour is to continue using module_param_named here.
364 static bool disable_bypass = 1;
365 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
366 MODULE_PARM_DESC(disable_bypass,
367 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
375 enum arm_smmu_msi_index {
382 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
384 ARM_SMMU_EVTQ_IRQ_CFG0,
385 ARM_SMMU_EVTQ_IRQ_CFG1,
386 ARM_SMMU_EVTQ_IRQ_CFG2,
388 [GERROR_MSI_INDEX] = {
389 ARM_SMMU_GERROR_IRQ_CFG0,
390 ARM_SMMU_GERROR_IRQ_CFG1,
391 ARM_SMMU_GERROR_IRQ_CFG2,
394 ARM_SMMU_PRIQ_IRQ_CFG0,
395 ARM_SMMU_PRIQ_IRQ_CFG1,
396 ARM_SMMU_PRIQ_IRQ_CFG2,
400 struct arm_smmu_cmdq_ent {
403 bool substream_valid;
405 /* Command-specific fields */
407 #define CMDQ_OP_PREFETCH_CFG 0x1
414 #define CMDQ_OP_CFGI_STE 0x3
415 #define CMDQ_OP_CFGI_ALL 0x4
424 #define CMDQ_OP_TLBI_NH_ASID 0x11
425 #define CMDQ_OP_TLBI_NH_VA 0x12
426 #define CMDQ_OP_TLBI_EL2_ALL 0x20
427 #define CMDQ_OP_TLBI_S12_VMALL 0x28
428 #define CMDQ_OP_TLBI_S2_IPA 0x2a
429 #define CMDQ_OP_TLBI_NSNH_ALL 0x30
437 #define CMDQ_OP_PRI_RESP 0x41
445 #define CMDQ_OP_CMD_SYNC 0x46
453 struct arm_smmu_queue {
454 int irq; /* Wired interrupt */
465 u32 __iomem *prod_reg;
466 u32 __iomem *cons_reg;
469 struct arm_smmu_cmdq {
470 struct arm_smmu_queue q;
474 struct arm_smmu_evtq {
475 struct arm_smmu_queue q;
479 struct arm_smmu_priq {
480 struct arm_smmu_queue q;
483 /* High-level stream table and context descriptor structures */
484 struct arm_smmu_strtab_l1_desc {
488 dma_addr_t l2ptr_dma;
491 struct arm_smmu_s1_cfg {
493 dma_addr_t cdptr_dma;
495 struct arm_smmu_ctx_desc {
503 struct arm_smmu_s2_cfg {
509 struct arm_smmu_strtab_ent {
511 * An STE is "assigned" if the master emitting the corresponding SID
512 * is attached to a domain. The behaviour of an unassigned STE is
513 * determined by the disable_bypass parameter, whereas an assigned
514 * STE behaves according to s1_cfg/s2_cfg, which themselves are
515 * configured according to the domain type.
518 struct arm_smmu_s1_cfg *s1_cfg;
519 struct arm_smmu_s2_cfg *s2_cfg;
522 struct arm_smmu_strtab_cfg {
524 dma_addr_t strtab_dma;
525 struct arm_smmu_strtab_l1_desc *l1_desc;
526 unsigned int num_l1_ents;
532 /* An SMMUv3 instance */
533 struct arm_smmu_device {
537 #define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0)
538 #define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1)
539 #define ARM_SMMU_FEAT_TT_LE (1 << 2)
540 #define ARM_SMMU_FEAT_TT_BE (1 << 3)
541 #define ARM_SMMU_FEAT_PRI (1 << 4)
542 #define ARM_SMMU_FEAT_ATS (1 << 5)
543 #define ARM_SMMU_FEAT_SEV (1 << 6)
544 #define ARM_SMMU_FEAT_MSI (1 << 7)
545 #define ARM_SMMU_FEAT_COHERENCY (1 << 8)
546 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 9)
547 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
548 #define ARM_SMMU_FEAT_STALLS (1 << 11)
549 #define ARM_SMMU_FEAT_HYP (1 << 12)
550 #define ARM_SMMU_FEAT_STALL_FORCE (1 << 13)
551 #define ARM_SMMU_FEAT_VAX (1 << 14)
554 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
555 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
558 struct arm_smmu_cmdq cmdq;
559 struct arm_smmu_evtq evtq;
560 struct arm_smmu_priq priq;
567 unsigned long ias; /* IPA */
568 unsigned long oas; /* PA */
569 unsigned long pgsize_bitmap;
571 #define ARM_SMMU_MAX_ASIDS (1 << 16)
572 unsigned int asid_bits;
573 DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
575 #define ARM_SMMU_MAX_VMIDS (1 << 16)
576 unsigned int vmid_bits;
577 DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
579 unsigned int ssid_bits;
580 unsigned int sid_bits;
582 struct arm_smmu_strtab_cfg strtab_cfg;
584 /* Hi16xx adds an extra 32 bits of goodness to its MSI payload */
590 /* IOMMU core code handle */
591 struct iommu_device iommu;
594 /* SMMU private data for each master */
595 struct arm_smmu_master_data {
596 struct arm_smmu_device *smmu;
597 struct arm_smmu_strtab_ent ste;
600 /* SMMU private data for an IOMMU domain */
601 enum arm_smmu_domain_stage {
602 ARM_SMMU_DOMAIN_S1 = 0,
604 ARM_SMMU_DOMAIN_NESTED,
605 ARM_SMMU_DOMAIN_BYPASS,
608 struct arm_smmu_domain {
609 struct arm_smmu_device *smmu;
610 struct mutex init_mutex; /* Protects smmu pointer */
612 struct io_pgtable_ops *pgtbl_ops;
615 enum arm_smmu_domain_stage stage;
617 struct arm_smmu_s1_cfg s1_cfg;
618 struct arm_smmu_s2_cfg s2_cfg;
621 struct iommu_domain domain;
624 struct arm_smmu_option_prop {
629 static struct arm_smmu_option_prop arm_smmu_options[] = {
630 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
631 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
635 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
636 struct arm_smmu_device *smmu)
638 if ((offset > SZ_64K) &&
639 (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
642 return smmu->base + offset;
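/*
 * Editorial note: implementations flagged ARM_SMMU_OPT_PAGE0_REGS_ONLY
 * (e.g. Cavium CN99xx) expose their "page 1" queue-pointer registers in
 * page 0, so offsets above 64K such as ARM_SMMU_EVTQ_PROD (0x100a8) are
 * folded back into the first 64K page before being added to the base;
 * the adjustment itself is elided from this listing.
 */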
645 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
647 return container_of(dom, struct arm_smmu_domain, domain);
650 static void parse_driver_options(struct arm_smmu_device *smmu)
655 if (of_property_read_bool(smmu->dev->of_node,
656 arm_smmu_options[i].prop)) {
657 smmu->options |= arm_smmu_options[i].opt;
658 dev_notice(smmu->dev, "option %s\n",
659 arm_smmu_options[i].prop);
661 } while (arm_smmu_options[++i].opt);
664 /* Low-level queue manipulation functions */
665 static bool queue_full(struct arm_smmu_queue *q)
667 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
668 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
671 static bool queue_empty(struct arm_smmu_queue *q)
673 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
674 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
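/*
 * Editorial sketch (not used by the driver): the prod/cons words combine
 * an index, a wrap bit one position above the index field and the
 * overflow flag in bit 31. "Full" means equal indices with differing wrap
 * bits, "empty" equal indices with matching wrap bits, and the occupancy
 * falls out of the wrap|index values modulo twice the queue size.
 */
static inline u32 queue_used_entries_example(struct arm_smmu_queue *q)
{
	u32 prod = Q_WRP(q, q->prod) | Q_IDX(q, q->prod);
	u32 cons = Q_WRP(q, q->cons) | Q_IDX(q, q->cons);

	/* Modulo 2 * (1 << max_n_shift): wrap bit plus index */
	return (prod - cons) & ((2 << q->max_n_shift) - 1);
}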
677 static void queue_sync_cons(struct arm_smmu_queue *q)
679 q->cons = readl_relaxed(q->cons_reg);
682 static void queue_inc_cons(struct arm_smmu_queue *q)
684 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
686 q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
689 * Ensure that all CPU accesses (reads and writes) to the queue
690 * are complete before we update the cons pointer.
693 writel_relaxed(q->cons, q->cons_reg);
696 static int queue_sync_prod(struct arm_smmu_queue *q)
699 u32 prod = readl_relaxed(q->prod_reg);
701 if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
708 static void queue_inc_prod(struct arm_smmu_queue *q)
710 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
712 q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
713 writel(q->prod, q->prod_reg);
717 * Wait for the SMMU to consume items. If sync is true, wait until the queue
718 * is empty. Otherwise, wait until there is at least one free slot.
720 static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
723 unsigned int delay = 1, spin_cnt = 0;
725 /* Wait longer if it's a CMD_SYNC */
726 timeout = ktime_add_us(ktime_get(), sync ?
727 ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
728 ARM_SMMU_POLL_TIMEOUT_US);
730 while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
731 if (ktime_compare(ktime_get(), timeout) > 0)
736 } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
749 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
753 for (i = 0; i < n_dwords; ++i)
754 *dst++ = cpu_to_le64(*src++);
757 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
762 queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
767 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
771 for (i = 0; i < n_dwords; ++i)
772 *dst++ = le64_to_cpu(*src++);
775 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
780 queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
785 /* High-level queue accessors */
786 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
788 memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
789 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
791 switch (ent->opcode) {
792 case CMDQ_OP_TLBI_EL2_ALL:
793 case CMDQ_OP_TLBI_NSNH_ALL:
795 case CMDQ_OP_PREFETCH_CFG:
796 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
797 cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
798 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
800 case CMDQ_OP_CFGI_STE:
801 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
802 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
804 case CMDQ_OP_CFGI_ALL:
805 /* Cover the entire SID range */
806 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
808 case CMDQ_OP_TLBI_NH_VA:
809 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
810 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
811 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
813 case CMDQ_OP_TLBI_S2_IPA:
814 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
815 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
816 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
818 case CMDQ_OP_TLBI_NH_ASID:
819 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
821 case CMDQ_OP_TLBI_S12_VMALL:
822 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
824 case CMDQ_OP_PRI_RESP:
825 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
826 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
827 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
828 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
829 switch (ent->pri.resp) {
837 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
839 case CMDQ_OP_CMD_SYNC:
840 if (ent->sync.msiaddr)
841 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
843 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
844 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
845 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
847 * Commands are written little-endian, but we want the SMMU to
848 * receive MSIData, and thus write it back to memory, in CPU
849 * byte order, so big-endian needs an extra byteswap here.
851 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA,
852 cpu_to_le32(ent->sync.msidata));
853 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
862 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
864 static const char *cerror_str[] = {
865 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
866 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
867 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
871 u64 cmd[CMDQ_ENT_DWORDS];
872 struct arm_smmu_queue *q = &smmu->cmdq.q;
873 u32 cons = readl_relaxed(q->cons_reg);
874 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
875 struct arm_smmu_cmdq_ent cmd_sync = {
876 .opcode = CMDQ_OP_CMD_SYNC,
879 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
880 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
883 case CMDQ_ERR_CERROR_ABT_IDX:
884 dev_err(smmu->dev, "retrying command fetch\n");
885 case CMDQ_ERR_CERROR_NONE_IDX:
887 case CMDQ_ERR_CERROR_ILL_IDX:
894 * We may have concurrent producers, so we need to be careful
895 * not to touch any of the shadow cmdq state.
897 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
898 dev_err(smmu->dev, "skipping command in error state:\n");
899 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
900 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
902 /* Convert the erroneous command into a CMD_SYNC */
903 if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
904 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
908 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
911 static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
913 struct arm_smmu_queue *q = &smmu->cmdq.q;
914 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
916 smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
918 while (queue_insert_raw(q, cmd) == -ENOSPC) {
919 if (queue_poll_cons(q, false, wfe))
920 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
924 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
925 struct arm_smmu_cmdq_ent *ent)
927 u64 cmd[CMDQ_ENT_DWORDS];
930 if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
931 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
936 spin_lock_irqsave(&smmu->cmdq.lock, flags);
937 arm_smmu_cmdq_insert_cmd(smmu, cmd);
938 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
942 * The difference between val and sync_idx is bounded by the maximum size of
943 * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
945 static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
950 timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
951 val = smp_cond_load_acquire(&smmu->sync_count,
952 (int)(VAL - sync_idx) >= 0 ||
953 !ktime_before(ktime_get(), timeout));
955 return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
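/*
 * Editorial example of the wrap-safe test above: treating the unsigned
 * difference as signed keeps "has val caught up with sync_idx?" correct
 * across a 2^32 wrap, as long as the two values stay within 2^31 of each
 * other -- which the 2^20-entry queue bound guarantees.
 */
static inline bool arm_smmu_sync_idx_reached_example(u32 val, u32 sync_idx)
{
	/* e.g. val = 0x3 (post-wrap), sync_idx = 0xfffffffe -> true */
	return (int)(val - sync_idx) >= 0;
}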
958 static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
960 u64 cmd[CMDQ_ENT_DWORDS];
962 struct arm_smmu_cmdq_ent ent = {
963 .opcode = CMDQ_OP_CMD_SYNC,
965 .msiaddr = virt_to_phys(&smmu->sync_count),
969 spin_lock_irqsave(&smmu->cmdq.lock, flags);
971 /* Piggy-back on the previous command if it's a SYNC */
972 if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) {
973 ent.sync.msidata = smmu->sync_nr;
975 ent.sync.msidata = ++smmu->sync_nr;
976 arm_smmu_cmdq_build_cmd(cmd, &ent);
977 arm_smmu_cmdq_insert_cmd(smmu, cmd);
980 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
982 return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
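/*
 * Editorial note on the piggy-back above: when the most recent command in
 * the queue is already a CMD_SYNC, this caller reuses its MSIData value
 * (sync_nr is not advanced and no new command is inserted in that branch,
 * which is elided here) and simply polls for the same completion value.
 */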
985 static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
987 u64 cmd[CMDQ_ENT_DWORDS];
989 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
990 struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
993 arm_smmu_cmdq_build_cmd(cmd, &ent);
995 spin_lock_irqsave(&smmu->cmdq.lock, flags);
996 arm_smmu_cmdq_insert_cmd(smmu, cmd);
997 ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
998 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
1003 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
1006 bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
1007 (smmu->features & ARM_SMMU_FEAT_COHERENCY);
1009 ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
1010 : __arm_smmu_cmdq_issue_sync(smmu);
1012 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
1015 /* Context descriptor manipulation functions */
1016 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
1020 /* Repack the TCR. Just care about TTBR0 for now */
1021 val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
1022 val |= ARM_SMMU_TCR2CD(tcr, TG0);
1023 val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
1024 val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
1025 val |= ARM_SMMU_TCR2CD(tcr, SH0);
1026 val |= ARM_SMMU_TCR2CD(tcr, EPD0);
1027 val |= ARM_SMMU_TCR2CD(tcr, EPD1);
1028 val |= ARM_SMMU_TCR2CD(tcr, IPS);
1029 val |= ARM_SMMU_TCR2CD(tcr, TBI0);
1034 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
1035 struct arm_smmu_s1_cfg *cfg)
1040 * We don't need to issue any invalidation here, as we'll invalidate
1041 * the STE when installing the new entry anyway.
1043 val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
1047 CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1048 CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
1051 /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1052 if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1053 val |= CTXDESC_CD_0_S;
1055 cfg->cdptr[0] = cpu_to_le64(val);
1057 val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
1058 cfg->cdptr[1] = cpu_to_le64(val);
1060 cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
1063 /* Stream table manipulation functions */
1065 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1069 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1070 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1072 *dst = cpu_to_le64(val);
1075 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1077 struct arm_smmu_cmdq_ent cmd = {
1078 .opcode = CMDQ_OP_CFGI_STE,
1085 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1086 arm_smmu_cmdq_issue_sync(smmu);
1089 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
1090 __le64 *dst, struct arm_smmu_strtab_ent *ste)
1093 * This is hideously complicated, but we only really care about
1094 * three cases at the moment:
1096 * 1. Invalid (all zero) -> bypass/fault (init)
1097 * 2. Bypass/fault -> translation/bypass (attach)
1098 * 3. Translation/bypass -> bypass/fault (detach)
1100 * Given that we can't update the STE atomically and the SMMU
1101 * doesn't read the thing in a defined order, that leaves us
1102 * with the following maintenance requirements:
1104 * 1. Update Config, return (init time STEs aren't live)
1105 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1106 * 3. Update Config, sync
1108 u64 val = le64_to_cpu(dst[0]);
1109 bool ste_live = false;
1110 struct arm_smmu_cmdq_ent prefetch_cmd = {
1111 .opcode = CMDQ_OP_PREFETCH_CFG,
1117 if (val & STRTAB_STE_0_V) {
1118 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1119 case STRTAB_STE_0_CFG_BYPASS:
1121 case STRTAB_STE_0_CFG_S1_TRANS:
1122 case STRTAB_STE_0_CFG_S2_TRANS:
1125 case STRTAB_STE_0_CFG_ABORT:
1129 BUG(); /* STE corruption */
1133 /* Nuke the existing STE_0 value, as we're going to rewrite it */
1134 val = STRTAB_STE_0_V;
1137 if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
1138 if (!ste->assigned && disable_bypass)
1139 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1141 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1143 dst[0] = cpu_to_le64(val);
1144 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1145 STRTAB_STE_1_SHCFG_INCOMING));
1146 dst[2] = 0; /* Nuke the VMID */
1148 * The SMMU can perform negative caching, so we must sync
1149 * the STE regardless of whether the old value was live.
1152 arm_smmu_sync_ste_for_sid(smmu, sid);
1158 dst[1] = cpu_to_le64(
1159 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1160 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1161 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1162 #ifdef CONFIG_PCI_ATS
1163 FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
1165 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1167 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1168 !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1169 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1171 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1172 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
1177 dst[2] = cpu_to_le64(
1178 FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
1179 FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
1181 STRTAB_STE_2_S2ENDI |
1183 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1186 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1188 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1191 arm_smmu_sync_ste_for_sid(smmu, sid);
1192 dst[0] = cpu_to_le64(val);
1193 arm_smmu_sync_ste_for_sid(smmu, sid);
1195 /* It's likely that we'll want to use the new STE soon */
1196 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1197 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1200 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1203 struct arm_smmu_strtab_ent ste = { .assigned = false };
1205 for (i = 0; i < nent; ++i) {
1206 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1207 strtab += STRTAB_STE_DWORDS;
1211 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1215 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1216 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1221 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1222 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1224 desc->span = STRTAB_SPLIT + 1;
1225 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1226 GFP_KERNEL | __GFP_ZERO);
1229 "failed to allocate l2 stream table for SID %u\n",
1234 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1235 arm_smmu_write_strtab_l1_desc(strtab, desc);
1239 /* IRQ and event handlers */
1240 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1243 struct arm_smmu_device *smmu = dev;
1244 struct arm_smmu_queue *q = &smmu->evtq.q;
1245 u64 evt[EVTQ_ENT_DWORDS];
1248 while (!queue_remove_raw(q, evt)) {
1249 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1251 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1252 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1253 dev_info(smmu->dev, "\t0x%016llx\n",
1254 (unsigned long long)evt[i]);
1259 * Not much we can do on overflow, so scream and pretend we're
1262 if (queue_sync_prod(q) == -EOVERFLOW)
1263 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1264 } while (!queue_empty(q));
1266 /* Sync our overflow flag, as we believe we're up to speed */
1267 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1271 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1277 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1278 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1279 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1280 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1281 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1283 dev_info(smmu->dev, "unexpected PRI request received:\n");
1285 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1286 sid, ssid, grpid, last ? "L" : "",
1287 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1288 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1289 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1290 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1291 evt[1] & PRIQ_1_ADDR_MASK);
1294 struct arm_smmu_cmdq_ent cmd = {
1295 .opcode = CMDQ_OP_PRI_RESP,
1296 .substream_valid = ssv,
1301 .resp = PRI_RESP_DENY,
1305 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1309 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1311 struct arm_smmu_device *smmu = dev;
1312 struct arm_smmu_queue *q = &smmu->priq.q;
1313 u64 evt[PRIQ_ENT_DWORDS];
1316 while (!queue_remove_raw(q, evt))
1317 arm_smmu_handle_ppr(smmu, evt);
1319 if (queue_sync_prod(q) == -EOVERFLOW)
1320 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1321 } while (!queue_empty(q));
1323 /* Sync our overflow flag, as we believe we're up to speed */
1324 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1325 writel(q->cons, q->cons_reg);
1329 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1331 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1333 u32 gerror, gerrorn, active;
1334 struct arm_smmu_device *smmu = dev;
1336 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1337 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1339 active = gerror ^ gerrorn;
1340 if (!(active & GERROR_ERR_MASK))
1341 return IRQ_NONE; /* No errors pending */
1344 "unexpected global error reported (0x%08x), this could be serious\n",
1347 if (active & GERROR_SFM_ERR) {
1348 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1349 arm_smmu_device_disable(smmu);
1352 if (active & GERROR_MSI_GERROR_ABT_ERR)
1353 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1355 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1356 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1358 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1359 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1361 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1362 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1364 if (active & GERROR_PRIQ_ABT_ERR)
1365 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1367 if (active & GERROR_EVTQ_ABT_ERR)
1368 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1370 if (active & GERROR_CMDQ_ERR)
1371 arm_smmu_cmdq_skip_err(smmu);
1373 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
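/*
 * Editorial note: GERROR and GERRORN form a toggle pair. The SMMU flips a
 * GERROR bit to record an error condition, and software acknowledges it
 * by making the corresponding GERRORN bit match again; XORing the two
 * registers therefore yields the currently-active errors, and the
 * writel() above acknowledges everything handled in this pass.
 */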
1377 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1379 struct arm_smmu_device *smmu = dev;
1381 arm_smmu_evtq_thread(irq, dev);
1382 if (smmu->features & ARM_SMMU_FEAT_PRI)
1383 arm_smmu_priq_thread(irq, dev);
1388 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1390 arm_smmu_gerror_handler(irq, dev);
1391 return IRQ_WAKE_THREAD;
1394 /* IO_PGTABLE API */
1395 static void arm_smmu_tlb_sync(void *cookie)
1397 struct arm_smmu_domain *smmu_domain = cookie;
1399 arm_smmu_cmdq_issue_sync(smmu_domain->smmu);
1402 static void arm_smmu_tlb_inv_context(void *cookie)
1404 struct arm_smmu_domain *smmu_domain = cookie;
1405 struct arm_smmu_device *smmu = smmu_domain->smmu;
1406 struct arm_smmu_cmdq_ent cmd;
1408 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1409 cmd.opcode = CMDQ_OP_TLBI_NH_ASID;
1410 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1413 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1414 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1418 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1419 * PTEs previously cleared by unmaps on the current CPU not yet visible
1420 * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()
1421 * to guarantee those are observed before the TLBI. Do be careful, 007.
1423 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1424 arm_smmu_cmdq_issue_sync(smmu);
1427 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1428 size_t granule, bool leaf, void *cookie)
1430 struct arm_smmu_domain *smmu_domain = cookie;
1431 struct arm_smmu_device *smmu = smmu_domain->smmu;
1432 struct arm_smmu_cmdq_ent cmd = {
1439 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1440 cmd.opcode = CMDQ_OP_TLBI_NH_VA;
1441 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1443 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1444 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1448 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1449 cmd.tlbi.addr += granule;
1450 } while (size -= granule);
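/*
 * Editorial worked example: invalidating 16K at a 4K granule queues four
 * TLBI commands here (iova, iova + 4K, iova + 8K and iova + 12K); the
 * CMD_SYNC that makes them take effect is issued separately via the
 * tlb_sync callback below.
 */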
1453 static const struct iommu_gather_ops arm_smmu_gather_ops = {
1454 .tlb_flush_all = arm_smmu_tlb_inv_context,
1455 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
1456 .tlb_sync = arm_smmu_tlb_sync,
1460 static bool arm_smmu_capable(enum iommu_cap cap)
1463 case IOMMU_CAP_CACHE_COHERENCY:
1465 case IOMMU_CAP_NOEXEC:
1472 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1474 struct arm_smmu_domain *smmu_domain;
1476 if (type != IOMMU_DOMAIN_UNMANAGED &&
1477 type != IOMMU_DOMAIN_DMA &&
1478 type != IOMMU_DOMAIN_IDENTITY)
1482 * Allocate the domain and initialise some of its data structures.
1483 * We can't really do anything meaningful until we've added a
1486 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1490 if (type == IOMMU_DOMAIN_DMA &&
1491 iommu_get_dma_cookie(&smmu_domain->domain)) {
1496 mutex_init(&smmu_domain->init_mutex);
1497 return &smmu_domain->domain;
1500 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1502 int idx, size = 1 << span;
1505 idx = find_first_zero_bit(map, size);
1508 } while (test_and_set_bit(idx, map));
1513 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1515 clear_bit(idx, map);
1518 static void arm_smmu_domain_free(struct iommu_domain *domain)
1520 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1521 struct arm_smmu_device *smmu = smmu_domain->smmu;
1523 iommu_put_dma_cookie(domain);
1524 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1526 /* Free the CD and ASID, if we allocated them */
1527 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1528 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1531 dmam_free_coherent(smmu_domain->smmu->dev,
1532 CTXDESC_CD_DWORDS << 3,
1536 arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1539 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1541 arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1547 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1548 struct io_pgtable_cfg *pgtbl_cfg)
1552 struct arm_smmu_device *smmu = smmu_domain->smmu;
1553 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1555 asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1559 cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1561 GFP_KERNEL | __GFP_ZERO);
1563 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1568 cfg->cd.asid = (u16)asid;
1569 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1570 cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1571 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1575 arm_smmu_bitmap_free(smmu->asid_map, asid);
1579 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1580 struct io_pgtable_cfg *pgtbl_cfg)
1583 struct arm_smmu_device *smmu = smmu_domain->smmu;
1584 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1586 vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1590 cfg->vmid = (u16)vmid;
1591 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1592 cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1596 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1599 unsigned long ias, oas;
1600 enum io_pgtable_fmt fmt;
1601 struct io_pgtable_cfg pgtbl_cfg;
1602 struct io_pgtable_ops *pgtbl_ops;
1603 int (*finalise_stage_fn)(struct arm_smmu_domain *,
1604 struct io_pgtable_cfg *);
1605 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1606 struct arm_smmu_device *smmu = smmu_domain->smmu;
1608 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1609 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1613 /* Restrict the stage to what we can actually support */
1614 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1615 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1616 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1617 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1619 switch (smmu_domain->stage) {
1620 case ARM_SMMU_DOMAIN_S1:
1621 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1622 ias = min_t(unsigned long, ias, VA_BITS);
1624 fmt = ARM_64_LPAE_S1;
1625 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1627 case ARM_SMMU_DOMAIN_NESTED:
1628 case ARM_SMMU_DOMAIN_S2:
1631 fmt = ARM_64_LPAE_S2;
1632 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1638 pgtbl_cfg = (struct io_pgtable_cfg) {
1639 .pgsize_bitmap = smmu->pgsize_bitmap,
1642 .tlb = &arm_smmu_gather_ops,
1643 .iommu_dev = smmu->dev,
1646 if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
1647 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
1649 if (smmu_domain->non_strict)
1650 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1652 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1656 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1657 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1658 domain->geometry.force_aperture = true;
1660 ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1662 free_io_pgtable_ops(pgtbl_ops);
1666 smmu_domain->pgtbl_ops = pgtbl_ops;
1670 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1673 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1675 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1676 struct arm_smmu_strtab_l1_desc *l1_desc;
1679 /* Two-level walk */
1680 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1681 l1_desc = &cfg->l1_desc[idx];
1682 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1683 step = &l1_desc->l2ptr[idx];
1685 /* Simple linear lookup */
1686 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
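/*
 * Editorial worked example for the two-level walk above, assuming
 * STRTAB_SPLIT == 8: SID 0x1234 selects L1 descriptor 0x12
 * (sid >> STRTAB_SPLIT) and STE 0x34 (sid & 0xff) within that
 * descriptor's L2 table, scaled by STRTAB_L1_DESC_DWORDS and
 * STRTAB_STE_DWORDS respectively to index the dword arrays.
 */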
1692 static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1695 struct arm_smmu_master_data *master = fwspec->iommu_priv;
1696 struct arm_smmu_device *smmu = master->smmu;
1698 for (i = 0; i < fwspec->num_ids; ++i) {
1699 u32 sid = fwspec->ids[i];
1700 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1702 /* Bridged PCI devices may end up with duplicated IDs */
1703 for (j = 0; j < i; j++)
1704 if (fwspec->ids[j] == sid)
1709 arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
1713 static void arm_smmu_detach_dev(struct device *dev)
1715 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1716 struct arm_smmu_master_data *master = fwspec->iommu_priv;
1718 master->ste.assigned = false;
1719 arm_smmu_install_ste_for_dev(fwspec);
1722 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1725 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1726 struct arm_smmu_device *smmu;
1727 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1728 struct arm_smmu_master_data *master;
1729 struct arm_smmu_strtab_ent *ste;
1734 master = fwspec->iommu_priv;
1735 smmu = master->smmu;
1738 /* Already attached to a different domain? */
1740 arm_smmu_detach_dev(dev);
1742 mutex_lock(&smmu_domain->init_mutex);
1744 if (!smmu_domain->smmu) {
1745 smmu_domain->smmu = smmu;
1746 ret = arm_smmu_domain_finalise(domain);
1748 smmu_domain->smmu = NULL;
1751 } else if (smmu_domain->smmu != smmu) {
1753 "cannot attach to SMMU %s (upstream of %s)\n",
1754 dev_name(smmu_domain->smmu->dev),
1755 dev_name(smmu->dev));
1760 ste->assigned = true;
1762 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
1765 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1766 ste->s1_cfg = &smmu_domain->s1_cfg;
1768 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1771 ste->s2_cfg = &smmu_domain->s2_cfg;
1774 arm_smmu_install_ste_for_dev(fwspec);
1776 mutex_unlock(&smmu_domain->init_mutex);
1780 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1781 phys_addr_t paddr, size_t size, int prot)
1783 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1788 return ops->map(ops, iova, paddr, size, prot);
1792 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1794 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1799 return ops->unmap(ops, iova, size);
1802 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1804 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1806 if (smmu_domain->smmu)
1807 arm_smmu_tlb_inv_context(smmu_domain);
1810 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1812 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1815 arm_smmu_cmdq_issue_sync(smmu);
1819 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1821 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1823 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1829 return ops->iova_to_phys(ops, iova);
1832 static struct platform_driver arm_smmu_driver;
1834 static int arm_smmu_match_node(struct device *dev, void *data)
1836 return dev->fwnode == data;
1840 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1842 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1843 fwnode, arm_smmu_match_node);
1845 return dev ? dev_get_drvdata(dev) : NULL;
1848 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1850 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1852 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1853 limit *= 1UL << STRTAB_SPLIT;
1858 static struct iommu_ops arm_smmu_ops;
1860 static int arm_smmu_add_device(struct device *dev)
1863 struct arm_smmu_device *smmu;
1864 struct arm_smmu_master_data *master;
1865 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1866 struct iommu_group *group;
1868 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1871 * We _can_ actually withstand dodgy bus code re-calling add_device()
1872 * without an intervening remove_device()/of_xlate() sequence, but
1873 * we're not going to do so quietly...
1875 if (WARN_ON_ONCE(fwspec->iommu_priv)) {
1876 master = fwspec->iommu_priv;
1877 smmu = master->smmu;
1879 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1882 master = kzalloc(sizeof(*master), GFP_KERNEL);
1886 master->smmu = smmu;
1887 fwspec->iommu_priv = master;
1890 /* Check the SIDs are in range of the SMMU and our stream table */
1891 for (i = 0; i < fwspec->num_ids; i++) {
1892 u32 sid = fwspec->ids[i];
1894 if (!arm_smmu_sid_in_range(smmu, sid))
1897 /* Ensure l2 strtab is initialised */
1898 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1899 ret = arm_smmu_init_l2_strtab(smmu, sid);
1905 group = iommu_group_get_for_dev(dev);
1906 if (!IS_ERR(group)) {
1907 iommu_group_put(group);
1908 iommu_device_link(&smmu->iommu, dev);
1911 return PTR_ERR_OR_ZERO(group);
1914 static void arm_smmu_remove_device(struct device *dev)
1916 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1917 struct arm_smmu_master_data *master;
1918 struct arm_smmu_device *smmu;
1920 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1923 master = fwspec->iommu_priv;
1924 smmu = master->smmu;
1925 if (master && master->ste.assigned)
1926 arm_smmu_detach_dev(dev);
1927 iommu_group_remove_device(dev);
1928 iommu_device_unlink(&smmu->iommu, dev);
1930 iommu_fwspec_free(dev);
1933 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1935 struct iommu_group *group;
1938 * We don't support devices sharing stream IDs other than PCI RID
1939 * aliases, since the necessary ID-to-device lookup becomes rather
1940 * impractical given a potential sparse 32-bit stream ID space.
1942 if (dev_is_pci(dev))
1943 group = pci_device_group(dev);
1945 group = generic_device_group(dev);
1950 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1951 enum iommu_attr attr, void *data)
1953 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1955 switch (domain->type) {
1956 case IOMMU_DOMAIN_UNMANAGED:
1958 case DOMAIN_ATTR_NESTING:
1959 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1965 case IOMMU_DOMAIN_DMA:
1967 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1968 *(int *)data = smmu_domain->non_strict;
1979 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1980 enum iommu_attr attr, void *data)
1983 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1985 mutex_lock(&smmu_domain->init_mutex);
1987 switch (domain->type) {
1988 case IOMMU_DOMAIN_UNMANAGED:
1990 case DOMAIN_ATTR_NESTING:
1991 if (smmu_domain->smmu) {
1997 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1999 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2005 case IOMMU_DOMAIN_DMA:
2007 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2008 smmu_domain->non_strict = *(int *)data;
2019 mutex_unlock(&smmu_domain->init_mutex);
2023 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2025 return iommu_fwspec_add_ids(dev, args->args, 1);
2028 static void arm_smmu_get_resv_regions(struct device *dev,
2029 struct list_head *head)
2031 struct iommu_resv_region *region;
2032 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2034 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2035 prot, IOMMU_RESV_SW_MSI);
2039 list_add_tail(&region->list, head);
2041 iommu_dma_get_resv_regions(dev, head);
2044 static void arm_smmu_put_resv_regions(struct device *dev,
2045 struct list_head *head)
2047 struct iommu_resv_region *entry, *next;
2049 list_for_each_entry_safe(entry, next, head, list)
2053 static struct iommu_ops arm_smmu_ops = {
2054 .capable = arm_smmu_capable,
2055 .domain_alloc = arm_smmu_domain_alloc,
2056 .domain_free = arm_smmu_domain_free,
2057 .attach_dev = arm_smmu_attach_dev,
2058 .map = arm_smmu_map,
2059 .unmap = arm_smmu_unmap,
2060 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
2061 .iotlb_sync = arm_smmu_iotlb_sync,
2062 .iova_to_phys = arm_smmu_iova_to_phys,
2063 .add_device = arm_smmu_add_device,
2064 .remove_device = arm_smmu_remove_device,
2065 .device_group = arm_smmu_device_group,
2066 .domain_get_attr = arm_smmu_domain_get_attr,
2067 .domain_set_attr = arm_smmu_domain_set_attr,
2068 .of_xlate = arm_smmu_of_xlate,
2069 .get_resv_regions = arm_smmu_get_resv_regions,
2070 .put_resv_regions = arm_smmu_put_resv_regions,
2071 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2074 /* Probing and initialisation functions */
2075 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2076 struct arm_smmu_queue *q,
2077 unsigned long prod_off,
2078 unsigned long cons_off,
2081 size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
2083 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
2085 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
2090 q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
2091 q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
2092 q->ent_dwords = dwords;
2094 q->q_base = Q_BASE_RWA;
2095 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2096 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
2098 q->prod = q->cons = 0;
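/*
 * Editorial worked example (assuming the queue ends up at its maximum
 * size): for the command queue with max_n_shift == CMDQ_MAX_SZ_SHIFT (8)
 * and CMDQ_ENT_DWORDS == 2, qsz above is (256 * 2) << 3 = 4KiB, and
 * Q_BASE_LOG2SIZE is programmed with 8 so the SMMU sees 256 entries.
 */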
2102 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2107 spin_lock_init(&smmu->cmdq.lock);
2108 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2109 ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
2114 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2115 ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
2120 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2123 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2124 ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2127 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2130 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2131 size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2132 void *strtab = smmu->strtab_cfg.strtab;
2134 cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2135 if (!cfg->l1_desc) {
2136 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2140 for (i = 0; i < cfg->num_l1_ents; ++i) {
2141 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2142 strtab += STRTAB_L1_DESC_DWORDS << 3;
2148 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2153 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2155 /* Calculate the L1 size, capped to the SIDSIZE. */
2156 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2157 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2158 cfg->num_l1_ents = 1 << size;
2160 size += STRTAB_SPLIT;
2161 if (size < smmu->sid_bits)
2163 "2-level strtab only covers %u/%u bits of SID\n",
2164 size, smmu->sid_bits);
2166 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2167 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2168 GFP_KERNEL | __GFP_ZERO);
2171 "failed to allocate l1 stream table (%u bytes)\n",
2175 cfg->strtab = strtab;
2177 /* Configure strtab_base_cfg for 2 levels */
2178 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2179 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2180 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2181 cfg->strtab_base_cfg = reg;
2183 return arm_smmu_init_l1_strtab(smmu);
2186 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2191 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2193 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2194 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2195 GFP_KERNEL | __GFP_ZERO);
2198 "failed to allocate linear stream table (%u bytes)\n",
2202 cfg->strtab = strtab;
2203 cfg->num_l1_ents = 1 << smmu->sid_bits;
2205 /* Configure strtab_base_cfg for a linear table covering all SIDs */
2206 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2207 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2208 cfg->strtab_base_cfg = reg;
2210 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2214 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2219 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2220 ret = arm_smmu_init_strtab_2lvl(smmu);
2222 ret = arm_smmu_init_strtab_linear(smmu);
2227 /* Set the strtab base address */
2228 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2229 reg |= STRTAB_BASE_RA;
2230 smmu->strtab_cfg.strtab_base = reg;
2232 /* Allocate the first VMID for stage-2 bypass STEs */
2233 set_bit(0, smmu->vmid_map);
2237 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2241 ret = arm_smmu_init_queues(smmu);
2245 return arm_smmu_init_strtab(smmu);
2248 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2249 unsigned int reg_off, unsigned int ack_off)
2253 writel_relaxed(val, smmu->base + reg_off);
2254 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2255 1, ARM_SMMU_POLL_TIMEOUT_US);
2258 /* GBPA is "special" */
2259 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2262 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2264 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2265 1, ARM_SMMU_POLL_TIMEOUT_US);
2271 writel_relaxed(reg | GBPA_UPDATE, gbpa);
2272 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2273 1, ARM_SMMU_POLL_TIMEOUT_US);
2276 dev_err(smmu->dev, "GBPA not responding to update\n");
2280 static void arm_smmu_free_msis(void *data)
2282 struct device *dev = data;
2283 platform_msi_domain_free_irqs(dev);
2286 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2288 phys_addr_t doorbell;
2289 struct device *dev = msi_desc_to_dev(desc);
2290 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2291 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2293 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2294 doorbell &= MSI_CFG0_ADDR_MASK;
2296 writeq_relaxed(doorbell, smmu->base + cfg[0]);
2297 writel_relaxed(msg->data, smmu->base + cfg[1]);
2298 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2301 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2303 struct msi_desc *desc;
2304 int ret, nvec = ARM_SMMU_MAX_MSIS;
2305 struct device *dev = smmu->dev;
2307 /* Clear the MSI address regs */
2308 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2309 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2311 if (smmu->features & ARM_SMMU_FEAT_PRI)
2312 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2316 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2317 return;
2319 if (!dev->msi_domain) {
2320 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2321 return;
2322 }
2324 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2325 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2326 if (ret) {
2327 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2328 return;
2329 }
2331 for_each_msi_entry(desc, dev) {
2332 switch (desc->platform.msi_index) {
2333 case EVTQ_MSI_INDEX:
2334 smmu->evtq.q.irq = desc->irq;
2335 break;
2336 case GERROR_MSI_INDEX:
2337 smmu->gerr_irq = desc->irq;
2338 break;
2339 case PRIQ_MSI_INDEX:
2340 smmu->priq.q.irq = desc->irq;
2341 break;
2342 default: /* Unknown */
2343 continue;
2344 }
2345 }
2347 /* Add callback to free MSIs on teardown */
2348 devm_add_action(dev, arm_smmu_free_msis, dev);
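/*
 * Note: every bail-out above simply falls back to wired interrupts; the
 * queue irq numbers discovered at probe time are left in place and
 * arm_smmu_setup_unique_irqs() will request them as ordinary IRQ lines.
 */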
2351 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2355 arm_smmu_setup_msis(smmu);
2357 /* Request interrupt lines */
2358 irq = smmu->evtq.q.irq;
2359 if (irq) {
2360 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2361 arm_smmu_evtq_thread,
2362 IRQF_ONESHOT,
2363 "arm-smmu-v3-evtq", smmu);
2364 if (ret < 0)
2365 dev_warn(smmu->dev, "failed to enable evtq irq\n");
2366 } else {
2367 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2368 }
2370 irq = smmu->gerr_irq;
2371 if (irq) {
2372 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2373 0, "arm-smmu-v3-gerror", smmu);
2374 if (ret < 0)
2375 dev_warn(smmu->dev, "failed to enable gerror irq\n");
2376 } else {
2377 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2378 }
2380 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2381 irq = smmu->priq.q.irq;
2382 if (irq) {
2383 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2384 arm_smmu_priq_thread,
2385 IRQF_ONESHOT,
2386 "arm-smmu-v3-priq",
2387 smmu);
2388 if (ret < 0)
2389 dev_warn(smmu->dev,
2390 "failed to enable priq irq\n");
2391 } else {
2392 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2393 }
2394 }
2395 }
2397 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2400 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2402 /* Disable IRQs first */
2403 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2404 ARM_SMMU_IRQ_CTRLACK);
2406 dev_err(smmu->dev, "failed to disable irqs\n");
2410 irq = smmu->combined_irq;
2411 if (irq) {
2412 /*
2413 * Cavium ThunderX2 implementation doesn't support unique irq
2414 * lines. Use a single irq line for all the SMMUv3 interrupts.
2415 */
2416 ret = devm_request_threaded_irq(smmu->dev, irq,
2417 arm_smmu_combined_irq_handler,
2418 arm_smmu_combined_irq_thread,
2419 IRQF_ONESHOT,
2420 "arm-smmu-v3-combined-irq", smmu);
2421 if (ret < 0)
2422 dev_warn(smmu->dev, "failed to enable combined irq\n");
2423 } else
2424 arm_smmu_setup_unique_irqs(smmu);
2426 if (smmu->features & ARM_SMMU_FEAT_PRI)
2427 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2429 /* Enable interrupt generation on the SMMU */
2430 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2431 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2433 dev_warn(smmu->dev, "failed to enable irqs\n");
2438 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2439 {
2440 int ret;
2442 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2443 if (ret)
2444 dev_err(smmu->dev, "failed to clear cr0\n");
2446 return ret;
2447 }
2449 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
2450 {
2451 int ret;
2452 u32 reg, enables;
2453 struct arm_smmu_cmdq_ent cmd;
2455 /* Clear CR0 and sync (disables SMMU and queue processing) */
2456 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2457 if (reg & CR0_SMMUEN) {
2458 if (is_kdump_kernel()) {
2459 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
2460 arm_smmu_device_disable(smmu);
2461 return -EBUSY;
2462 }
2464 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2465 }
2467 ret = arm_smmu_device_disable(smmu);
2468 if (ret)
2469 return ret;
2471 /* CR1 (table and queue memory attributes) */
2472 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
2473 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
2474 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
2475 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
2476 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
2477 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
2478 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2480 /* CR2 (random crap) */
2481 reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2482 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2485 writeq_relaxed(smmu->strtab_cfg.strtab_base,
2486 smmu->base + ARM_SMMU_STRTAB_BASE);
2487 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2488 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2491 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2492 writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2493 writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2495 enables = CR0_CMDQEN;
2496 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2497 ARM_SMMU_CR0ACK);
2498 if (ret) {
2499 dev_err(smmu->dev, "failed to enable command queue\n");
2500 return ret;
2501 }
2503 /* Invalidate any cached configuration */
2504 cmd.opcode = CMDQ_OP_CFGI_ALL;
2505 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2506 arm_smmu_cmdq_issue_sync(smmu);
2508 /* Invalidate any stale TLB entries */
2509 if (smmu->features & ARM_SMMU_FEAT_HYP) {
2510 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2511 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2514 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2515 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2516 arm_smmu_cmdq_issue_sync(smmu);
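/*
 * Ordering note: the command queue is enabled first so that the CFGI_ALL
 * and TLBI invalidations above can be issued before the event/PRI queues
 * are brought up and before SMMUEN (or bypass) is configured at the end of
 * this function.
 */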
2519 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2520 writel_relaxed(smmu->evtq.q.prod,
2521 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
2522 writel_relaxed(smmu->evtq.q.cons,
2523 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
2525 enables |= CR0_EVTQEN;
2526 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2529 dev_err(smmu->dev, "failed to enable event queue\n");
2534 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2535 writeq_relaxed(smmu->priq.q.q_base,
2536 smmu->base + ARM_SMMU_PRIQ_BASE);
2537 writel_relaxed(smmu->priq.q.prod,
2538 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
2539 writel_relaxed(smmu->priq.q.cons,
2540 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
2542 enables |= CR0_PRIQEN;
2543 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2546 dev_err(smmu->dev, "failed to enable PRI queue\n");
2551 ret = arm_smmu_setup_irqs(smmu);
2553 dev_err(smmu->dev, "failed to setup irqs\n");
2558 /* Enable the SMMU interface, or ensure bypass */
2559 if (!bypass || disable_bypass) {
2560 enables |= CR0_SMMUEN;
2561 } else {
2562 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
2563 if (ret)
2564 return ret;
2565 }
2566 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2567 ARM_SMMU_CR0ACK);
2568 if (ret) {
2569 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2570 return ret;
2571 }
2573 return 0;
2574 }
2576 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
2579 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
2582 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2584 /* 2-level structures */
2585 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
2586 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2588 if (reg & IDR0_CD2L)
2589 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2591 /*
2592 * Translation table endianness.
2593 * We currently require the same endianness as the CPU, but this
2594 * could be changed later by adding a new IO_PGTABLE_QUIRK.
2595 */
2596 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
2597 case IDR0_TTENDIAN_MIXED:
2598 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2599 break;
2600 #ifdef __BIG_ENDIAN
2601 case IDR0_TTENDIAN_BE:
2602 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2603 break;
2604 #else
2605 case IDR0_TTENDIAN_LE:
2606 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2607 break;
2608 #endif
2609 default:
2610 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2611 return -ENXIO;
2612 }
2614 /* Boolean feature flags */
2615 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2616 smmu->features |= ARM_SMMU_FEAT_PRI;
2618 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2619 smmu->features |= ARM_SMMU_FEAT_ATS;
2621 if (reg & IDR0_SEV)
2622 smmu->features |= ARM_SMMU_FEAT_SEV;
2624 if (reg & IDR0_MSI)
2625 smmu->features |= ARM_SMMU_FEAT_MSI;
2627 if (reg & IDR0_HYP)
2628 smmu->features |= ARM_SMMU_FEAT_HYP;
2630 /*
2631 * The coherency feature as set by FW is used in preference to the ID
2632 * register, but warn on mismatch.
2633 */
2634 if (!!(reg & IDR0_COHACC) != coherent)
2635 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
2636 coherent ? "true" : "false");
2638 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
2639 case IDR0_STALL_MODEL_FORCE:
2640 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
2641 /* Fallthrough */
2642 case IDR0_STALL_MODEL_STALL:
2643 smmu->features |= ARM_SMMU_FEAT_STALLS;
2644 }
2646 if (reg & IDR0_S1P)
2647 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2649 if (reg & IDR0_S2P)
2650 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2652 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2653 dev_err(smmu->dev, "no translation support!\n");
2657 /* We only support the AArch64 table format at present */
2658 switch (FIELD_GET(IDR0_TTF, reg)) {
2659 case IDR0_TTF_AARCH32_64:
2662 case IDR0_TTF_AARCH64:
2665 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2669 /* ASID/VMID sizes */
2670 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2671 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
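/*
 * Example: with IDR0.ASID16 and IDR0.VMID16 both set, up to 65536 address
 * spaces and 65536 virtual machines can be tagged in the TLBs; the 8-bit
 * encodings limit this to 256 of each.
 */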
2674 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2675 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2676 dev_err(smmu->dev, "embedded implementation not supported\n");
2680 /* Queue sizes, capped at 4k */
2681 smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
2682 FIELD_GET(IDR1_CMDQS, reg));
2683 if (!smmu->cmdq.q.max_n_shift) {
2684 /* Odd alignment restrictions on the base, so ignore for now */
2685 dev_err(smmu->dev, "unit-length command queue not supported\n");
2689 smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
2690 FIELD_GET(IDR1_EVTQS, reg));
2691 smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
2692 FIELD_GET(IDR1_PRIQS, reg));
2694 /* SID/SSID sizes */
2695 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
2696 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
2699 * If the SMMU supports fewer bits than would fill a single L2 stream
2700 * table, use a linear table instead.
2702 if (smmu->sid_bits <= STRTAB_SPLIT)
2703 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
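/*
 * Example (a sketch, assuming the mainline STRTAB_SPLIT of 8): an SMMU
 * reporting sid_bits <= 8 can cover every StreamID with a single
 * 256-entry table, so the two-level format buys nothing and the linear
 * layout is used instead.
 */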
2706 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2708 /* Maximum number of outstanding stalls */
2709 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
2712 if (reg & IDR5_GRAN64K)
2713 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2714 if (reg & IDR5_GRAN16K)
2715 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2716 if (reg & IDR5_GRAN4K)
2717 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
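/*
 * The extra sizes ORed in above are the block mappings each granule
 * provides: 4K granule -> 2M and 1G blocks, 16K granule -> 32M blocks,
 * 64K granule -> 512M blocks.
 */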
2719 /* Input address size */
2720 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
2721 smmu->features |= ARM_SMMU_FEAT_VAX;
2723 /* Output address size */
2724 switch (FIELD_GET(IDR5_OAS, reg)) {
2725 case IDR5_OAS_32_BIT:
2726 smmu->oas = 32;
2727 break;
2728 case IDR5_OAS_36_BIT:
2729 smmu->oas = 36;
2730 break;
2731 case IDR5_OAS_40_BIT:
2732 smmu->oas = 40;
2733 break;
2734 case IDR5_OAS_42_BIT:
2735 smmu->oas = 42;
2736 break;
2737 case IDR5_OAS_44_BIT:
2738 smmu->oas = 44;
2739 break;
2740 case IDR5_OAS_52_BIT:
2741 smmu->oas = 52;
2742 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
2743 break;
2744 default:
2745 dev_info(smmu->dev,
2746 "unknown output address size. Truncating to 48-bit\n");
2747 /* Fallthrough */
2748 case IDR5_OAS_48_BIT:
2749 smmu->oas = 48;
2750 }
2752 if (arm_smmu_ops.pgsize_bitmap == -1UL)
2753 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2754 else
2755 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2757 /* Set the DMA mask for our table walker */
2758 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2760 "failed to set DMA mask for table walker\n");
2762 smmu->ias = max(smmu->ias, smmu->oas);
2764 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2765 smmu->ias, smmu->oas, smmu->features);
2770 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
2772 switch (model) {
2773 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
2774 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
2775 break;
2776 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
2777 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
2778 break;
2779 }
2781 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
2782 }
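/*
 * Model-specific quirks taken from the IORT node: Cavium CN99xx parts only
 * implement page 0 of the register space (PAGE0_REGS_ONLY), while HiSilicon
 * HI161x parts must not be sent prefetch commands (SKIP_PREFETCH).
 */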
2784 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2785 struct arm_smmu_device *smmu)
2787 struct acpi_iort_smmu_v3 *iort_smmu;
2788 struct device *dev = smmu->dev;
2789 struct acpi_iort_node *node;
2791 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
2793 /* Retrieve SMMUv3 specific data */
2794 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
2796 acpi_smmu_get_options(iort_smmu->model, smmu);
2798 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
2799 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2804 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2805 struct arm_smmu_device *smmu)
2811 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2812 struct arm_smmu_device *smmu)
2814 struct device *dev = &pdev->dev;
2818 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
2819 dev_err(dev, "missing #iommu-cells property\n");
2820 else if (cells != 1)
2821 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
2825 parse_driver_options(smmu);
2827 if (of_dma_is_coherent(dev->of_node))
2828 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2833 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
2834 {
2835 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
2836 return SZ_64K;
2837 else
2838 return SZ_128K;
2839 }
2841 static int arm_smmu_device_probe(struct platform_device *pdev)
2844 struct resource *res;
2845 resource_size_t ioaddr;
2846 struct arm_smmu_device *smmu;
2847 struct device *dev = &pdev->dev;
2850 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2852 dev_err(dev, "failed to allocate arm_smmu_device\n");
2857 if (dev->of_node) {
2858 ret = arm_smmu_device_dt_probe(pdev, smmu);
2859 } else {
2860 ret = arm_smmu_device_acpi_probe(pdev, smmu);
2861 if (ret == -ENODEV)
2862 return ret;
2863 }
2865 /* Set bypass mode according to firmware probing result */
2866 bypass = !!ret;
2869 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2870 if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
2871 dev_err(dev, "MMIO region too small (%pr)\n", res);
2874 ioaddr = res->start;
2876 smmu->base = devm_ioremap_resource(dev, res);
2877 if (IS_ERR(smmu->base))
2878 return PTR_ERR(smmu->base);
2880 /* Interrupt lines */
2882 irq = platform_get_irq_byname(pdev, "combined");
2884 smmu->combined_irq = irq;
2886 irq = platform_get_irq_byname(pdev, "eventq");
2888 smmu->evtq.q.irq = irq;
2890 irq = platform_get_irq_byname(pdev, "priq");
2892 smmu->priq.q.irq = irq;
2894 irq = platform_get_irq_byname(pdev, "gerror");
2896 smmu->gerr_irq = irq;
2899 ret = arm_smmu_device_hw_probe(smmu);
2903 /* Initialise in-memory data structures */
2904 ret = arm_smmu_init_structures(smmu);
2908 /* Record our private device structure */
2909 platform_set_drvdata(pdev, smmu);
2911 /* Reset the device */
2912 ret = arm_smmu_device_reset(smmu, bypass);
2916 /* And we're up. Go go go! */
2917 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
2918 "smmu3.%pa", &ioaddr);
2922 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2923 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2925 ret = iommu_device_register(&smmu->iommu);
2927 dev_err(dev, "Failed to register iommu\n");
2931 #ifdef CONFIG_PCI
2932 if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
2933 pci_request_acs();
2934 ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2935 if (ret)
2936 return ret;
2937 }
2938 #endif
2939 #ifdef CONFIG_ARM_AMBA
2940 if (amba_bustype.iommu_ops != &arm_smmu_ops) {
2941 ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2942 if (ret)
2943 return ret;
2944 }
2945 #endif
2946 if (platform_bus_type.iommu_ops != &arm_smmu_ops) {
2947 ret = bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2948 if (ret)
2949 return ret;
2950 }
2951 return 0;
2952 }
2954 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2956 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2958 arm_smmu_device_disable(smmu);
2961 static const struct of_device_id arm_smmu_of_match[] = {
2962 { .compatible = "arm,smmu-v3", },
2966 static struct platform_driver arm_smmu_driver = {
2967 .driver = {
2968 .name = "arm-smmu-v3",
2969 .of_match_table = of_match_ptr(arm_smmu_of_match),
2970 .suppress_bind_attrs = true,
2971 },
2972 .probe = arm_smmu_device_probe,
2973 .shutdown = arm_smmu_device_shutdown,
2974 };
2975 builtin_platform_driver(arm_smmu_driver);