// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>

#include <linux/amba/bus.h>

#define ARM_SMMU_IDR0 0x0
#define IDR0_ST_LVL GENMASK(28, 27)
#define IDR0_ST_LVL_2LVL 1
#define IDR0_STALL_MODEL GENMASK(25, 24)
#define IDR0_STALL_MODEL_STALL 0
#define IDR0_STALL_MODEL_FORCE 2
#define IDR0_TTENDIAN GENMASK(22, 21)
#define IDR0_TTENDIAN_MIXED 0
#define IDR0_TTENDIAN_LE 2
#define IDR0_TTENDIAN_BE 3
#define IDR0_CD2L (1 << 19)
#define IDR0_VMID16 (1 << 18)
#define IDR0_PRI (1 << 16)
#define IDR0_SEV (1 << 14)
#define IDR0_MSI (1 << 13)
#define IDR0_ASID16 (1 << 12)
#define IDR0_ATS (1 << 10)
#define IDR0_HYP (1 << 9)
#define IDR0_COHACC (1 << 4)
#define IDR0_TTF GENMASK(3, 2)
#define IDR0_TTF_AARCH64 2
#define IDR0_TTF_AARCH32_64 3
#define IDR0_S1P (1 << 1)
#define IDR0_S2P (1 << 0)

#define ARM_SMMU_IDR1 0x4
#define IDR1_TABLES_PRESET (1 << 30)
#define IDR1_QUEUES_PRESET (1 << 29)
#define IDR1_REL (1 << 28)
#define IDR1_CMDQS GENMASK(25, 21)
#define IDR1_EVTQS GENMASK(20, 16)
#define IDR1_PRIQS GENMASK(15, 11)
#define IDR1_SSIDSIZE GENMASK(10, 6)
#define IDR1_SIDSIZE GENMASK(5, 0)

#define ARM_SMMU_IDR5 0x14
#define IDR5_STALL_MAX GENMASK(31, 16)
#define IDR5_GRAN64K (1 << 6)
#define IDR5_GRAN16K (1 << 5)
#define IDR5_GRAN4K (1 << 4)
#define IDR5_OAS GENMASK(2, 0)
#define IDR5_OAS_32_BIT 0
#define IDR5_OAS_36_BIT 1
#define IDR5_OAS_40_BIT 2
#define IDR5_OAS_42_BIT 3
#define IDR5_OAS_44_BIT 4
#define IDR5_OAS_48_BIT 5
#define IDR5_OAS_52_BIT 6
#define IDR5_VAX GENMASK(11, 10)
#define IDR5_VAX_52_BIT 1

#define ARM_SMMU_CR0 0x20
#define CR0_ATSCHK (1 << 4)
#define CR0_CMDQEN (1 << 3)
#define CR0_EVTQEN (1 << 2)
#define CR0_PRIQEN (1 << 1)
#define CR0_SMMUEN (1 << 0)

#define ARM_SMMU_CR0ACK 0x24

#define ARM_SMMU_CR1 0x28
#define CR1_TABLE_SH GENMASK(11, 10)
#define CR1_TABLE_OC GENMASK(9, 8)
#define CR1_TABLE_IC GENMASK(7, 6)
#define CR1_QUEUE_SH GENMASK(5, 4)
#define CR1_QUEUE_OC GENMASK(3, 2)
#define CR1_QUEUE_IC GENMASK(1, 0)
/* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
#define CR1_CACHE_NC 0
#define CR1_CACHE_WB 1
#define CR1_CACHE_WT 2

#define ARM_SMMU_CR2 0x2c
#define CR2_PTM (1 << 2)
#define CR2_RECINVSID (1 << 1)
#define CR2_E2H (1 << 0)

#define ARM_SMMU_GBPA 0x44
#define GBPA_UPDATE (1 << 31)
#define GBPA_ABORT (1 << 20)

#define ARM_SMMU_IRQ_CTRL 0x50
#define IRQ_CTRL_EVTQ_IRQEN (1 << 2)
#define IRQ_CTRL_PRIQ_IRQEN (1 << 1)
#define IRQ_CTRL_GERROR_IRQEN (1 << 0)

#define ARM_SMMU_IRQ_CTRLACK 0x54

#define ARM_SMMU_GERROR 0x60
#define GERROR_SFM_ERR (1 << 8)
#define GERROR_MSI_GERROR_ABT_ERR (1 << 7)
#define GERROR_MSI_PRIQ_ABT_ERR (1 << 6)
#define GERROR_MSI_EVTQ_ABT_ERR (1 << 5)
#define GERROR_MSI_CMDQ_ABT_ERR (1 << 4)
#define GERROR_PRIQ_ABT_ERR (1 << 3)
#define GERROR_EVTQ_ABT_ERR (1 << 2)
#define GERROR_CMDQ_ERR (1 << 0)
#define GERROR_ERR_MASK 0xfd

#define ARM_SMMU_GERRORN 0x64

#define ARM_SMMU_GERROR_IRQ_CFG0 0x68
#define ARM_SMMU_GERROR_IRQ_CFG1 0x70
#define ARM_SMMU_GERROR_IRQ_CFG2 0x74

#define ARM_SMMU_STRTAB_BASE 0x80
#define STRTAB_BASE_RA (1UL << 62)
#define STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6)

#define ARM_SMMU_STRTAB_BASE_CFG 0x88
#define STRTAB_BASE_CFG_FMT GENMASK(17, 16)
#define STRTAB_BASE_CFG_FMT_LINEAR 0
#define STRTAB_BASE_CFG_FMT_2LVL 1
#define STRTAB_BASE_CFG_SPLIT GENMASK(10, 6)
#define STRTAB_BASE_CFG_LOG2SIZE GENMASK(5, 0)

#define ARM_SMMU_CMDQ_BASE 0x90
#define ARM_SMMU_CMDQ_PROD 0x98
#define ARM_SMMU_CMDQ_CONS 0x9c

#define ARM_SMMU_EVTQ_BASE 0xa0
#define ARM_SMMU_EVTQ_PROD 0x100a8
#define ARM_SMMU_EVTQ_CONS 0x100ac
#define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
#define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
#define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc

#define ARM_SMMU_PRIQ_BASE 0xc0
#define ARM_SMMU_PRIQ_PROD 0x100c8
#define ARM_SMMU_PRIQ_CONS 0x100cc
#define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
#define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
#define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc

/* Common MSI config fields */
#define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2)
#define MSI_CFG2_SH GENMASK(5, 4)
#define MSI_CFG2_MEMATTR GENMASK(3, 0)

/* Common memory attribute values */
#define ARM_SMMU_SH_NSH 0
#define ARM_SMMU_SH_OSH 2
#define ARM_SMMU_SH_ISH 3
#define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
#define ARM_SMMU_MEMATTR_OIWB 0xf

#define Q_IDX(llq, p) ((p) & ((1 << (llq)->max_n_shift) - 1))
#define Q_WRP(llq, p) ((p) & (1 << (llq)->max_n_shift))
#define Q_OVERFLOW_FLAG (1U << 31)
#define Q_OVF(p) ((p) & Q_OVERFLOW_FLAG)
#define Q_ENT(q, p) ((q)->base +			\
		     Q_IDX(&((q)->llq), p) *		\
		     (q)->ent_dwords)
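
/*
 * Illustrative sketch (not part of the driver): how the prod/cons encoding
 * behaves for a hypothetical 3-bit (8-entry) queue. The wrap bit sits
 * immediately above the index bits, so equal indices are disambiguated by
 * their wrap bits:
 *
 *	struct arm_smmu_ll_queue llq = { .max_n_shift = 3 };
 *
 *	Q_IDX(&llq, 0x9);	// == 0x1, the low 3 bits
 *	Q_WRP(&llq, 0x9);	// == 0x8, bit 3
 *
 *	// prod == 0x8, cons == 0x0: same index, different wrap => full
 *	// prod == 0x8, cons == 0x8: same index, same wrap => empty
 */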

#define Q_BASE_RWA (1UL << 62)
#define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5)
#define Q_BASE_LOG2SIZE GENMASK(4, 0)

/* Ensure DMA allocations are naturally aligned */
#ifdef CONFIG_CMA_ALIGNMENT
#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)
#else
#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER - 1)
#endif
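
/*
 * Worked example (illustrative, config-dependent): with 4KiB pages
 * (PAGE_SHIFT == 12) and CONFIG_CMA_ALIGNMENT == 8, Q_MAX_SZ_SHIFT is 20,
 * i.e. a queue is capped at 1MiB, which CMA can return naturally aligned.
 */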

/*
 * Stream table.
 *
 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
 * 2lvl: 128k L1 entries,
 *       256 lazy entries per table (each table covers a PCI bus)
 */
#define STRTAB_L1_SZ_SHIFT 20
#define STRTAB_SPLIT 8
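
/*
 * Worked example (illustrative): each L1 descriptor is one dword, so a
 * 1 << STRTAB_L1_SZ_SHIFT byte (1MiB) L1 table holds (1 << 20) / 8 = 128k
 * descriptors, and each lazily-allocated L2 table spans
 * 1 << STRTAB_SPLIT = 256 STEs -- exactly the 256 devfns of one PCI bus.
 */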

#define STRTAB_L1_DESC_DWORDS 1
#define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0)
#define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6)

#define STRTAB_STE_DWORDS 8
#define STRTAB_STE_0_V (1UL << 0)
#define STRTAB_STE_0_CFG GENMASK_ULL(3, 1)
#define STRTAB_STE_0_CFG_ABORT 0
#define STRTAB_STE_0_CFG_BYPASS 4
#define STRTAB_STE_0_CFG_S1_TRANS 5
#define STRTAB_STE_0_CFG_S2_TRANS 6

#define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4)
#define STRTAB_STE_0_S1FMT_LINEAR 0
#define STRTAB_STE_0_S1FMT_64K_L2 2
#define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6)
#define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)

#define STRTAB_STE_1_S1DSS GENMASK_ULL(1, 0)
#define STRTAB_STE_1_S1DSS_TERMINATE 0x0
#define STRTAB_STE_1_S1DSS_BYPASS 0x1
#define STRTAB_STE_1_S1DSS_SSID0 0x2

#define STRTAB_STE_1_S1C_CACHE_NC 0UL
#define STRTAB_STE_1_S1C_CACHE_WBRA 1UL
#define STRTAB_STE_1_S1C_CACHE_WT 2UL
#define STRTAB_STE_1_S1C_CACHE_WB 3UL
#define STRTAB_STE_1_S1CIR GENMASK_ULL(3, 2)
#define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
#define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)

#define STRTAB_STE_1_S1STALLD (1UL << 27)

#define STRTAB_STE_1_EATS GENMASK_ULL(29, 28)
#define STRTAB_STE_1_EATS_ABT 0UL
#define STRTAB_STE_1_EATS_TRANS 1UL
#define STRTAB_STE_1_EATS_S1CHK 2UL

#define STRTAB_STE_1_STRW GENMASK_ULL(31, 30)
#define STRTAB_STE_1_STRW_NSEL1 0UL
#define STRTAB_STE_1_STRW_EL2 2UL

#define STRTAB_STE_1_SHCFG GENMASK_ULL(45, 44)
#define STRTAB_STE_1_SHCFG_INCOMING 1UL

#define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0)
#define STRTAB_STE_2_VTCR GENMASK_ULL(50, 32)
#define STRTAB_STE_2_VTCR_S2T0SZ GENMASK_ULL(5, 0)
#define STRTAB_STE_2_VTCR_S2SL0 GENMASK_ULL(7, 6)
#define STRTAB_STE_2_VTCR_S2IR0 GENMASK_ULL(9, 8)
#define STRTAB_STE_2_VTCR_S2OR0 GENMASK_ULL(11, 10)
#define STRTAB_STE_2_VTCR_S2SH0 GENMASK_ULL(13, 12)
#define STRTAB_STE_2_VTCR_S2TG GENMASK_ULL(15, 14)
#define STRTAB_STE_2_VTCR_S2PS GENMASK_ULL(18, 16)
#define STRTAB_STE_2_S2AA64 (1UL << 51)
#define STRTAB_STE_2_S2ENDI (1UL << 52)
#define STRTAB_STE_2_S2PTW (1UL << 54)
#define STRTAB_STE_2_S2R (1UL << 58)

#define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)

/*
 * Context descriptors.
 *
 * Linear: when less than 1024 SSIDs are supported
 * 2lvl: at most 1024 L1 entries,
 *       1024 lazy entries per table.
 */
#define CTXDESC_SPLIT 10
#define CTXDESC_L2_ENTRIES (1 << CTXDESC_SPLIT)

#define CTXDESC_L1_DESC_DWORDS 1
#define CTXDESC_L1_DESC_V (1UL << 0)
#define CTXDESC_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 12)

#define CTXDESC_CD_DWORDS 8
#define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
#define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
#define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)
#define CTXDESC_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10)
#define CTXDESC_CD_0_TCR_SH0 GENMASK_ULL(13, 12)
#define CTXDESC_CD_0_TCR_EPD0 (1ULL << 14)
#define CTXDESC_CD_0_TCR_EPD1 (1ULL << 30)

#define CTXDESC_CD_0_ENDI (1UL << 15)
#define CTXDESC_CD_0_V (1UL << 31)

#define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32)
#define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38)

#define CTXDESC_CD_0_AA64 (1UL << 41)
#define CTXDESC_CD_0_S (1UL << 44)
#define CTXDESC_CD_0_R (1UL << 45)
#define CTXDESC_CD_0_A (1UL << 46)
#define CTXDESC_CD_0_ASET (1UL << 47)
#define CTXDESC_CD_0_ASID GENMASK_ULL(63, 48)

#define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4)

/*
 * When the SMMU only supports linear context descriptor tables, pick a
 * reasonable size limit (64kB).
 */
#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3))
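
/*
 * Worked example (illustrative): a CD is CTXDESC_CD_DWORDS << 3 = 64
 * bytes, so SZ_64K / 64 = 1024 descriptors fit within the limit and
 * CTXDESC_LINEAR_CDMAX evaluates to ilog2(1024) = 10.
 */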

/* Command queue */
#define CMDQ_ENT_SZ_SHIFT 4
#define CMDQ_ENT_DWORDS ((1 << CMDQ_ENT_SZ_SHIFT) >> 3)
#define CMDQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)

#define CMDQ_CONS_ERR GENMASK(30, 24)
#define CMDQ_ERR_CERROR_NONE_IDX 0
#define CMDQ_ERR_CERROR_ILL_IDX 1
#define CMDQ_ERR_CERROR_ABT_IDX 2
#define CMDQ_ERR_CERROR_ATC_INV_IDX 3

#define CMDQ_PROD_OWNED_FLAG Q_OVERFLOW_FLAG

/*
 * This is used to size the command queue and therefore must be at least
 * BITS_PER_LONG so that the valid_map works correctly (it relies on the
 * total number of queue entries being a multiple of BITS_PER_LONG).
 */
#define CMDQ_BATCH_ENTRIES BITS_PER_LONG
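
/*
 * Illustrative consequence: with 64-bit longs, a batch holds at most 64
 * commands, and every word of the valid bitmap covers exactly 64 queue
 * slots, so no bitmap word ever straddles the end of the queue.
 */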

#define CMDQ_0_OP GENMASK_ULL(7, 0)
#define CMDQ_0_SSV (1UL << 11)

#define CMDQ_PREFETCH_0_SID GENMASK_ULL(63, 32)
#define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0)
#define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12)

#define CMDQ_CFGI_0_SSID GENMASK_ULL(31, 12)
#define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32)
#define CMDQ_CFGI_1_LEAF (1UL << 0)
#define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0)

#define CMDQ_TLBI_0_VMID GENMASK_ULL(47, 32)
#define CMDQ_TLBI_0_ASID GENMASK_ULL(63, 48)
#define CMDQ_TLBI_1_LEAF (1UL << 0)
#define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12)
#define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12)

#define CMDQ_ATC_0_SSID GENMASK_ULL(31, 12)
#define CMDQ_ATC_0_SID GENMASK_ULL(63, 32)
#define CMDQ_ATC_0_GLOBAL (1UL << 9)
#define CMDQ_ATC_1_SIZE GENMASK_ULL(5, 0)
#define CMDQ_ATC_1_ADDR_MASK GENMASK_ULL(63, 12)

#define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12)
#define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
#define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
#define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)

#define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12)
#define CMDQ_SYNC_0_CS_NONE 0
#define CMDQ_SYNC_0_CS_IRQ 1
#define CMDQ_SYNC_0_CS_SEV 2
#define CMDQ_SYNC_0_MSH GENMASK_ULL(23, 22)
#define CMDQ_SYNC_0_MSIATTR GENMASK_ULL(27, 24)
#define CMDQ_SYNC_0_MSIDATA GENMASK_ULL(63, 32)
#define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)

/* Event queue */
#define EVTQ_ENT_SZ_SHIFT 5
#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
#define EVTQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)

#define EVTQ_0_ID GENMASK_ULL(7, 0)

/* PRI queue */
#define PRIQ_ENT_SZ_SHIFT 4
#define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
#define PRIQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)

#define PRIQ_0_SID GENMASK_ULL(31, 0)
#define PRIQ_0_SSID GENMASK_ULL(51, 32)
#define PRIQ_0_PERM_PRIV (1UL << 58)
#define PRIQ_0_PERM_EXEC (1UL << 59)
#define PRIQ_0_PERM_READ (1UL << 60)
#define PRIQ_0_PERM_WRITE (1UL << 61)
#define PRIQ_0_PRG_LAST (1UL << 62)
#define PRIQ_0_SSID_V (1UL << 63)

#define PRIQ_1_PRG_IDX GENMASK_ULL(8, 0)
#define PRIQ_1_ADDR_MASK GENMASK_ULL(63, 12)

/* High-level queue structures */
#define ARM_SMMU_POLL_TIMEOUT_US 1000000 /* 1s! */
#define ARM_SMMU_POLL_SPIN_COUNT 10

#define MSI_IOVA_BASE 0x8000000
#define MSI_IOVA_LENGTH 0x100000

static bool disable_bypass = 1;
module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};

static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};

enum pri_resp {
	PRI_RESP_DENY = 0,
	PRI_RESP_FAIL = 1,
	PRI_RESP_SUCC = 2,
};

struct arm_smmu_cmdq_ent {
	/* Common fields */
	u8 opcode;
	bool substream_valid;

	/* Command-specific fields */
	union {
		#define CMDQ_OP_PREFETCH_CFG 0x1
		struct {
			u32 sid;
			u8 size;
			u64 addr;
		} prefetch;

		#define CMDQ_OP_CFGI_STE 0x3
		#define CMDQ_OP_CFGI_ALL 0x4
		#define CMDQ_OP_CFGI_CD 0x5
		#define CMDQ_OP_CFGI_CD_ALL 0x6
		struct {
			u32 sid;
			u32 ssid;
			union {
				bool leaf;
				u8 span;
			};
		} cfgi;

		#define CMDQ_OP_TLBI_NH_ASID 0x11
		#define CMDQ_OP_TLBI_NH_VA 0x12
		#define CMDQ_OP_TLBI_EL2_ALL 0x20
		#define CMDQ_OP_TLBI_S12_VMALL 0x28
		#define CMDQ_OP_TLBI_S2_IPA 0x2a
		#define CMDQ_OP_TLBI_NSNH_ALL 0x30
		struct {
			u16 asid;
			u16 vmid;
			bool leaf;
			u64 addr;
		} tlbi;

		#define CMDQ_OP_ATC_INV 0x40
		#define ATC_INV_SIZE_ALL 52
		struct {
			u32 sid;
			u32 ssid;
			u64 addr;
			u8 size;
			bool global;
		} atc;

		#define CMDQ_OP_PRI_RESP 0x41
		struct {
			u32 sid;
			u32 ssid;
			u16 grpid;
			enum pri_resp resp;
		} pri;

		#define CMDQ_OP_CMD_SYNC 0x46
		struct {
			u64 msiaddr;
		} sync;
	};
};

struct arm_smmu_ll_queue {
	union {
		u64 val;
		struct {
			u32 prod;
			u32 cons;
		};
		struct {
			atomic_t prod;
			atomic_t cons;
		} atomic;
		u8 __pad[SMP_CACHE_BYTES];
	};
	u32 max_n_shift;
} ____cacheline_aligned_in_smp;

struct arm_smmu_queue {
	struct arm_smmu_ll_queue llq;
	int irq; /* Wired interrupt */

	__le64 *base;
	dma_addr_t base_dma;
	u64 q_base;
	size_t ent_dwords;

	u32 __iomem *prod_reg;
	u32 __iomem *cons_reg;
};

struct arm_smmu_queue_poll {
	ktime_t timeout;
	unsigned int delay;
	unsigned int spin_cnt;
	bool wfe;
};

struct arm_smmu_cmdq {
	struct arm_smmu_queue q;
	atomic_long_t *valid_map;
	atomic_t owner_prod;
	atomic_t lock;
};

struct arm_smmu_cmdq_batch {
	u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
	int num;
};

struct arm_smmu_evtq {
	struct arm_smmu_queue q;
	u32 max_stalls;
};

struct arm_smmu_priq {
	struct arm_smmu_queue q;
};

/* High-level stream table and context descriptor structures */
struct arm_smmu_strtab_l1_desc {
	u8 span;

	__le64 *l2ptr;
	dma_addr_t l2ptr_dma;
};

struct arm_smmu_ctx_desc {
	u16 asid;
	u64 ttbr;
	u64 tcr;
	u64 mair;
};

struct arm_smmu_l1_ctx_desc {
	__le64 *l2ptr;
	dma_addr_t l2ptr_dma;
};

struct arm_smmu_ctx_desc_cfg {
	__le64 *cdtab;
	dma_addr_t cdtab_dma;
	struct arm_smmu_l1_ctx_desc *l1_desc;
	unsigned int num_l1_ents;
};

struct arm_smmu_s1_cfg {
	struct arm_smmu_ctx_desc_cfg cdcfg;
	struct arm_smmu_ctx_desc cd;
	u8 s1fmt;
	u8 s1cdmax;
};

struct arm_smmu_s2_cfg {
	u16 vmid;
	u64 vttbr;
	u64 vtcr;
};

struct arm_smmu_strtab_cfg {
	__le64 *strtab;
	dma_addr_t strtab_dma;
	struct arm_smmu_strtab_l1_desc *l1_desc;
	unsigned int num_l1_ents;

	u64 strtab_base;
	u32 strtab_base_cfg;
};

/* An SMMUv3 instance */
struct arm_smmu_device {
	struct device *dev;
	void __iomem *base;

#define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0)
#define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1)
#define ARM_SMMU_FEAT_TT_LE (1 << 2)
#define ARM_SMMU_FEAT_TT_BE (1 << 3)
#define ARM_SMMU_FEAT_PRI (1 << 4)
#define ARM_SMMU_FEAT_ATS (1 << 5)
#define ARM_SMMU_FEAT_SEV (1 << 6)
#define ARM_SMMU_FEAT_MSI (1 << 7)
#define ARM_SMMU_FEAT_COHERENCY (1 << 8)
#define ARM_SMMU_FEAT_TRANS_S1 (1 << 9)
#define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
#define ARM_SMMU_FEAT_STALLS (1 << 11)
#define ARM_SMMU_FEAT_HYP (1 << 12)
#define ARM_SMMU_FEAT_STALL_FORCE (1 << 13)
#define ARM_SMMU_FEAT_VAX (1 << 14)
	u32 features;

#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
	u32 options;

	struct arm_smmu_cmdq cmdq;
	struct arm_smmu_evtq evtq;
	struct arm_smmu_priq priq;

	int gerr_irq;
	int combined_irq;

	unsigned long ias; /* IPA */
	unsigned long oas; /* PA */
	unsigned long pgsize_bitmap;

#define ARM_SMMU_MAX_ASIDS (1 << 16)
	unsigned int asid_bits;
	DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);

#define ARM_SMMU_MAX_VMIDS (1 << 16)
	unsigned int vmid_bits;
	DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);

	unsigned int ssid_bits;
	unsigned int sid_bits;

	struct arm_smmu_strtab_cfg strtab_cfg;

	/* IOMMU core code handle */
	struct iommu_device iommu;
};

/* SMMU private data for each master */
struct arm_smmu_master {
	struct arm_smmu_device *smmu;
	struct device *dev;
	struct arm_smmu_domain *domain;
	struct list_head domain_head;
	u32 *sids;
	unsigned int num_sids;
	bool ats_enabled;
	unsigned int ssid_bits;
};

/* SMMU private data for an IOMMU domain */
enum arm_smmu_domain_stage {
	ARM_SMMU_DOMAIN_S1 = 0,
	ARM_SMMU_DOMAIN_S2,
	ARM_SMMU_DOMAIN_NESTED,
	ARM_SMMU_DOMAIN_BYPASS,
};

struct arm_smmu_domain {
	struct arm_smmu_device *smmu;
	struct mutex init_mutex; /* Protects smmu pointer */

	struct io_pgtable_ops *pgtbl_ops;
	bool non_strict;
	atomic_t nr_ats_masters;

	enum arm_smmu_domain_stage stage;
	union {
		struct arm_smmu_s1_cfg s1_cfg;
		struct arm_smmu_s2_cfg s2_cfg;
	};

	struct iommu_domain domain;

	struct list_head devices;
	spinlock_t devices_lock;
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace" },
	{ 0, NULL },
};

static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
						 struct arm_smmu_device *smmu)
{
	if ((offset > SZ_64K) &&
	    (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
		offset -= SZ_64K;

	return smmu->base + offset;
}
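
/*
 * Worked example (illustrative): on a normal SMMU, ARM_SMMU_EVTQ_PROD
 * lives at offset 0x100a8 in page 1. With ARM_SMMU_OPT_PAGE0_REGS_ONLY
 * set, arm_smmu_page1_fixup(0x100a8, smmu) folds it back down to offset
 * 0xa8 within page 0.
 */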

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}

static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
					  arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				   arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}

/* Low-level queue manipulation functions */
static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
{
	u32 space, prod, cons;

	prod = Q_IDX(q, q->prod);
	cons = Q_IDX(q, q->cons);

	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
		space = (1 << q->max_n_shift) - (prod - cons);
	else
		space = cons - prod;

	return space >= n;
}

static bool queue_full(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
}

static bool queue_empty(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
}

static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
{
	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
}

static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	mb();
	writel_relaxed(q->llq.cons, q->cons_reg);
}

static void queue_inc_cons(struct arm_smmu_ll_queue *q)
{
	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;

	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
}

static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
	int ret = 0;
	u32 prod = readl_relaxed(q->prod_reg);

	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
		ret = -EOVERFLOW;

	q->llq.prod = prod;
	return ret;
}

static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
{
	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;

	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
}

static void queue_poll_init(struct arm_smmu_device *smmu,
			    struct arm_smmu_queue_poll *qp)
{
	qp->delay = 1;
	qp->spin_cnt = 0;
	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
}

static int queue_poll(struct arm_smmu_queue_poll *qp)
{
	if (ktime_compare(ktime_get(), qp->timeout) > 0)
		return -ETIMEDOUT;

	if (qp->wfe) {
		wfe();
	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
		cpu_relax();
	} else {
		udelay(qp->delay);
		qp->delay *= 2;
		qp->spin_cnt = 0;
	}

	return 0;
}

static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = cpu_to_le64(*src++);
}

static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = le64_to_cpu(*src++);
}

static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
	if (queue_empty(&q->llq))
		return -EAGAIN;

	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
	queue_inc_cons(&q->llq);
	queue_sync_cons_out(q);
	return 0;
}

/* High-level queue accessors */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);

	switch (ent->opcode) {
	case CMDQ_OP_TLBI_EL2_ALL:
	case CMDQ_OP_TLBI_NSNH_ALL:
		break;
	case CMDQ_OP_PREFETCH_CFG:
		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
		break;
	case CMDQ_OP_CFGI_CD:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
		/* Fallthrough */
	case CMDQ_OP_CFGI_STE:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
		break;
	case CMDQ_OP_CFGI_CD_ALL:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		break;
	case CMDQ_OP_CFGI_ALL:
		/* Cover the entire SID range */
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
		break;
	case CMDQ_OP_TLBI_NH_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
		break;
	case CMDQ_OP_TLBI_S2_IPA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
		break;
	case CMDQ_OP_TLBI_NH_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		/* Fallthrough */
	case CMDQ_OP_TLBI_S12_VMALL:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		break;
	case CMDQ_OP_ATC_INV:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
		break;
	case CMDQ_OP_PRI_RESP:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
		switch (ent->pri.resp) {
		case PRI_RESP_DENY:
		case PRI_RESP_FAIL:
		case PRI_RESP_SUCC:
			break;
		default:
			return -EINVAL;
		}
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
		break;
	case CMDQ_OP_CMD_SYNC:
		if (ent->sync.msiaddr) {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
		} else {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
		}
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
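
/*
 * Illustrative usage sketch (not part of the driver): issuing a single
 * stage-1 TLBI by VA. The ASID and address below are hypothetical.
 *
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode	= CMDQ_OP_TLBI_NH_VA,
 *		.tlbi	= {
 *			.asid	= 1,
 *			.addr	= 0x10000,
 *			.leaf	= true,
 *		},
 *	};
 *
 *	arm_smmu_cmdq_issue_cmd(smmu, &ent);
 *	arm_smmu_cmdq_issue_sync(smmu);
 */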

static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
					 u32 prod)
{
	struct arm_smmu_queue *q = &smmu->cmdq.q;
	struct arm_smmu_cmdq_ent ent = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	/*
	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
	 * payload, so the write will zero the entire command on that platform.
	 */
	if (smmu->features & ARM_SMMU_FEAT_MSI &&
	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {
		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
				   q->ent_dwords * 8;
	}

	arm_smmu_cmdq_build_cmd(cmd, &ent);
}

static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
	static const char *cerror_str[] = {
		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
	};

	int i;
	u64 cmd[CMDQ_ENT_DWORDS];
	struct arm_smmu_queue *q = &smmu->cmdq.q;
	u32 cons = readl_relaxed(q->cons_reg);
	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
	struct arm_smmu_cmdq_ent cmd_sync = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
		idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");

	switch (idx) {
	case CMDQ_ERR_CERROR_ABT_IDX:
		dev_err(smmu->dev, "retrying command fetch\n");
	case CMDQ_ERR_CERROR_NONE_IDX:
		return;
	case CMDQ_ERR_CERROR_ATC_INV_IDX:
		/*
		 * ATC Invalidation Completion timeout. CONS is still pointing
		 * at the CMD_SYNC. Attempt to complete other pending commands
		 * by repeating the CMD_SYNC, though we might well end up back
		 * here since the ATC invalidation may still be pending.
		 */
		return;
	case CMDQ_ERR_CERROR_ILL_IDX:
		/* Fallthrough */
	default:
		break;
	}

	/*
	 * We may have concurrent producers, so we need to be careful
	 * not to touch any of the shadow cmdq state.
	 */
	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
	dev_err(smmu->dev, "skipping command in error state:\n");
	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);

	/* Convert the erroneous command into a CMD_SYNC */
	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
		return;
	}

	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}

/*
 * Command queue locking.
 * This is a form of bastardised rwlock with the following major changes:
 *
 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 *   Neither have barrier semantics, and instead provide only a control
 *   dependency.
 *
 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 *   fails if the caller appears to be the last lock holder (yes, this is
 *   racy). All successful UNLOCK routines have RELEASE semantics.
 */
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	int val;

	/*
	 * We can try to avoid the cmpxchg() loop by simply incrementing the
	 * lock counter. When held in exclusive state, the lock counter is set
	 * to INT_MIN so these increments won't hurt as the value will remain
	 * negative.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	do {
		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}

static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
{
	(void)atomic_dec_return_release(&cmdq->lock);
}

static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
{
	if (atomic_read(&cmdq->lock) == 1)
		return false;

	arm_smmu_cmdq_shared_unlock(cmdq);
	return true;
}

#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})

#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&cmdq->lock, 0);				\
	local_irq_restore(flags);					\
})
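
/*
 * Illustrative usage sketch (not part of the driver): readers take the
 * lock shared around publishing a CMD_SYNC, while a CPU that wants to
 * refresh the shadow cons pointer needs the exclusive side.
 *
 *	arm_smmu_cmdq_shared_lock(cmdq);
 *	// ...mark our slots valid, wait for the sync to complete...
 *	if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 *		// last one out: safe to update cmdq->q.llq.cons
 *		arm_smmu_cmdq_shared_unlock(cmdq);
 *	}
 *
 *	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
 *		// no readers: safe to refresh cons from the hardware
 *		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
 *	}
 */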

/*
 * Command queue insertion.
 * This is made fiddly by our attempts to achieve some sort of scalability
 * since there is one queue shared amongst all of the CPUs in the system. If
 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
 * then you'll *love* this monstrosity.
 *
 * The basic idea is to split the queue up into ranges of commands that are
 * owned by a given CPU; the owner may not have written all of the commands
 * itself, but is responsible for advancing the hardware prod pointer when
 * the time comes. The algorithm is roughly:
 *
 *	1. Allocate some space in the queue. At this point we also discover
 *	   whether the head of the queue is currently owned by another CPU,
 *	   or whether we are the owner.
 *
 *	2. Write our commands into our allocated slots in the queue.
 *
 *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
 *
 *	4. If we are an owner:
 *		a. Wait for the previous owner to finish.
 *		b. Mark the queue head as unowned, which tells us the range
 *		   that we are responsible for publishing.
 *		c. Wait for all commands in our owned range to become valid.
 *		d. Advance the hardware prod pointer.
 *		e. Tell the next owner we've finished.
 *
 *	5. If we are inserting a CMD_SYNC (we may or may not have been an
 *	   owner), then we need to stick around until it has completed:
 *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
 *		   to clear the first 4 bytes.
 *		b. Otherwise, we spin waiting for the hardware cons pointer to
 *		   advance past our command.
 *
 * The devil is in the details, particularly the use of locking for handling
 * SYNC completion and freeing up space in the queue before we think that it is
 * full.
 */
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
					       u32 sprod, u32 eprod, bool set)
{
	u32 swidx, sbidx, ewidx, ebidx;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= sprod,
	};

	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;

	while (llq.prod != eprod) {
		unsigned long mask;
		atomic_long_t *ptr;
		u32 limit = BITS_PER_LONG;

		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;

		ptr = &cmdq->valid_map[swidx];

		if ((swidx == ewidx) && (sbidx < ebidx))
			limit = ebidx;

		mask = GENMASK(limit - 1, sbidx);

		/*
		 * The valid bit is the inverse of the wrap bit. This means
		 * that a zero-initialised queue is invalid and, after marking
		 * all entries as valid, they become invalid again when we
		 * wrap.
		 */
		if (set) {
			atomic_long_xor(mask, ptr);
		} else { /* Poll */
			unsigned long valid;

			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
		}

		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
	}
}
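
/*
 * Worked example (illustrative): a 64-entry queue has a single valid_map
 * word. Marking slots [4, 8) valid on the first lap (wrap bit clear) xors
 * in the mask GENMASK(7, 4); a poller computes valid = ULONG_MAX & mask
 * and spins until those bits read as ones. One lap later (wrap bit set),
 * the same xor clears the bits and pollers expect zeroes instead.
 */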

/* Mark all entries in the range [sprod, eprod) as valid */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
					u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}

/* Wait for all entries in the range [sprod, eprod) to become valid */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
					 u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}

/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
					     struct arm_smmu_ll_queue *llq)
{
	unsigned long flags;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	int ret = 0;

	/*
	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
	 * that fails, spin until somebody else updates it for us.
	 */
	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
		llq->val = READ_ONCE(cmdq->q.llq.val);
		return 0;
	}

	queue_poll_init(smmu, &qp);
	do {
		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
		if (!queue_full(llq))
			break;

		ret = queue_poll(&qp);
	} while (!ret);

	return ret;
}

/*
 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
					  struct arm_smmu_ll_queue *llq)
{
	int ret = 0;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));

	queue_poll_init(smmu, &qp);

	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
	qp.wfe = false;
	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
	return ret;
}

/*
 * Wait until the SMMU cons index passes llq->prod.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
					       struct arm_smmu_ll_queue *llq)
{
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	u32 prod = llq->prod;
	int ret = 0;

	queue_poll_init(smmu, &qp);
	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
	do {
		if (queue_consumed(llq, prod))
			break;

		ret = queue_poll(&qp);

		/*
		 * This needs to be a readl() so that our subsequent call
		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
		 *
		 * Specifically, we need to ensure that we observe all
		 * shared_lock()s by other CMD_SYNCs that share our owner,
		 * so that a failing call to tryunlock() means that we're
		 * the last one out and therefore we can safely advance
		 * cmdq->q.llq.cons. Roughly speaking:
		 *
		 * CPU 0		CPU1			CPU2 (us)
		 *
		 * if (sync)
		 *	shared_lock();
		 *
		 * dma_wmb();
		 * set_valid_map();
		 *
		 *			if (owner) {
		 *				poll_valid_map();
		 *				<control dependency>
		 *				writel(prod_reg);
		 *
		 *						readl(cons_reg);
		 *						tryunlock();
		 *
		 * Requires us to see CPU 0's shared_lock() acquisition.
		 */
		llq->cons = readl(cmdq->q.cons_reg);
	} while (!ret);

	return ret;
}

static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
					 struct arm_smmu_ll_queue *llq)
{
	if (smmu->features & ARM_SMMU_FEAT_MSI &&
	    smmu->features & ARM_SMMU_FEAT_COHERENCY)
		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);

	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
}

static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
					u32 prod, int n)
{
	int i;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= prod,
	};

	for (i = 0; i < n; ++i) {
		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];

		prod = queue_inc_prod_n(&llq, i);
		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
	}
}

/*
 * This is the actual insertion function, and provides the following
 * ordering guarantees to callers:
 *
 * - There is a dma_wmb() before publishing any commands to the queue.
 *   This can be relied upon to order prior writes to data structures
 *   in memory (such as a CD or an STE) before the command.
 *
 * - On completion of a CMD_SYNC, there is a control dependency.
 *   This can be relied upon to order subsequent writes to memory (e.g.
 *   freeing an IOVA) after completion of the CMD_SYNC.
 *
 * - Command insertion is totally ordered, so if two CPUs each race to
 *   insert their own list of commands then all of the commands from one
 *   CPU will appear before any of the commands from the other CPU.
 */
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
				       u64 *cmds, int n, bool sync)
{
	u64 cmd_sync[CMDQ_ENT_DWORDS];
	u32 prod;
	unsigned long flags;
	bool owner;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift = cmdq->q.llq.max_n_shift,
	}, head = llq;
	int ret = 0;

	/* 1. Allocate some space in the queue */
	local_irq_save(flags);
	llq.val = READ_ONCE(cmdq->q.llq.val);
	do {
		u64 old;

		while (!queue_has_space(&llq, n + sync)) {
			local_irq_restore(flags);
			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
			local_irq_save(flags);
		}

		head.cons = llq.cons;
		head.prod = queue_inc_prod_n(&llq, n + sync) |
			    CMDQ_PROD_OWNED_FLAG;

		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
		if (old == llq.val)
			break;

		llq.val = old;
	} while (1);
	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;

	/*
	 * 2. Write our commands into the queue
	 * Dependency ordering from the cmpxchg() loop above.
	 */
	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
	if (sync) {
		prod = queue_inc_prod_n(&llq, n);
		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);

		/*
		 * In order to determine completion of our CMD_SYNC, we must
		 * ensure that the queue can't wrap twice without us noticing.
		 * We achieve that by taking the cmdq lock as shared before
		 * marking our slot as valid.
		 */
		arm_smmu_cmdq_shared_lock(cmdq);
	}

	/* 3. Mark our slots as valid, ensuring commands are visible first */
	dma_wmb();
	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);

	/* 4. If we are the owner, take control of the SMMU hardware */
	if (owner) {
		/* a. Wait for previous owner to finish */
		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);

		/* b. Stop gathering work by clearing the owned flag */
		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
						   &cmdq->q.llq.atomic.prod);
		prod &= ~CMDQ_PROD_OWNED_FLAG;

		/*
		 * c. Wait for any gathered work to be written to the queue.
		 * Note that we read our own entries so that we have the control
		 * dependency required by (d).
		 */
		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);

		/*
		 * d. Advance the hardware prod pointer
		 * Control dependency ordering from the entries becoming valid.
		 */
		writel_relaxed(prod, cmdq->q.prod_reg);

		/*
		 * e. Tell the next owner we're done
		 * Make sure we've updated the hardware first, so that we don't
		 * race to update prod and potentially move it backwards.
		 */
		atomic_set_release(&cmdq->owner_prod, prod);
	}

	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
	if (sync) {
		llq.prod = queue_inc_prod_n(&llq, n);
		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
		if (ret) {
			dev_err_ratelimited(smmu->dev,
					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
					    llq.prod,
					    readl_relaxed(cmdq->q.prod_reg),
					    readl_relaxed(cmdq->q.cons_reg));
		}

		/*
		 * Try to unlock the cmdq lock. This will fail if we're the last
		 * reader, in which case we can safely update cmdq->q.llq.cons
		 */
		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
			arm_smmu_cmdq_shared_unlock(cmdq);
		}
	}

	local_irq_restore(flags);
	return ret;
}

static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
				   struct arm_smmu_cmdq_ent *ent)
{
	u64 cmd[CMDQ_ENT_DWORDS];

	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
			 ent->opcode);
		return -EINVAL;
	}

	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
}

static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
}

static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
				    struct arm_smmu_cmdq_batch *cmds,
				    struct arm_smmu_cmdq_ent *cmd)
{
	if (cmds->num == CMDQ_BATCH_ENTRIES) {
		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
		cmds->num = 0;
	}
	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
	cmds->num++;
}

static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
				      struct arm_smmu_cmdq_batch *cmds)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
}
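
/*
 * Illustrative usage sketch (not part of the driver): batching a run of
 * commands so they are inserted in as few queue transactions as possible,
 * then draining the remainder with a final CMD_SYNC. The loop bound and
 * SIDs are hypothetical.
 *
 *	struct arm_smmu_cmdq_batch cmds = {};
 *	struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_CFGI_STE };
 *	u32 sid;
 *
 *	for (sid = 0; sid < 4; sid++) {
 *		cmd.cfgi.sid = sid;
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	}
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 */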

/* Context descriptor manipulation functions */
static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
			     int ssid, bool leaf)
{
	size_t i;
	unsigned long flags;
	struct arm_smmu_master *master;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_CD,
		.cfgi	= {
			.ssid	= ssid,
			.leaf	= leaf,
		},
	};

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		for (i = 0; i < master->num_sids; i++) {
			cmd.cfgi.sid = master->sids[i];
			arm_smmu_cmdq_issue_cmd(smmu, &cmd);
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	arm_smmu_cmdq_issue_sync(smmu);
}

static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
					struct arm_smmu_l1_ctx_desc *l1_desc)
{
	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);

	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
					     &l1_desc->l2ptr_dma, GFP_KERNEL);
	if (!l1_desc->l2ptr) {
		dev_warn(smmu->dev,
			 "failed to allocate context descriptor table\n");
		return -ENOMEM;
	}
	return 0;
}

static void arm_smmu_write_cd_l1_desc(__le64 *dst,
				      struct arm_smmu_l1_ctx_desc *l1_desc)
{
	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
		  CTXDESC_L1_DESC_V;

	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(*dst, cpu_to_le64(val));
}

static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
				   u32 ssid)
{
	__le64 *l1ptr;
	unsigned int idx;
	struct arm_smmu_l1_ctx_desc *l1_desc;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;

	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;

	idx = ssid >> CTXDESC_SPLIT;
	l1_desc = &cdcfg->l1_desc[idx];
	if (!l1_desc->l2ptr) {
		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
			return NULL;

		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
		/* An invalid L1CD can be cached */
		arm_smmu_sync_cd(smmu_domain, ssid, false);
	}
	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
}

static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
				   int ssid, struct arm_smmu_ctx_desc *cd)
{
	/*
	 * This function handles the following cases:
	 *
	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
	 * (2) Install a secondary CD, for SID+SSID traffic.
	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
	 *     CD, then invalidate the old entry and mappings.
	 * (4) Remove a secondary CD.
	 */
	u64 val;
	bool cd_live;
	__le64 *cdptr;
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
		return -E2BIG;

	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
	if (!cdptr)
		return -ENOMEM;

	val = le64_to_cpu(cdptr[0]);
	cd_live = !!(val & CTXDESC_CD_0_V);

	if (!cd) { /* (4) */
		val = 0;
	} else if (cd_live) { /* (3) */
		val &= ~CTXDESC_CD_0_ASID;
		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
		/*
		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
		 * this substream's traffic
		 */
	} else { /* (1) and (2) */
		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
		cdptr[2] = 0;
		cdptr[3] = cpu_to_le64(cd->mair);

		/*
		 * STE is live, and the SMMU might read dwords of this CD in any
		 * order. Ensure that it observes valid values before reading
		 * V=1.
		 */
		arm_smmu_sync_cd(smmu_domain, ssid, true);

		val = cd->tcr |
#ifdef __BIG_ENDIAN
			CTXDESC_CD_0_ENDI |
#endif
			CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
			CTXDESC_CD_0_AA64 |
			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
			CTXDESC_CD_0_V;

		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
			val |= CTXDESC_CD_0_S;
	}

	/*
	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
	 * "Configuration structures and configuration invalidation completion"
	 *
	 *   The size of single-copy atomic reads made by the SMMU is
	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
	 *   field within an aligned 64-bit span of a structure can be altered
	 *   without first making the structure invalid.
	 */
	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
	arm_smmu_sync_cd(smmu_domain, ssid, true);
	return 0;
}
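
/*
 * Illustrative usage sketch (not part of the driver): installing the
 * primary context descriptor for SSID 0 once a domain's page tables
 * exist, then tearing it down again.
 *
 *	// case (1): install the primary CD
 *	int ret = arm_smmu_write_ctx_desc(smmu_domain, 0,
 *					  &smmu_domain->s1_cfg.cd);
 *
 *	// case (4): remove it again
 *	arm_smmu_write_ctx_desc(smmu_domain, 0, NULL);
 */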

static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
{
	int ret;
	size_t l1size;
	size_t max_contexts;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;

	max_contexts = 1 << cfg->s1cdmax;

	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
	    max_contexts <= CTXDESC_L2_ENTRIES) {
		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
		cdcfg->num_l1_ents = max_contexts;

		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
	} else {
		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
						  CTXDESC_L2_ENTRIES);

		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
					      sizeof(*cdcfg->l1_desc),
					      GFP_KERNEL);
		if (!cdcfg->l1_desc)
			return -ENOMEM;

		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
	}

	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
					   GFP_KERNEL);
	if (!cdcfg->cdtab) {
		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
		ret = -ENOMEM;
		goto err_free_l1;
	}

	return 0;

err_free_l1:
	if (cdcfg->l1_desc) {
		devm_kfree(smmu->dev, cdcfg->l1_desc);
		cdcfg->l1_desc = NULL;
	}
	return ret;
}

static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
{
	int i;
	size_t size, l1size;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;

	if (cdcfg->l1_desc) {
		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);

		for (i = 0; i < cdcfg->num_l1_ents; i++) {
			if (!cdcfg->l1_desc[i].l2ptr)
				continue;

			dmam_free_coherent(smmu->dev, size,
					   cdcfg->l1_desc[i].l2ptr,
					   cdcfg->l1_desc[i].l2ptr_dma);
		}
		devm_kfree(smmu->dev, cdcfg->l1_desc);
		cdcfg->l1_desc = NULL;

		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
	} else {
		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
	}

	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
	cdcfg->cdtab_dma = 0;
	cdcfg->cdtab = NULL;
}

/* Stream table manipulation functions */
static void
arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
{
	u64 val = 0;

	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;

	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(*dst, cpu_to_le64(val));
}

static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
{
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_STE,
		.cfgi	= {
			.sid	= sid,
			.leaf	= true,
		},
	};

	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);
}

static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
				      __le64 *dst)
{
	/*
	 * This is hideously complicated, but we only really care about
	 * three cases at the moment:
	 *
	 * 1. Invalid (all zero) -> bypass/fault (init)
	 * 2. Bypass/fault -> translation/bypass (attach)
	 * 3. Translation/bypass -> bypass/fault (detach)
	 *
	 * Given that we can't update the STE atomically and the SMMU
	 * doesn't read the thing in a defined order, that leaves us
	 * with the following maintenance requirements:
	 *
	 * 1. Update Config, return (init time STEs aren't live)
	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
	 * 3. Update Config, sync
	 */
	u64 val = le64_to_cpu(dst[0]);
	bool ste_live = false;
	struct arm_smmu_device *smmu = NULL;
	struct arm_smmu_s1_cfg *s1_cfg = NULL;
	struct arm_smmu_s2_cfg *s2_cfg = NULL;
	struct arm_smmu_domain *smmu_domain = NULL;
	struct arm_smmu_cmdq_ent prefetch_cmd = {
		.opcode		= CMDQ_OP_PREFETCH_CFG,
		.prefetch	= {
			.sid	= sid,
		},
	};

	if (master) {
		smmu_domain = master->domain;
		smmu = master->smmu;
	}

	if (smmu_domain) {
		switch (smmu_domain->stage) {
		case ARM_SMMU_DOMAIN_S1:
			s1_cfg = &smmu_domain->s1_cfg;
			break;
		case ARM_SMMU_DOMAIN_S2:
		case ARM_SMMU_DOMAIN_NESTED:
			s2_cfg = &smmu_domain->s2_cfg;
			break;
		default:
			break;
		}
	}

	if (val & STRTAB_STE_0_V) {
		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
		case STRTAB_STE_0_CFG_BYPASS:
			break;
		case STRTAB_STE_0_CFG_S1_TRANS:
		case STRTAB_STE_0_CFG_S2_TRANS:
			ste_live = true;
			break;
		case STRTAB_STE_0_CFG_ABORT:
			BUG_ON(!disable_bypass);
			break;
		default:
			BUG(); /* STE corruption */
		}
	}

	/* Nuke the existing STE_0 value, as we're going to rewrite it */
	val = STRTAB_STE_0_V;

	/* Bypass/fault */
	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
		if (!smmu_domain && disable_bypass)
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
		else
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);

		dst[0] = cpu_to_le64(val);
		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
						STRTAB_STE_1_SHCFG_INCOMING));
		dst[2] = 0; /* Nuke the VMID */
		/*
		 * The SMMU can perform negative caching, so we must sync
		 * the STE regardless of whether the old value was live.
		 */
		if (smmu)
			arm_smmu_sync_ste_for_sid(smmu, sid);
		return;
	}

	if (s1_cfg) {
		BUG_ON(ste_live);
		dst[1] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));

		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
		    !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);

		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
	}

	if (s2_cfg) {
		BUG_ON(ste_live);
		dst[2] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
#ifdef __BIG_ENDIAN
			 STRTAB_STE_2_S2ENDI |
#endif
			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
			 STRTAB_STE_2_S2R);

		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);

		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
	}

	if (master->ats_enabled)
		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
						 STRTAB_STE_1_EATS_TRANS));

	arm_smmu_sync_ste_for_sid(smmu, sid);
	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(dst[0], cpu_to_le64(val));
	arm_smmu_sync_ste_for_sid(smmu, sid);

	/* It's likely that we'll want to use the new STE soon */
	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
}

static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
{
	unsigned int i;

	for (i = 0; i < nent; ++i) {
		arm_smmu_write_strtab_ent(NULL, -1, strtab);
		strtab += STRTAB_STE_DWORDS;
	}
}

static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
{
	size_t size;
	void *strtab;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];

	if (desc->l2ptr)
		return 0;

	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];

	desc->span = STRTAB_SPLIT + 1;
	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
					  GFP_KERNEL);
	if (!desc->l2ptr) {
		dev_err(smmu->dev,
			"failed to allocate l2 stream table for SID %u\n",
			sid);
		return -ENOMEM;
	}

	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
	arm_smmu_write_strtab_l1_desc(strtab, desc);
	return 0;
}

/* IRQ and event handlers */
static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
{
	int i;
	struct arm_smmu_device *smmu = dev;
	struct arm_smmu_queue *q = &smmu->evtq.q;
	struct arm_smmu_ll_queue *llq = &q->llq;
	u64 evt[EVTQ_ENT_DWORDS];

	do {
		while (!queue_remove_raw(q, evt)) {
			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);

			dev_info(smmu->dev, "event 0x%02x received:\n", id);
			for (i = 0; i < ARRAY_SIZE(evt); ++i)
				dev_info(smmu->dev, "\t0x%016llx\n",
					 (unsigned long long)evt[i]);
		}

		/*
		 * Not much we can do on overflow, so scream and pretend we're
		 * trying...
		 */
		if (queue_sync_prod_in(q) == -EOVERFLOW)
			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
	} while (!queue_empty(llq));

	/* Sync our overflow flag, as we believe we're up to speed */
	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
		    Q_IDX(llq, llq->cons);
	return IRQ_HANDLED;
}

static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
{
	u32 sid, ssid;
	u16 grpid;
	bool ssv, last;

	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);

	dev_info(smmu->dev, "unexpected PRI request received:\n");
	dev_info(smmu->dev,
		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
		 sid, ssid, grpid, last ? "L" : "",
		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
		 evt[1] & PRIQ_1_ADDR_MASK);

	if (last) {
		struct arm_smmu_cmdq_ent cmd = {
			.opcode			= CMDQ_OP_PRI_RESP,
			.substream_valid	= ssv,
			.pri			= {
				.sid	= sid,
				.ssid	= ssid,
				.grpid	= grpid,
				.resp	= PRI_RESP_DENY,
			},
		};

		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	}
}

static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
{
	struct arm_smmu_device *smmu = dev;
	struct arm_smmu_queue *q = &smmu->priq.q;
	struct arm_smmu_ll_queue *llq = &q->llq;
	u64 evt[PRIQ_ENT_DWORDS];

	do {
		while (!queue_remove_raw(q, evt))
			arm_smmu_handle_ppr(smmu, evt);

		if (queue_sync_prod_in(q) == -EOVERFLOW)
			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
	} while (!queue_empty(llq));

	/* Sync our overflow flag, as we believe we're up to speed */
	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
		    Q_IDX(llq, llq->cons);
	queue_sync_cons_out(q);
	return IRQ_HANDLED;
}

static int arm_smmu_device_disable(struct arm_smmu_device *smmu);

static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
{
	u32 gerror, gerrorn, active;
	struct arm_smmu_device *smmu = dev;

	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);

	active = gerror ^ gerrorn;
	if (!(active & GERROR_ERR_MASK))
		return IRQ_NONE; /* No errors pending */

	dev_warn(smmu->dev,
		 "unexpected global error reported (0x%08x), this could be serious\n",
		 active);

	if (active & GERROR_SFM_ERR) {
		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
		arm_smmu_device_disable(smmu);
	}

	if (active & GERROR_MSI_GERROR_ABT_ERR)
		dev_warn(smmu->dev, "GERROR MSI write aborted\n");

	if (active & GERROR_MSI_PRIQ_ABT_ERR)
		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");

	if (active & GERROR_MSI_EVTQ_ABT_ERR)
		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");

	if (active & GERROR_MSI_CMDQ_ABT_ERR)
		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");

	if (active & GERROR_PRIQ_ABT_ERR)
		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");

	if (active & GERROR_EVTQ_ABT_ERR)
		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");

	if (active & GERROR_CMDQ_ERR)
		arm_smmu_cmdq_skip_err(smmu);

	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
	return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
{
	struct arm_smmu_device *smmu = dev;

	arm_smmu_evtq_thread(irq, dev);
	if (smmu->features & ARM_SMMU_FEAT_PRI)
		arm_smmu_priq_thread(irq, dev);

	return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
{
	arm_smmu_gerror_handler(irq, dev);
	return IRQ_WAKE_THREAD;
}

static void
arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
			struct arm_smmu_cmdq_ent *cmd)
{
	size_t log2_span;
	size_t span_mask;
	/* ATC invalidates are always on 4096-bytes pages */
	size_t inval_grain_shift = 12;
	unsigned long page_start, page_end;

	*cmd = (struct arm_smmu_cmdq_ent) {
		.opcode			= CMDQ_OP_ATC_INV,
		.substream_valid	= !!ssid,
		.atc.ssid		= ssid,
	};

	if (!size) {
		cmd->atc.size = ATC_INV_SIZE_ALL;
		return;
	}

	page_start = iova >> inval_grain_shift;
	page_end = (iova + size - 1) >> inval_grain_shift;

	/*
	 * In an ATS Invalidate Request, the address must be aligned on the
	 * range size, which must be a power of two number of page sizes. We
	 * thus have to choose between grossly over-invalidating the region, or
	 * splitting the invalidation into multiple commands. For simplicity
	 * we'll go with the first solution, but should refine it in the future
	 * if multiple commands are shown to be more efficient.
	 *
	 * Find the smallest power of two that covers the range. The most
	 * significant differing bit between the start and end addresses,
	 * fls(start ^ end), indicates the required span. For example:
	 *
	 * We want to invalidate pages [8; 11]. This is already the ideal range:
	 *		x = 0b1000 ^ 0b1011 = 0b11
	 *		span = 1 << fls(x) = 4
	 *
	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
	 *		x = 0b0111 ^ 0b1010 = 0b1101
	 *		span = 1 << fls(x) = 16
	 */
	log2_span = fls_long(page_start ^ page_end);
	span_mask = (1ULL << log2_span) - 1;

	page_start &= ~span_mask;

	cmd->atc.addr = page_start << inval_grain_shift;
	cmd->atc.size = log2_span;
}
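
/*
 * Worked example (illustrative): iova = 0x7000, size = 0x4000 gives
 * page_start = 7 and page_end = 10. fls_long(7 ^ 10) = fls_long(0b1101)
 * = 4, so page_start is rounded down to 0 and cmd->atc.size ends up as 4,
 * over-invalidating pages [0; 15] exactly as the comment above describes.
 */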

static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
				   struct arm_smmu_cmdq_ent *cmd)
{
	int i;

	if (!master->ats_enabled)
		return 0;

	for (i = 0; i < master->num_sids; i++) {
		cmd->atc.sid = master->sids[i];
		arm_smmu_cmdq_issue_cmd(master->smmu, cmd);
	}

	return arm_smmu_cmdq_issue_sync(master->smmu);
}

static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
				   int ssid, unsigned long iova, size_t size)
{
	int ret = 0;
	unsigned long flags;
	struct arm_smmu_cmdq_ent cmd;
	struct arm_smmu_master *master;

	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
		return 0;

	/*
	 * Ensure that we've completed prior invalidation of the main TLBs
	 * before we read 'nr_ats_masters' in case of a concurrent call to
	 * arm_smmu_enable_ats():
	 *
	 *	// unmap()			// arm_smmu_enable_ats()
	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
	 *	smp_mb();			[...]
	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
	 *
	 * Ensures that we always see the incremented 'nr_ats_masters' count if
	 * ATS was enabled at the PCI device before completion of the TLBI.
	 */
	smp_mb();
	if (!atomic_read(&smmu_domain->nr_ats_masters))
		return 0;

	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head)
		ret |= arm_smmu_atc_inv_master(master, &cmd);
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	return ret ? -ETIMEDOUT : 0;
}
/* IO_PGTABLE API */
static void arm_smmu_tlb_inv_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cmdq_ent cmd;

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		cmd.opcode	= CMDQ_OP_TLBI_NH_ASID;
		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
		cmd.tlbi.vmid	= 0;
	} else {
		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
	}

	/*
	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
	 * PTEs previously cleared by unmaps on the current CPU not yet visible
	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
	 * insertion to guarantee those are observed before the TLBI. Do be
	 * careful, 007.
	 */
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);
	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
}
static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
				   size_t granule, bool leaf,
				   struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	unsigned long start = iova, end = iova + size;
	struct arm_smmu_cmdq_batch cmds = {};
	struct arm_smmu_cmdq_ent cmd = {
		.tlbi = {
			.leaf	= leaf,
		},
	};

	if (!size)
		return;

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
	} else {
		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
	}

	while (iova < end) {
		cmd.tlbi.addr = iova;
		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
		iova += granule;
	}

	arm_smmu_cmdq_batch_submit(smmu, &cmds);

	/*
	 * Unfortunately, this can't be leaf-only since we may have
	 * zapped an entire table.
	 */
	arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
}
static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
					 unsigned long iova, size_t granule,
					 void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct iommu_domain *domain = &smmu_domain->domain;

	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
}

static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
				  size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
}

static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
				  size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
}

static const struct iommu_flush_ops arm_smmu_flush_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context,
	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
	.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
};
/* IOMMU API */
static bool arm_smmu_capable(enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		return true;
	case IOMMU_CAP_NOEXEC:
		return true;
	default:
		return false;
	}
}
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
	struct arm_smmu_domain *smmu_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED &&
	    type != IOMMU_DOMAIN_DMA &&
	    type != IOMMU_DOMAIN_IDENTITY)
		return NULL;

	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
		return NULL;

	if (type == IOMMU_DOMAIN_DMA &&
	    iommu_get_dma_cookie(&smmu_domain->domain)) {
		kfree(smmu_domain);
		return NULL;
	}

	mutex_init(&smmu_domain->init_mutex);
	INIT_LIST_HEAD(&smmu_domain->devices);
	spin_lock_init(&smmu_domain->devices_lock);

	return &smmu_domain->domain;
}
static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
{
	int idx, size = 1 << span;

	do {
		idx = find_first_zero_bit(map, size);
		if (idx == size)
			return -ENOSPC;
	} while (test_and_set_bit(idx, map));

	return idx;
}

static void arm_smmu_bitmap_free(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
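
/*
 * The two helpers above form a minimal lock-free ID allocator:
 * find_first_zero_bit() may race with concurrent callers, so an ID is
 * only claimed once test_and_set_bit() wins; losers simply rescan.
 */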
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	iommu_put_dma_cookie(domain);
	free_io_pgtable_ops(smmu_domain->pgtbl_ops);

	/* Free the CD and ASID, if we allocated them */
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;

		if (cfg->cdcfg.cdtab) {
			arm_smmu_free_cd_tables(smmu_domain);
			arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
		}
	} else {
		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
		if (cfg->vmid)
			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
	}

	kfree(smmu_domain);
}
static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
				       struct arm_smmu_master *master,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	int ret;
	int asid;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;

	asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
	if (asid < 0)
		return asid;

	cfg->s1cdmax = master->ssid_bits;

	ret = arm_smmu_alloc_cd_tables(smmu_domain);
	if (ret)
		goto out_free_asid;

	cfg->cd.asid	= (u16)asid;
	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;

	/*
	 * Note that this will end up calling arm_smmu_sync_cd() before
	 * the master has been added to the devices list for this domain.
	 * This isn't an issue because the STE hasn't been installed yet.
	 */
	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
	if (ret)
		goto out_free_cd_tables;

	return 0;

out_free_cd_tables:
	arm_smmu_free_cd_tables(smmu_domain);
out_free_asid:
	arm_smmu_bitmap_free(smmu->asid_map, asid);
	return ret;
}
static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
				       struct arm_smmu_master *master,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	int vmid;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;

	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
	if (vmid < 0)
		return vmid;

	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	cfg->vmid	= (u16)vmid;
	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
	return 0;
}
static int arm_smmu_domain_finalise(struct iommu_domain *domain,
				    struct arm_smmu_master *master)
{
	int ret;
	unsigned long ias, oas;
	enum io_pgtable_fmt fmt;
	struct io_pgtable_cfg pgtbl_cfg;
	struct io_pgtable_ops *pgtbl_ops;
	int (*finalise_stage_fn)(struct arm_smmu_domain *,
				 struct arm_smmu_master *,
				 struct io_pgtable_cfg *);
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
		return 0;
	}

	/* Restrict the stage to what we can actually support */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
		ias = min_t(unsigned long, ias, VA_BITS);
		oas = smmu->ias;
		fmt = ARM_64_LPAE_S1;
		finalise_stage_fn = arm_smmu_domain_finalise_s1;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
	case ARM_SMMU_DOMAIN_S2:
		ias = smmu->ias;
		oas = smmu->oas;
		fmt = ARM_64_LPAE_S2;
		finalise_stage_fn = arm_smmu_domain_finalise_s2;
		break;
	default:
		return -EINVAL;
	}

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
		.tlb		= &arm_smmu_flush_ops,
		.iommu_dev	= smmu->dev,
	};

	if (smmu_domain->non_strict)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops)
		return -ENOMEM;

	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
	domain->geometry.force_aperture = true;

	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
	if (ret < 0) {
		free_io_pgtable_ops(pgtbl_ops);
		return ret;
	}

	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;
}
static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
{
	__le64 *step;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;

	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
		struct arm_smmu_strtab_l1_desc *l1_desc;
		int idx;

		/* Two-level walk */
		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
		l1_desc = &cfg->l1_desc[idx];
		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
		step = &l1_desc->l2ptr[idx];
	} else {
		/* Simple linear lookup */
		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
	}

	return step;
}
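
/*
 * Worked example for the two-level walk above, assuming the driver's
 * STRTAB_SPLIT of 8: sid 0x1234 selects L1 descriptor 0x12
 * (sid >> STRTAB_SPLIT) and then STE 0x34 (sid & 0xff) within that
 * descriptor's L2 table.
 */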
static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
{
	int i, j;
	struct arm_smmu_device *smmu = master->smmu;

	for (i = 0; i < master->num_sids; ++i) {
		u32 sid = master->sids[i];
		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);

		/* Bridged PCI devices may end up with duplicated IDs */
		for (j = 0; j < i; j++)
			if (master->sids[j] == sid)
				break;
		if (j < i)
			continue;

		arm_smmu_write_strtab_ent(master, sid, step);
	}
}
#ifdef CONFIG_PCI_ATS
static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
{
	struct pci_dev *pdev;
	struct arm_smmu_device *smmu = master->smmu;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);

	if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) ||
	    !(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS) || pci_ats_disabled())
		return false;

	pdev = to_pci_dev(master->dev);
	return !pdev->untrusted && pdev->ats_cap;
}
#else
static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
{
	return false;
}
#endif
static void arm_smmu_enable_ats(struct arm_smmu_master *master)
{
	size_t stu;
	struct pci_dev *pdev;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_domain *smmu_domain = master->domain;

	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
	if (!master->ats_enabled)
		return;

	/* Smallest Translation Unit: log2 of the smallest supported granule */
	stu = __ffs(smmu->pgsize_bitmap);
	pdev = to_pci_dev(master->dev);

	atomic_inc(&smmu_domain->nr_ats_masters);
	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
	if (pci_enable_ats(pdev, stu))
		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
}
static void arm_smmu_disable_ats(struct arm_smmu_master *master)
{
	struct arm_smmu_cmdq_ent cmd;
	struct arm_smmu_domain *smmu_domain = master->domain;

	if (!master->ats_enabled)
		return;

	pci_disable_ats(to_pci_dev(master->dev));
	/*
	 * Ensure ATS is disabled at the endpoint before we issue the
	 * ATC invalidation via the SMMU.
	 */
	wmb();
	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
	arm_smmu_atc_inv_master(master, &cmd);
	atomic_dec(&smmu_domain->nr_ats_masters);
}
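
/*
 * enable_ats()/disable_ats() are deliberately asymmetric around
 * nr_ats_masters: the counter is incremented before the endpoint may
 * issue translated requests, and only decremented after the final ATC
 * invalidation, pairing with the smp_mb() in arm_smmu_atc_inv_domain()
 * so that unmap() never misses a live ATC.
 */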
static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
{
	int ret;
	int features;
	int num_pasids;
	struct pci_dev *pdev;

	if (!dev_is_pci(master->dev))
		return -ENODEV;

	pdev = to_pci_dev(master->dev);

	features = pci_pasid_features(pdev);
	if (features < 0)
		return features;

	num_pasids = pci_max_pasids(pdev);
	if (num_pasids <= 0)
		return num_pasids;

	ret = pci_enable_pasid(pdev, features);
	if (ret) {
		dev_err(&pdev->dev, "Failed to enable PASID\n");
		return ret;
	}

	master->ssid_bits = min_t(u8, ilog2(num_pasids),
				  master->smmu->ssid_bits);
	return 0;
}
static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
{
	struct pci_dev *pdev;

	if (!dev_is_pci(master->dev))
		return;

	pdev = to_pci_dev(master->dev);

	if (!pdev->pasid_enabled)
		return;

	master->ssid_bits = 0;
	pci_disable_pasid(pdev);
}
static void arm_smmu_detach_dev(struct arm_smmu_master *master)
{
	unsigned long flags;
	struct arm_smmu_domain *smmu_domain = master->domain;

	if (!smmu_domain)
		return;

	arm_smmu_disable_ats(master);

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_del(&master->domain_head);
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	master->domain = NULL;
	master->ats_enabled = false;
	arm_smmu_install_ste_for_dev(master);
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret = 0;
	unsigned long flags;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_master *master;

	if (!fwspec)
		return -ENOENT;

	master = fwspec->iommu_priv;
	smmu = master->smmu;

	arm_smmu_detach_dev(master);

	mutex_lock(&smmu_domain->init_mutex);

	if (!smmu_domain->smmu) {
		smmu_domain->smmu = smmu;
		ret = arm_smmu_domain_finalise(domain, master);
		if (ret) {
			smmu_domain->smmu = NULL;
			goto out_unlock;
		}
	} else if (smmu_domain->smmu != smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s (upstream of %s)\n",
			dev_name(smmu_domain->smmu->dev),
			dev_name(smmu->dev));
		ret = -ENXIO;
		goto out_unlock;
	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
		dev_err(dev,
			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
		ret = -EINVAL;
		goto out_unlock;
	}

	master->domain = smmu_domain;

	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
		master->ats_enabled = arm_smmu_ats_supported(master);

	arm_smmu_install_ste_for_dev(master);

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_add(&master->domain_head, &smmu_domain->devices);
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	arm_smmu_enable_ats(master);

out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
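
/*
 * The ordering in arm_smmu_attach_dev() matters: the domain is finalised
 * and the STE installed before the master joins the devices list, and
 * ATS is only enabled once the master is visible to ATC invalidation.
 */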
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;

	if (!ops)
		return -ENODEV;

	return ops->map(ops, iova, paddr, size, prot);
}

static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size, struct iommu_iotlb_gather *gather)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (!ops)
		return 0;

	return ops->unmap(ops, iova, size, gather);
}
static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (smmu_domain->smmu)
		arm_smmu_tlb_inv_context(smmu_domain);
}

static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
				struct iommu_iotlb_gather *gather)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
			       gather->pgsize, true, smmu_domain);
}

static phys_addr_t
arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;

	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		return iova;

	if (!ops)
		return 0;

	return ops->iova_to_phys(ops, iova);
}
static struct platform_driver arm_smmu_driver;

static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
							  fwnode);
	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}

static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
{
	unsigned long limit = smmu->strtab_cfg.num_l1_ents;

	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
		limit *= 1UL << STRTAB_SPLIT;

	return sid < limit;
}
static struct iommu_ops arm_smmu_ops;

static int arm_smmu_add_device(struct device *dev)
{
	int i, ret;
	struct arm_smmu_device *smmu;
	struct arm_smmu_master *master;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct iommu_group *group;

	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return -ENODEV;

	if (WARN_ON_ONCE(fwspec->iommu_priv))
		return -EBUSY;

	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	if (!smmu)
		return -ENODEV;

	master = kzalloc(sizeof(*master), GFP_KERNEL);
	if (!master)
		return -ENOMEM;

	master->dev = dev;
	master->smmu = smmu;
	master->sids = fwspec->ids;
	master->num_sids = fwspec->num_ids;
	fwspec->iommu_priv = master;

	/* Check the SIDs are in range of the SMMU and our stream table */
	for (i = 0; i < master->num_sids; i++) {
		u32 sid = master->sids[i];

		if (!arm_smmu_sid_in_range(smmu, sid)) {
			ret = -ERANGE;
			goto err_free_master;
		}

		/* Ensure l2 strtab is initialised */
		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
			ret = arm_smmu_init_l2_strtab(smmu, sid);
			if (ret)
				goto err_free_master;
		}
	}

	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);

	/*
	 * Note that PASID must be enabled before, and disabled after ATS:
	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
	 *
	 *   Behavior is undefined if this bit is Set and the value of the PASID
	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
	 *   are changed.
	 */
	arm_smmu_enable_pasid(master);

	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
		master->ssid_bits = min_t(u8, master->ssid_bits,
					  CTXDESC_LINEAR_CDMAX);

	ret = iommu_device_link(&smmu->iommu, dev);
	if (ret)
		goto err_disable_pasid;

	group = iommu_group_get_for_dev(dev);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto err_unlink;
	}

	iommu_group_put(group);
	return 0;

err_unlink:
	iommu_device_unlink(&smmu->iommu, dev);
err_disable_pasid:
	arm_smmu_disable_pasid(master);
err_free_master:
	kfree(master);
	fwspec->iommu_priv = NULL;
	return ret;
}
static void arm_smmu_remove_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master *master;
	struct arm_smmu_device *smmu;

	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return;

	master = fwspec->iommu_priv;
	smmu = master->smmu;
	arm_smmu_detach_dev(master);
	iommu_group_remove_device(dev);
	iommu_device_unlink(&smmu->iommu, dev);
	arm_smmu_disable_pasid(master);
	kfree(master);
	iommu_fwspec_free(dev);
}
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	struct iommu_group *group;

	/*
	 * We don't support devices sharing stream IDs other than PCI RID
	 * aliases, since the necessary ID-to-device lookup becomes rather
	 * impractical given a potential sparse 32-bit stream ID space.
	 */
	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else
		group = generic_device_group(dev);

	return group;
}
static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	switch (domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
			return 0;
		default:
			return -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			*(int *)data = smmu_domain->non_strict;
			return 0;
		default:
			return -ENODEV;
		}
		break;
	default:
		return -EINVAL;
	}
}
static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	int ret = 0;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	mutex_lock(&smmu_domain->init_mutex);

	switch (domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			if (smmu_domain->smmu) {
				ret = -EPERM;
				goto out_unlock;
			}

			if (*(int *)data)
				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
			else
				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			smmu_domain->non_strict = *(int *)data;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	default:
		ret = -EINVAL;
	}

out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	return iommu_fwspec_add_ids(dev, args->args, 1);
}

static void arm_smmu_get_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *region;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					 prot, IOMMU_RESV_SW_MSI);
	if (!region)
		return;

	list_add_tail(&region->list, head);

	iommu_dma_get_resv_regions(dev, head);
}
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= arm_smmu_add_device,
	.remove_device		= arm_smmu_remove_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= generic_iommu_put_resv_regions,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};
/* Probing and initialisation functions */
static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
				   struct arm_smmu_queue *q,
				   unsigned long prod_off,
				   unsigned long cons_off,
				   size_t dwords, const char *name)
{
	size_t qsz;

	do {
		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
					      GFP_KERNEL);
		if (q->base || qsz < PAGE_SIZE)
			break;

		q->llq.max_n_shift--;
	} while (1);

	if (!q->base) {
		dev_err(smmu->dev,
			"failed to allocate queue (0x%zx bytes) for %s\n",
			qsz, name);
		return -ENOMEM;
	}

	if (!WARN_ON(q->base_dma & (qsz - 1))) {
		dev_info(smmu->dev, "allocated %u entries for %s\n",
			 1 << q->llq.max_n_shift, name);
	}

	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
	q->ent_dwords	= dwords;

	q->q_base  = Q_BASE_RWA;
	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);

	q->llq.prod = q->llq.cons = 0;
	return 0;
}
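
/*
 * The allocation loop above degrades gracefully: if the ideal queue size
 * cannot be obtained, max_n_shift is reduced until the allocation
 * succeeds or the queue would drop below one page. The WARN_ON() checks
 * the hardware requirement that the queue base be aligned to the queue
 * size.
 */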
static void arm_smmu_cmdq_free_bitmap(void *data)
{
	unsigned long *bitmap = data;
	bitmap_free(bitmap);
}

static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
{
	int ret = 0;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
	atomic_long_t *bitmap;

	atomic_set(&cmdq->owner_prod, 0);
	atomic_set(&cmdq->lock, 0);

	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
	if (!bitmap) {
		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
		ret = -ENOMEM;
	} else {
		cmdq->valid_map = bitmap;
		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
	}

	return ret;
}
static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
{
	int ret;

	/* cmdq */
	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
				      "cmdq");
	if (ret)
		return ret;

	ret = arm_smmu_cmdq_init(smmu);
	if (ret)
		return ret;

	/* evtq */
	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
				      "evtq");
	if (ret)
		return ret;

	/* priq */
	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
		return 0;

	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
				       "priq");
}
static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
{
	unsigned int i;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
	void *strtab = smmu->strtab_cfg.strtab;

	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
	if (!cfg->l1_desc) {
		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
		return -ENOMEM;
	}

	for (i = 0; i < cfg->num_l1_ents; ++i) {
		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
		strtab += STRTAB_L1_DESC_DWORDS << 3;
	}

	return 0;
}
static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
{
	void *strtab;
	u64 reg;
	u32 size, l1size;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;

	/* Calculate the L1 size, capped to the SIDSIZE. */
	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
	cfg->num_l1_ents = 1 << size;

	size += STRTAB_SPLIT;
	if (size < smmu->sid_bits)
		dev_warn(smmu->dev,
			 "2-level strtab only covers %u/%u bits of SID\n",
			 size, smmu->sid_bits);

	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
				     GFP_KERNEL);
	if (!strtab) {
		dev_err(smmu->dev,
			"failed to allocate l1 stream table (%u bytes)\n",
			l1size);
		return -ENOMEM;
	}
	cfg->strtab = strtab;

	/* Configure strtab_base_cfg for 2 levels */
	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
	cfg->strtab_base_cfg = reg;

	return arm_smmu_init_l1_strtab(smmu);
}
static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
{
	void *strtab;
	u64 reg;
	u32 size;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;

	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
				     GFP_KERNEL);
	if (!strtab) {
		dev_err(smmu->dev,
			"failed to allocate linear stream table (%u bytes)\n",
			size);
		return -ENOMEM;
	}
	cfg->strtab = strtab;
	cfg->num_l1_ents = 1 << smmu->sid_bits;

	/* Configure strtab_base_cfg for a linear table covering all SIDs */
	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
	cfg->strtab_base_cfg = reg;

	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
	return 0;
}
static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
{
	u64 reg;
	int ret;

	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
		ret = arm_smmu_init_strtab_2lvl(smmu);
	else
		ret = arm_smmu_init_strtab_linear(smmu);

	if (ret)
		return ret;

	/* Set the strtab base address */
	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
	reg |= STRTAB_BASE_RA;
	smmu->strtab_cfg.strtab_base = reg;

	/* Allocate the first VMID for stage-2 bypass STEs */
	set_bit(0, smmu->vmid_map);
	return 0;
}
static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{
	int ret;

	ret = arm_smmu_init_queues(smmu);
	if (ret)
		return ret;

	return arm_smmu_init_strtab(smmu);
}
static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
				   unsigned int reg_off, unsigned int ack_off)
{
	u32 reg;

	writel_relaxed(val, smmu->base + reg_off);
	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
					  1, ARM_SMMU_POLL_TIMEOUT_US);
}
3319 /* GBPA is "special" */
3320 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3323 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3325 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3326 1, ARM_SMMU_POLL_TIMEOUT_US);
3332 writel_relaxed(reg | GBPA_UPDATE, gbpa);
3333 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3334 1, ARM_SMMU_POLL_TIMEOUT_US);
3337 dev_err(smmu->dev, "GBPA not responding to update\n");
static void arm_smmu_free_msis(void *data)
{
	struct device *dev = data;
	platform_msi_domain_free_irqs(dev);
}

static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
	phys_addr_t doorbell;
	struct device *dev = msi_desc_to_dev(desc);
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];

	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
	doorbell &= MSI_CFG0_ADDR_MASK;

	writeq_relaxed(doorbell, smmu->base + cfg[0]);
	writel_relaxed(msg->data, smmu->base + cfg[1]);
	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
}
static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
{
	struct msi_desc *desc;
	int ret, nvec = ARM_SMMU_MAX_MSIS;
	struct device *dev = smmu->dev;

	/* Clear the MSI address regs */
	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);

	if (smmu->features & ARM_SMMU_FEAT_PRI)
		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
	else
		nvec--;

	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
		return;

	if (!dev->msi_domain) {
		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
		return;
	}

	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
	if (ret) {
		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
		return;
	}

	for_each_msi_entry(desc, dev) {
		switch (desc->platform.msi_index) {
		case EVTQ_MSI_INDEX:
			smmu->evtq.q.irq = desc->irq;
			break;
		case GERROR_MSI_INDEX:
			smmu->gerr_irq = desc->irq;
			break;
		case PRIQ_MSI_INDEX:
			smmu->priq.q.irq = desc->irq;
			break;
		default:	/* Unknown */
			continue;
		}
	}

	/* Add callback to free MSIs on teardown */
	devm_add_action(dev, arm_smmu_free_msis, dev);
}
static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
{
	int irq, ret;

	arm_smmu_setup_msis(smmu);

	/* Request interrupt lines */
	irq = smmu->evtq.q.irq;
	if (irq) {
		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
						arm_smmu_evtq_thread,
						IRQF_ONESHOT,
						"arm-smmu-v3-evtq", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable evtq irq\n");
	} else {
		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
	}

	irq = smmu->gerr_irq;
	if (irq) {
		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
				       0, "arm-smmu-v3-gerror", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable gerror irq\n");
	} else {
		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
	}

	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		irq = smmu->priq.q.irq;
		if (irq) {
			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
							arm_smmu_priq_thread,
							IRQF_ONESHOT,
							"arm-smmu-v3-priq",
							smmu);
			if (ret < 0)
				dev_warn(smmu->dev,
					 "failed to enable priq irq\n");
		} else {
			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
		}
	}
}
static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
{
	int ret, irq;
	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;

	/* Disable IRQs first */
	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
				      ARM_SMMU_IRQ_CTRLACK);
	if (ret) {
		dev_err(smmu->dev, "failed to disable irqs\n");
		return ret;
	}

	irq = smmu->combined_irq;
	if (irq) {
		/*
		 * Cavium ThunderX2 implementation doesn't support unique irq
		 * lines. Use a single irq line for all the SMMUv3 interrupts.
		 */
		ret = devm_request_threaded_irq(smmu->dev, irq,
					arm_smmu_combined_irq_handler,
					arm_smmu_combined_irq_thread,
					IRQF_ONESHOT,
					"arm-smmu-v3-combined-irq", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable combined irq\n");
	} else
		arm_smmu_setup_unique_irqs(smmu);

	if (smmu->features & ARM_SMMU_FEAT_PRI)
		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;

	/* Enable interrupt generation on the SMMU */
	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
	if (ret)
		dev_warn(smmu->dev, "failed to enable irqs\n");

	return 0;
}
static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
{
	int ret;

	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
	if (ret)
		dev_err(smmu->dev, "failed to clear cr0\n");

	return ret;
}
static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
{
	int ret;
	u32 reg, enables;
	struct arm_smmu_cmdq_ent cmd;

	/* Clear CR0 and sync (disables SMMU and queue processing) */
	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
	if (reg & CR0_SMMUEN) {
		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
		WARN_ON(is_kdump_kernel() && !disable_bypass);
		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
	}

	ret = arm_smmu_device_disable(smmu);
	if (ret)
		return ret;

	/* CR1 (table and queue memory attributes) */
	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);

	/* CR2 (random crap) */
	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);

	/* Stream table */
	writeq_relaxed(smmu->strtab_cfg.strtab_base,
		       smmu->base + ARM_SMMU_STRTAB_BASE);
	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);

	/* Command queue */
	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);

	enables = CR0_CMDQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable command queue\n");
		return ret;
	}

	/* Invalidate any cached configuration */
	cmd.opcode = CMDQ_OP_CFGI_ALL;
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);

	/* Invalidate any stale TLB entries */
	if (smmu->features & ARM_SMMU_FEAT_HYP) {
		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	}

	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);

	/* Event queue */
	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
	writel_relaxed(smmu->evtq.q.llq.prod,
		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
	writel_relaxed(smmu->evtq.q.llq.cons,
		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));

	enables |= CR0_EVTQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable event queue\n");
		return ret;
	}

	/* PRI queue */
	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		writeq_relaxed(smmu->priq.q.q_base,
			       smmu->base + ARM_SMMU_PRIQ_BASE);
		writel_relaxed(smmu->priq.q.llq.prod,
			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
		writel_relaxed(smmu->priq.q.llq.cons,
			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));

		enables |= CR0_PRIQEN;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable PRI queue\n");
			return ret;
		}
	}

	if (smmu->features & ARM_SMMU_FEAT_ATS) {
		enables |= CR0_ATSCHK;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable ATS check\n");
			return ret;
		}
	}

	ret = arm_smmu_setup_irqs(smmu);
	if (ret) {
		dev_err(smmu->dev, "failed to setup irqs\n");
		return ret;
	}

	if (is_kdump_kernel())
		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);

	/* Enable the SMMU interface, or ensure bypass */
	if (!bypass || disable_bypass) {
		enables |= CR0_SMMUEN;
	} else {
		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
		if (ret)
			return ret;
	}
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable SMMU interface\n");
		return ret;
	}

	return 0;
}
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;

	/* IDR0 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);

	/* 2-level structures */
	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;

	if (reg & IDR0_CD2L)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;

	/*
	 * Translation table endianness.
	 * We currently require the same endianness as the CPU, but this
	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
	 */
	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
	case IDR0_TTENDIAN_MIXED:
		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
		break;
#ifdef __BIG_ENDIAN
	case IDR0_TTENDIAN_BE:
		smmu->features |= ARM_SMMU_FEAT_TT_BE;
		break;
#else
	case IDR0_TTENDIAN_LE:
		smmu->features |= ARM_SMMU_FEAT_TT_LE;
		break;
#endif
	default:
		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
		return -ENXIO;
	}

	/* Boolean feature flags */
	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
		smmu->features |= ARM_SMMU_FEAT_PRI;

	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
		smmu->features |= ARM_SMMU_FEAT_ATS;

	if (reg & IDR0_SEV)
		smmu->features |= ARM_SMMU_FEAT_SEV;

	if (reg & IDR0_MSI)
		smmu->features |= ARM_SMMU_FEAT_MSI;

	if (reg & IDR0_HYP)
		smmu->features |= ARM_SMMU_FEAT_HYP;

	/*
	 * The coherency feature as set by FW is used in preference to the ID
	 * register, but warn on mismatch.
	 */
	if (!!(reg & IDR0_COHACC) != coherent)
		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
			 coherent ? "true" : "false");

	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
	case IDR0_STALL_MODEL_FORCE:
		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
		/* Fallthrough */
	case IDR0_STALL_MODEL_STALL:
		smmu->features |= ARM_SMMU_FEAT_STALLS;
	}

	if (reg & IDR0_S1P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;

	if (reg & IDR0_S2P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;

	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
		dev_err(smmu->dev, "no translation support!\n");
		return -ENXIO;
	}

	/* We only support the AArch64 table format at present */
	switch (FIELD_GET(IDR0_TTF, reg)) {
	case IDR0_TTF_AARCH32_64:
		smmu->ias = 40;
		/* Fallthrough */
	case IDR0_TTF_AARCH64:
		break;
	default:
		dev_err(smmu->dev, "AArch64 table format not supported!\n");
		return -ENXIO;
	}

	/* ASID/VMID sizes */
	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;

	/* IDR1 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
		dev_err(smmu->dev, "embedded implementation not supported\n");
		return -ENXIO;
	}

	/* Queue sizes, capped to ensure natural alignment */
	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_CMDQS, reg));
	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
		/*
		 * We don't support splitting up batches, so one batch of
		 * commands plus an extra sync needs to fit inside the command
		 * queue. There's also no way we can handle the weird alignment
		 * restrictions on the base pointer for a unit-length queue.
		 */
		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
			CMDQ_BATCH_ENTRIES);
		return -ENXIO;
	}

	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_EVTQS, reg));
	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_PRIQS, reg));

	/* SID/SSID sizes */
	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);

	/*
	 * If the SMMU supports fewer bits than would fill a single L2 stream
	 * table, use a linear table instead.
	 */
	if (smmu->sid_bits <= STRTAB_SPLIT)
		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;

	/* IDR5 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);

	/* Maximum number of outstanding stalls */
	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);

	/* Page sizes */
	if (reg & IDR5_GRAN64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
	if (reg & IDR5_GRAN16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (reg & IDR5_GRAN4K)
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;

	/* Input address size */
	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
		smmu->features |= ARM_SMMU_FEAT_VAX;

	/* Output address size */
	switch (FIELD_GET(IDR5_OAS, reg)) {
	case IDR5_OAS_32_BIT:
		smmu->oas = 32;
		break;
	case IDR5_OAS_36_BIT:
		smmu->oas = 36;
		break;
	case IDR5_OAS_40_BIT:
		smmu->oas = 40;
		break;
	case IDR5_OAS_42_BIT:
		smmu->oas = 42;
		break;
	case IDR5_OAS_44_BIT:
		smmu->oas = 44;
		break;
	case IDR5_OAS_52_BIT:
		smmu->oas = 52;
		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
		break;
	default:
		dev_info(smmu->dev,
			"unknown output address size. Truncating to 48-bit\n");
		/* Fallthrough */
	case IDR5_OAS_48_BIT:
		smmu->oas = 48;
	}

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;

	/* Set the DMA mask for our table walker */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	smmu->ias = max(smmu->ias, smmu->oas);

	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
		 smmu->ias, smmu->oas, smmu->features);
	return 0;
}
#ifdef CONFIG_ACPI
static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
{
	switch (model) {
	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
		break;
	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
		break;
	}

	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
}

static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct acpi_iort_smmu_v3 *iort_smmu;
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node;

	node = *(struct acpi_iort_node **)dev_get_platdata(dev);

	/* Retrieve SMMUv3 specific data */
	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;

	acpi_smmu_get_options(iort_smmu->model, smmu);

	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	return 0;
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif
static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	struct device *dev = &pdev->dev;
	u32 cells;
	int ret = -EINVAL;

	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
		dev_err(dev, "missing #iommu-cells property\n");
	else if (cells != 1)
		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
	else
		ret = 0;

	parse_driver_options(smmu);

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	return ret;
}
static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
{
	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
		return SZ_64K;
	else
		return SZ_128K;
}
static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
{
	int err;

#ifdef CONFIG_PCI
	if (pci_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&pci_bus_type, ops);
		if (err)
			return err;
	}
#endif
#ifdef CONFIG_ARM_AMBA
	if (amba_bustype.iommu_ops != ops) {
		err = bus_set_iommu(&amba_bustype, ops);
		if (err)
			goto err_reset_pci_ops;
	}
#endif
	if (platform_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&platform_bus_type, ops);
		if (err)
			goto err_reset_amba_ops;
	}

	return 0;

err_reset_amba_ops:
#ifdef CONFIG_ARM_AMBA
	bus_set_iommu(&amba_bustype, NULL);
#endif
err_reset_pci_ops: __maybe_unused;
#ifdef CONFIG_PCI
	bus_set_iommu(&pci_bus_type, NULL);
#endif
	return err;
}
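
/*
 * Bus ops are installed in a fixed order (PCI, AMBA, platform) with
 * unwinding on failure, so a partial registration never leaves a bus
 * pointing at arm_smmu_ops after this function returns an error.
 */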
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	int irq, ret;
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	bool bypass;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	if (dev->of_node) {
		ret = arm_smmu_device_dt_probe(pdev, smmu);
	} else {
		ret = arm_smmu_device_acpi_probe(pdev, smmu);
		if (ret == -ENODEV)
			return ret;
	}

	/* Set bypass mode according to firmware probing result */
	bypass = !!ret;

	/* Base address */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
		dev_err(dev, "MMIO region too small (%pr)\n", res);
		return -EINVAL;
	}
	ioaddr = res->start;

	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);

	/* Interrupt lines */

	irq = platform_get_irq_byname_optional(pdev, "combined");
	if (irq > 0)
		smmu->combined_irq = irq;
	else {
		irq = platform_get_irq_byname_optional(pdev, "eventq");
		if (irq > 0)
			smmu->evtq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "priq");
		if (irq > 0)
			smmu->priq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "gerror");
		if (irq > 0)
			smmu->gerr_irq = irq;
	}
	/* Probe the h/w */
	ret = arm_smmu_device_hw_probe(smmu);
	if (ret)
		return ret;

	/* Initialise in-memory data structures */
	ret = arm_smmu_init_structures(smmu);
	if (ret)
		return ret;

	/* Record our private device structure */
	platform_set_drvdata(pdev, smmu);

	/* Reset the device */
	ret = arm_smmu_device_reset(smmu, bypass);
	if (ret)
		return ret;

	/* And we're up. Go go go! */
	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
				     "smmu3.%pa", &ioaddr);
	if (ret)
		return ret;

	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);

	ret = iommu_device_register(&smmu->iommu);
	if (ret) {
		dev_err(dev, "Failed to register iommu\n");
		return ret;
	}

	return arm_smmu_set_bus_ops(&arm_smmu_ops);
}
static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	arm_smmu_set_bus_ops(NULL);
	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);
	arm_smmu_device_disable(smmu);

	return 0;
}

static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	arm_smmu_device_remove(pdev);
}
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v3", },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu-v3",
		.of_match_table		= arm_smmu_of_match,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_platform_driver(arm_smmu_driver);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu-v3");
MODULE_LICENSE("GPL v2");