// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2019-2020 NVIDIA CORPORATION. All rights reserved.
#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

#include "arm-smmu.h"
/*
 * Tegra194 has three ARM MMU-500 instances.
 * Two of them are used together and must be programmed identically for
 * interleaved IOVA accesses across them; they translate accesses from
 * non-isochronous HW devices.
 * The third one is used for translating accesses from isochronous HW devices.
 *
 * This implementation supports programming of the two instances that must
 * be programmed identically.
 * The third instance is used through the standard arm-smmu driver itself and
 * is out of scope of this implementation.
 */
#define MAX_SMMU_INSTANCES 2
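
/*
 * Per-SoC wrapper around the generic arm_smmu_device: it embeds the core
 * device and records the MMIO base of every mirrored MMU-500 instance.
 */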
struct nvidia_smmu {
	struct arm_smmu_device smmu;
	void __iomem *bases[MAX_SMMU_INSTANCES];
	unsigned int num_instances;
};
static inline struct nvidia_smmu *to_nvidia_smmu(struct arm_smmu_device *smmu)
{
	return container_of(smmu, struct nvidia_smmu, smmu);
}
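
/* Return the MMIO address of register page @page within instance @inst. */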
static inline void __iomem *nvidia_smmu_page(struct arm_smmu_device *smmu,
					     unsigned int inst, int page)
{
	struct nvidia_smmu *nvidia_smmu;

	nvidia_smmu = container_of(smmu, struct nvidia_smmu, smmu);
	return nvidia_smmu->bases[inst] + (page << smmu->pgshift);
}
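
/*
 * The mirrored instances are programmed identically, so register reads
 * are satisfied from instance 0 alone.
 */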
static u32 nvidia_smmu_read_reg(struct arm_smmu_device *smmu,
				int page, int offset)
{
	void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset;

	return readl_relaxed(reg);
}
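
/* Mirror 32-bit register writes to every instance to keep them in sync. */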
static void nvidia_smmu_write_reg(struct arm_smmu_device *smmu,
				  int page, int offset, u32 val)
{
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);
	unsigned int i;

	for (i = 0; i < nvidia->num_instances; i++) {
		void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset;

		writel_relaxed(val, reg);
	}
}
static u64 nvidia_smmu_read_reg64(struct arm_smmu_device *smmu,
				  int page, int offset)
{
	void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset;

	return readq_relaxed(reg);
}
static void nvidia_smmu_write_reg64(struct arm_smmu_device *smmu,
				    int page, int offset, u64 val)
{
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);
	unsigned int i;

	for (i = 0; i < nvidia->num_instances; i++) {
		void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset;

		writeq_relaxed(val, reg);
	}
}
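
/*
 * Issue a TLB sync (the sync register write goes through arm_smmu_writel()
 * and is therefore mirrored to all instances by the write hook above), then
 * poll the status register of every instance until none of them reports
 * GSACTIVE, using the same spin-then-backoff scheme as the core driver.
 */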
static void nvidia_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
				 int sync, int status)
{
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);
	unsigned int delay;

	arm_smmu_writel(smmu, page, sync, 0);

	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		unsigned int spin_cnt;

		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			u32 val = 0;
			unsigned int i;

			for (i = 0; i < nvidia->num_instances; i++) {
				void __iomem *reg;

				reg = nvidia_smmu_page(smmu, i, page) + status;
				val |= readl_relaxed(reg);
			}

			if (!(val & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
				return;

			cpu_relax();
		}

		udelay(delay);
	}

	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}
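
/* Clear any stale global fault status on each instance before use. */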
static int nvidia_smmu_reset(struct arm_smmu_device *smmu)
{
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);
	unsigned int i;

	for (i = 0; i < nvidia->num_instances; i++) {
		u32 val;
		void __iomem *reg = nvidia_smmu_page(smmu, i, ARM_SMMU_GR0) +
				    ARM_SMMU_GR0_sGFSR;

		/* clear global FSR */
		val = readl_relaxed(reg);
		writel_relaxed(val, reg);
	}

	return 0;
}
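
/*
 * Report and clear a global fault on a single instance; returns IRQ_NONE
 * when that instance has no fault pending.
 */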
static irqreturn_t nvidia_smmu_global_fault_inst(int irq,
						 struct arm_smmu_device *smmu,
						 int inst)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	void __iomem *gr0_base = nvidia_smmu_page(smmu, inst, 0);

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	if (!gfsr)
		return IRQ_NONE;

	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

	dev_err_ratelimited(smmu->dev,
			    "Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
			    "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
			    gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel_relaxed(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
	return IRQ_HANDLED;
}
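
/*
 * Global fault handler: check every instance and report IRQ_HANDLED if any
 * of them raised a fault.
 */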
static irqreturn_t nvidia_smmu_global_fault(int irq, void *dev)
{
	unsigned int inst;
	irqreturn_t ret = IRQ_NONE;
	struct arm_smmu_device *smmu = dev;
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);

	for (inst = 0; inst < nvidia->num_instances; inst++) {
		irqreturn_t irq_ret;

		irq_ret = nvidia_smmu_global_fault_inst(irq, smmu, inst);
		if (irq_ret == IRQ_HANDLED)
			ret = IRQ_HANDLED;
	}

	return ret;
}
static irqreturn_t nvidia_smmu_context_fault_bank(int irq,
						  struct arm_smmu_device *smmu,
						  int idx, int inst)
{
	u32 fsr, fsynr, cbfrsynra;
	unsigned long iova;
	void __iomem *gr1_base = nvidia_smmu_page(smmu, inst, 1);
	void __iomem *cb_base = nvidia_smmu_page(smmu, inst, smmu->numpage + idx);

	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
	if (!(fsr & ARM_SMMU_FSR_FAULT))
		return IRQ_NONE;

	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
	cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(idx));

	dev_err_ratelimited(smmu->dev,
			    "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cbfrsynra, idx);

	writel_relaxed(fsr, cb_base + ARM_SMMU_CB_FSR);
	return IRQ_HANDLED;
}
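
/*
 * Context fault handler: the interrupt line is shared, so check every
 * context bank on every instance.
 */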
static irqreturn_t nvidia_smmu_context_fault(int irq, void *dev)
{
	int idx;
	unsigned int inst;
	irqreturn_t ret = IRQ_NONE;
	struct arm_smmu_device *smmu;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain;
	struct nvidia_smmu *nvidia;

	smmu_domain = container_of(domain, struct arm_smmu_domain, domain);
	smmu = smmu_domain->smmu;
	nvidia = to_nvidia_smmu(smmu);

	for (inst = 0; inst < nvidia->num_instances; inst++) {
		irqreturn_t irq_ret;

		/*
		 * Interrupt line is shared between all contexts.
		 * Check for faults across all contexts.
		 */
		for (idx = 0; idx < smmu->num_context_banks; idx++) {
			irq_ret = nvidia_smmu_context_fault_bank(irq, smmu,
								 idx, inst);
			if (irq_ret == IRQ_HANDLED)
				ret = IRQ_HANDLED;
		}
	}

	return ret;
}
static const struct arm_smmu_impl nvidia_smmu_impl = {
	.read_reg = nvidia_smmu_read_reg,
	.write_reg = nvidia_smmu_write_reg,
	.read_reg64 = nvidia_smmu_read_reg64,
	.write_reg64 = nvidia_smmu_write_reg64,
	.reset = nvidia_smmu_reset,
	.tlb_sync = nvidia_smmu_tlb_sync,
	.global_fault = nvidia_smmu_global_fault,
	.context_fault = nvidia_smmu_context_fault,
};
/* With a single instance, no register mirroring is needed. */
static const struct arm_smmu_impl nvidia_smmu_single_impl = {
};
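
/*
 * Probe-time hook called from the core arm-smmu driver: devm_krealloc()
 * grows the arm_smmu_device allocation into a nvidia_smmu, the additional
 * instance MMIO regions are mapped from the remaining memory resources,
 * and the single- or multi-instance implementation is selected based on
 * how many instances were found.
 */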
struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
{
	struct resource *res;
	struct device *dev = smmu->dev;
	struct nvidia_smmu *nvidia_smmu;
	struct platform_device *pdev = to_platform_device(dev);
	unsigned int i;

	nvidia_smmu = devm_krealloc(dev, smmu, sizeof(*nvidia_smmu), GFP_KERNEL);
	if (!nvidia_smmu)
		return ERR_PTR(-ENOMEM);

	/* Instance 0 is ioremapped by arm-smmu.c. */
	nvidia_smmu->bases[0] = smmu->base;
	nvidia_smmu->num_instances++;

	for (i = 1; i < MAX_SMMU_INSTANCES; i++) {
		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
		if (!res)
			break;

		nvidia_smmu->bases[i] = devm_ioremap_resource(dev, res);
		if (IS_ERR(nvidia_smmu->bases[i]))
			return ERR_CAST(nvidia_smmu->bases[i]);

		nvidia_smmu->num_instances++;
	}

	if (nvidia_smmu->num_instances == 1)
		nvidia_smmu->smmu.impl = &nvidia_smmu_single_impl;
	else
		nvidia_smmu->smmu.impl = &nvidia_smmu_impl;

	return &nvidia_smmu->smmu;
}