1 // SPDX-License-Identifier: GPL-2.0+
3 * TCE helpers for IODA PCI/PCIe on PowerNV platforms
5 * Copyright 2018 IBM Corp.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
13 #include <linux/kernel.h>
14 #include <linux/iommu.h>
16 #include <asm/iommu.h>
/*
 * pnv_ioda_parse_tce_sizes() - collect the TCE page sizes a PHB supports.
 * @phb: PHB whose device node (phb->hose->dn) is inspected.
 *
 * Looks at the "ibm,supported-tce-sizes" device tree property: the number
 * of u32 elements is counted first and the elements are then read one at a
 * time by index. A default mask of SZ_4K | SZ_64K is also provided, extended
 * with SZ_16M | SZ_256M on CPUs that have CPU_FTR_ARCH_207S but not
 * CPU_FTR_ARCH_300.
 *
 * NOTE(review): the test that selects the default mask and the statement
 * that folds each property element into the mask are not shown here;
 * presumably the defaults apply when the property is absent or empty and
 * the accumulated mask is returned - confirm.
 */
20 unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
22 struct pci_controller *hose = phb->hose;
23 struct device_node *dn = hose->dn;
24 unsigned long mask = 0;
/* How many u32 cells the property holds. */
28 count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
/* Default set of page sizes. */
30 mask = SZ_4K | SZ_64K;
31 /* Add 16M and 256M for POWER8 by default */
32 if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
33 !cpu_has_feature(CPU_FTR_ARCH_300))
34 mask |= SZ_16M | SZ_256M;
/* Walk every cell of the property by index. */
38 for (i = 0; i < count; i++) {
39 rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
/*
 * pnv_pci_setup_iommu_table() - fill in the basic fields of an iommu_table.
 * @tbl: table descriptor to initialise.
 * @tce_mem: memory holding the TCE entries; stored in it_base as an
 *           unsigned long.
 * @tce_size: size of the TCE table; it_size is this value divided by 8.
 * @dma_offset: start of the DMA window; stored in it_offset in units of
 *              IOMMU pages (shifted right by @page_shift).
 * @page_shift: log2 of the IOMMU page size.
 *
 * Only assigns fields of @tbl; nothing is allocated here.
 */
48 void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
49 void *tce_mem, u64 tce_size,
50 u64 dma_offset, unsigned int page_shift)
52 tbl->it_blocksize = 16;
53 tbl->it_base = (unsigned long)tce_mem;
54 tbl->it_page_shift = page_shift;
/* Window offset expressed in IOMMU pages rather than bytes. */
55 tbl->it_offset = dma_offset >> tbl->it_page_shift;
/* >> 3 divides by 8: entries are handled as __be64 elsewhere in this file. */
57 tbl->it_size = tce_size >> 3;
59 tbl->it_type = TCE_PCI;
/*
 * pnv_alloc_tce_level() - allocate and zero one level of a TCE table.
 * @nid: NUMA node passed to alloc_pages_node().
 * @shift: log2 of the level size in bytes (1UL << shift bytes are zeroed).
 *
 * The pages are requested with GFP_ATOMIC | __GFP_NOWARN, and an error is
 * logged with pr_err() on the failure path.
 *
 * NOTE(review): the page order argument and the return statements are not
 * shown here; presumably NULL is returned on failure and the zeroed
 * kernel address otherwise - confirm.
 */
62 static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
64 struct page *tce_mem = NULL;
67 tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
70 pr_err("Failed to allocate a TCE memory, level shift=%d\n",
/* Kernel virtual address of the allocation, cleared before use. */
74 addr = page_address(tce_mem);
75 memset(addr, 0, 1UL << shift);
/*
 * Forward declaration: pnv_tce() below calls this helper, which is only
 * defined further down in the file.
 */
80 static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
81 unsigned long size, unsigned int levels);
/*
 * pnv_tce() - locate the TCE entry for @idx, walking indirect levels.
 * @tbl: table to walk.
 * @user: true to walk tbl->it_userspace, false to walk tbl->it_base.
 * @idx: entry index (callers in this file pass index - tbl->it_offset).
 * @alloc: whether a missing intermediate level may be allocated.
 *
 * Each level holds tbl->it_level_size entries, so ilog2(it_level_size)
 * bits of @idx are consumed per level, starting from the topmost level
 * (tbl->it_indirect_levels).
 *
 * NOTE(review): the loop construct, the test of @alloc and the return
 * statements are not shown here - confirm the exact conditions against
 * the full source.
 */
83 static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
85 __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
86 int level = tbl->it_indirect_levels;
87 const long shift = ilog2(tbl->it_level_size);
/* Selects the bits of idx that index into the current level. */
88 unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
/* Slot number within the current level. */
91 int n = (idx & mask) >> (level * shift);
/* Read the slot once and convert it from big-endian. */
92 unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));
/* New level: it_level_size entries of 8 bytes each, hence the "+ 3". */
100 tmp2 = pnv_alloc_tce_level(tbl->it_nid,
101 ilog2(tbl->it_level_size) + 3);
/* Intermediate entry: physical address of the new level plus both permission bits. */
105 tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
/* Install the new level only if the slot still reads as 0. */
106 oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
/*
 * Release the freshly allocated level again - presumably taken when the
 * cmpxchg above found the slot already populated (condition not shown).
 */
109 pnv_pci_ioda2_table_do_free_pages(tmp2,
110 ilog2(tbl->it_level_size) + 3, 1);
/* Strip the permission bits and descend to the next level. */
115 tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
/*
 * pnv_tce_build() - write @npages consecutive TCE entries.
 * @tbl: table to update.
 * @index: first entry; tbl->it_offset is subtracted before the lookup.
 * @npages: number of entries to write.
 * @uaddr: kernel address of the memory being mapped; converted with __pa().
 * @direction: DMA direction, translated to TCE permission bits.
 *
 * Entry i receives the address of page (rpn + i), shifted back up by
 * tbl->it_page_shift, combined with the permission bits, and is stored in
 * big-endian form.
 *
 * NOTE(review): the remaining parameter(s) and the return value are not
 * shown here.
 */
124 int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
125 unsigned long uaddr, enum dma_data_direction direction,
128 u64 proto_tce = iommu_direction_to_tce_perm(direction);
/* Page number of the first page, in units of the IOMMU page size. */
129 u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
/* Write permission always comes with read permission. */
132 if (proto_tce & TCE_PCI_WRITE)
133 proto_tce |= TCE_PCI_READ;
135 for (i = 0; i < npages; i++) {
136 unsigned long newtce = proto_tce |
137 ((rpn + i) << tbl->it_page_shift);
138 unsigned long idx = index - tbl->it_offset + i;
/*
 * NOTE(review): the pointer returned by pnv_tce() is dereferenced without
 * a check; with alloc == true this relies on pnv_tce() never returning
 * NULL (e.g. on allocation failure) - confirm.
 */
140 *(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
146 #ifdef CONFIG_IOMMU_API
/*
 * pnv_tce_xchg() - atomically replace one TCE entry, returning the old one.
 * @tbl: table to update.
 * @index: entry to replace; tbl->it_offset is subtracted before the lookup.
 * @hpa: in: address for the new entry; out: address taken from the old
 *       entry with the permission bits cleared.
 * @direction: in: direction for the new entry; out: direction decoded
 *             from the old entry.
 *
 * An "alloc" value is also passed on to pnv_tce() when *direction is not
 * DMA_NONE; it presumably comes from a parameter that is not shown here.
 * NOTE(review): the handling of a failed lookup and the return value are
 * not shown either.
 */
147 int pnv_tce_xchg(struct iommu_table *tbl, long index,
148 unsigned long *hpa, enum dma_data_direction *direction,
151 u64 proto_tce = iommu_direction_to_tce_perm(*direction);
152 unsigned long newtce = *hpa | proto_tce, oldtce;
153 unsigned long idx = index - tbl->it_offset;
/* The new address must not have bits set outside the IOMMU page mask. */
156 BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
/* Clearing an entry never allocates missing levels. */
158 if (*direction == DMA_NONE) {
159 ptce = pnv_tce(tbl, false, idx, false);
167 ptce = pnv_tce(tbl, false, idx, alloc);
/* Write permission always comes with read permission. */
172 if (newtce & TCE_PCI_WRITE)
173 newtce |= TCE_PCI_READ;
/* Swap in the new entry and hand the previous contents back to the caller. */
175 oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
176 *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
177 *direction = iommu_tce_direction(oldtce);
/*
 * pnv_tce_useraddrptr() - locate the entry for @index in the userspace view.
 * @tbl: table whose it_userspace copy is walked.
 * @index: entry index; tbl->it_offset is subtracted before the lookup.
 * @alloc: passed through to pnv_tce().
 *
 * Warns once if the table has no userspace view (the value returned in
 * that case is not shown here); otherwise returns whatever pnv_tce()
 * returns for the userspace tree.
 */
182 __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
184 if (WARN_ON_ONCE(!tbl->it_userspace))
187 return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
/*
 * pnv_tce_free() - clear @npages consecutive TCE entries.
 * @tbl: table to update.
 * @index: first entry; tbl->it_offset is subtracted before the lookup.
 * @npages: number of entries to clear.
 *
 * Entries are looked up without allocating missing levels.
 * NOTE(review): the test choosing between clearing an entry and skipping
 * ahead is not shown here; presumably the skip is taken when pnv_tce()
 * finds no entry - confirm.
 */
191 void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
195 for (i = 0; i < npages; i++) {
196 unsigned long idx = index - tbl->it_offset + i;
197 __be64 *ptce = pnv_tce(tbl, false, idx, false);
200 *ptce = cpu_to_be64(0);
202 /* Skip the rest of the level */
/*
 * Setting the low bits of i makes the loop's i++ land on the next
 * multiple of it_level_size.
 */
203 i |= tbl->it_level_size - 1;
/*
 * pnv_tce_get() - read one TCE entry.
 * @tbl: table to read from.
 * @index: entry index; tbl->it_offset is subtracted before the lookup.
 *
 * Looks the entry up without allocating missing levels and returns its
 * value converted from big-endian.
 * NOTE(review): the handling of a failed lookup is not shown here.
 */
207 unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
209 __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);
214 return be64_to_cpu(*ptce);
/*
 * pnv_pci_ioda2_table_do_free_pages() - free a TCE level and its children.
 * @addr: address of the level; TCE permission bits, if present, are
 *        masked off first.
 * @size: number of entries in the level.
 * @levels: depth indicator for the recursion.
 *
 * Scans the @size entries of the level, recurses into the level referenced
 * by an entry, and finally frees the pages of this level itself.
 * NOTE(review): the test on @levels and the value passed down for it in
 * the recursive call are not shown here.
 */
217 static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
218 unsigned long size, unsigned int levels)
220 const unsigned long addr_ul = (unsigned long) addr &
221 ~(TCE_PCI_READ | TCE_PCI_WRITE);
225 u64 *tmp = (u64 *) addr_ul;
227 for (i = 0; i < size; ++i) {
228 unsigned long hpa = be64_to_cpu(tmp[i]);
/* Entries with neither permission bit set get separate handling (presumably skipped). */
230 if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
233 pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
/* size entries of 8 bytes each. */
238 free_pages(addr_ul, get_order(size << 3));
/*
 * pnv_pci_ioda2_table_free_pages() - release the memory of a TCE table.
 * @tbl: table whose it_base tree and, if present, it_userspace tree are
 *       freed.
 *
 * The per-level entry count is it_level_size for a multi-level table and
 * it_size for a single-level one.
 */
241 void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
243 const unsigned long size = tbl->it_indirect_levels ?
244 tbl->it_level_size : tbl->it_size;
249 pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
250 tbl->it_indirect_levels);
/* The userspace view is optional. */
251 if (tbl->it_userspace) {
252 pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
253 tbl->it_indirect_levels);
/*
 * pnv_pci_ioda2_table_do_alloc_pages() - recursively allocate TCE levels.
 * @nid: NUMA node for the allocations.
 * @shift: log2 of the size in bytes of each level.
 * @levels: depth indicator for the recursion.
 * @limit: bound compared against *@current_offset to stop filling a level.
 * @current_offset: running counter, advanced by the level size.
 * @total_allocated: running total of bytes allocated, advanced by the
 *                   level size for the level allocated here.
 *
 * Allocates one level with pnv_alloc_tce_level() and fills its slots with
 * the physical addresses of recursively allocated levels, tagged with both
 * TCE permission bits.
 * NOTE(review): the tests that decide when to recurse, when to advance
 * *@current_offset, and what is returned on failure are not shown here.
 */
257 static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
258 unsigned int levels, unsigned long limit,
259 unsigned long *current_offset, unsigned long *total_allocated)
/* Bytes per level, and the number of 8-byte entries that fit in it. */
262 unsigned long allocated = 1UL << shift;
263 unsigned int entries = 1UL << (shift - 3);
266 addr = pnv_alloc_tce_level(nid, shift);
267 *total_allocated += allocated;
271 *current_offset += allocated;
275 for (i = 0; i < entries; ++i) {
276 tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
277 levels, limit, current_offset, total_allocated);
/* Link the child level into this level's slot. */
281 addr[i] = cpu_to_be64(__pa(tmp) |
282 TCE_PCI_READ | TCE_PCI_WRITE);
/* Stop once the running offset has reached the limit. */
284 if (*current_offset >= limit)
/*
 * pnv_pci_ioda2_table_alloc_pages() - allocate and set up a TCE table.
 * @nid: NUMA node for the allocations.
 * @bus_offset: start of the DMA window, passed to
 *              pnv_pci_setup_iommu_table().
 * @page_shift: log2 of the IOMMU page size.
 * @window_size: size of the DMA window; checked to be a power of two.
 * @levels: number of table levels; checked to be non-zero and at most
 *          POWERNV_IOMMU_MAX_LEVELS.
 * @alloc_userspace_copy: also allocate a second tree, stored in
 *                        tbl->it_userspace.
 * @tbl: table descriptor to fill in.
 *
 * Both allocation calls here pass 1 as the level count to the recursive
 * allocator. The cleanup code at the end frees the userspace tree and the
 * main tree.
 * NOTE(review): the return values, the labels/gotos of the error path and
 * the second operand of the table_shift max_t() are not shown here.
 */
291 long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
292 __u32 page_shift, __u64 window_size, __u32 levels,
293 bool alloc_userspace_copy, struct iommu_table *tbl)
295 void *addr, *uas = NULL;
296 unsigned long offset = 0, level_shift, total_allocated = 0;
297 unsigned long total_allocated_uas = 0;
298 const unsigned int window_shift = ilog2(window_size);
/* log2 of the number of IOMMU pages in the window. */
299 unsigned int entries_shift = window_shift - page_shift;
/* log2 of the table size in bytes: 8 bytes per entry, with a lower bound. */
300 unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
302 const unsigned long tce_table_size = 1UL << table_shift;
304 if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
307 if (!is_power_of_2(window_size))
310 /* Adjust direct table size from window_size and levels */
/* Split the entry bits evenly across the levels, rounding up. */
311 entries_shift = (entries_shift + levels - 1) / levels;
312 level_shift = entries_shift + 3;
/* A level is never smaller than one page. */
313 level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);
/*
 * Index bits of all levels plus the page offset bits are checked
 * against 55. NOTE(review): the origin of this limit is not shown here.
 */
315 if ((level_shift - 3) * levels + page_shift >= 55)
318 /* Allocate TCE table */
319 addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
320 1, tce_table_size, &offset, &total_allocated);
322 /* addr==NULL means that the first level allocation failed */
327 * First level was allocated but some lower level failed as
328 * we did not allocate as much as we wanted,
329 * release partially allocated table.
/* Only a single-level table is required to be fully allocated here. */
331 if (levels == 1 && offset < tce_table_size)
334 /* Allocate userspace view of the TCE table */
335 if (alloc_userspace_copy) {
/*
 * NOTE(review): offset is reused for this second allocation; it has to
 * start from 0 again for the size check below to be meaningful, and no
 * reset is visible in this excerpt - confirm.
 */
337 uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
338 1, tce_table_size, &offset,
339 &total_allocated_uas);
/* The userspace view must match the main table in allocated size. */
342 if (levels == 1 && (offset < tce_table_size ||
343 total_allocated_uas != total_allocated))
347 /* Setup linux iommu table */
348 pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
/* Entries per level; the remaining levels are recorded as indirect. */
350 tbl->it_level_size = 1ULL << (level_shift - 3);
351 tbl->it_indirect_levels = levels - 1;
352 tbl->it_userspace = uas;
355 pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
356 window_size, tce_table_size, bus_offset, tbl->it_base,
357 tbl->it_userspace, 1, levels);
/* Cleanup: release the userspace view, then the main table. */
362 pnv_pci_ioda2_table_do_free_pages(uas,
363 1ULL << (level_shift - 3), levels - 1);
365 pnv_pci_ioda2_table_do_free_pages(addr,
366 1ULL << (level_shift - 3), levels - 1);
/*
 * pnv_pci_unlink_table_and_group() - detach an iommu_table from a group.
 * @tbl: table to detach.
 * @table_group: group to detach it from.
 *
 * Checks both pointers for NULL first. Removes the link that points at
 * @table_group from tbl->it_group_list, then clears the slot in
 * table_group->tables[] that points at @tbl after calling
 * iommu_tce_table_put() on it.
 * NOTE(review): what happens to the removed link object and whether the
 * loops stop after the first match are not shown here.
 */
371 void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
372 struct iommu_table_group *table_group)
376 struct iommu_table_group_link *tgl;
378 if (!tbl || !table_group)
381 /* Remove link to a group from table's list of attached groups */
385 list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
386 if (tgl->table_group == table_group) {
387 list_del_rcu(&tgl->next);
398 /* Clean a pointer to iommu_table in iommu_table_group::tables[] */
400 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
401 if (table_group->tables[i] == tbl) {
402 iommu_tce_table_put(tbl);
403 table_group->tables[i] = NULL;
/*
 * pnv_pci_link_table_and_group() - attach an iommu_table to a group.
 * @node: NUMA node, presumably used for the link allocation (the
 *        remaining kzalloc_node() arguments are not shown here).
 * @num: slot in table_group->tables[] that receives the table.
 * @tbl: table to attach.
 * @table_group: group to attach it to.
 *
 * Warns if either pointer is NULL. Allocates a zeroed link object, points
 * it at @table_group, adds it to tbl->it_group_list, and stores the value
 * returned by iommu_tce_table_get(@tbl) in the group's table array.
 * NOTE(review): the return values and the allocation-failure handling are
 * not shown here.
 */
411 long pnv_pci_link_table_and_group(int node, int num,
412 struct iommu_table *tbl,
413 struct iommu_table_group *table_group)
415 struct iommu_table_group_link *tgl = NULL;
417 if (WARN_ON(!tbl || !table_group))
420 tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
425 tgl->table_group = table_group;
426 list_add_rcu(&tgl->next, &tbl->it_group_list);
428 table_group->tables[num] = iommu_tce_table_get(tbl);