// SPDX-License-Identifier: GPL-2.0+
/*
 * TCE helpers for IODA PCI/PCIe on PowerNV platforms
 *
 * Copyright 2018 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/iommu.h>

#include <asm/iommu.h>
#include <asm/tce.h>
#include "pci.h"

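/*
 * pnv_ioda_parse_tce_sizes - Build a bitmask of supported IOMMU page sizes
 *
 * Reads the "ibm,supported-tce-sizes" device tree property, which lists the
 * supported TCE page shifts, and folds each shift into a bitmask of page
 * sizes. For example (values illustrative), a property of {12, 16, 24}
 * would yield SZ_4K | SZ_64K | SZ_16M. If the property is absent, a
 * conservative default is assumed based on the CPU generation.
 */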
unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
{
        struct pci_controller *hose = phb->hose;
        struct device_node *dn = hose->dn;
        unsigned long mask = 0;
        int i, rc, count;
        u32 val;

        count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
        if (count <= 0) {
                mask = SZ_4K | SZ_64K;
                /* Add 16M and 256M for POWER8 by default */
                if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
                                !cpu_has_feature(CPU_FTR_ARCH_300))
                        mask |= SZ_16M | SZ_256M;
                return mask;
        }

        for (i = 0; i < count; i++) {
                rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
                                                i, &val);
                if (rc == 0)
                        mask |= 1ULL << val;
        }

        return mask;
}

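/*
 * pnv_pci_setup_iommu_table - Fill in the generic iommu_table fields
 *
 * it_size is the number of TCEs, hence tce_size (the table size in bytes)
 * shifted right by 3 (each TCE is 8 bytes); it_offset is the window's bus
 * offset expressed in IOMMU pages.
 */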
void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
                void *tce_mem, u64 tce_size,
                u64 dma_offset, unsigned int page_shift)
{
        tbl->it_blocksize = 16;
        tbl->it_base = (unsigned long)tce_mem;
        tbl->it_page_shift = page_shift;
        tbl->it_offset = dma_offset >> tbl->it_page_shift;
        tbl->it_index = 0;
        tbl->it_size = tce_size >> 3;
        tbl->it_busno = 0;
        tbl->it_type = TCE_PCI;
}

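/*
 * pnv_alloc_tce_level - Allocate one zeroed TCE table level
 *
 * Allocates 1 << shift bytes on the given NUMA node. GFP_ATOMIC is used
 * because this is also called from the on-demand path in pnv_tce(), which
 * may run in contexts that cannot sleep.
 */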
static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
{
        struct page *tce_mem = NULL;
        __be64 *addr;

        tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
                        shift - PAGE_SHIFT);
        if (!tce_mem) {
                pr_err("Failed to allocate TCE memory, level shift=%d\n",
                                shift);
                return NULL;
        }
        addr = page_address(tce_mem);
        memset(addr, 0, 1UL << shift);

        return addr;
}

static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
                unsigned long size, unsigned int levels);

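/*
 * pnv_tce - Walk the (possibly multi-level) TCE table to a TCE pointer
 *
 * Each level holds it_level_size entries, so each level consumes
 * shift = log2(it_level_size) bits of the index. A worked example
 * (numbers illustrative): with it_indirect_levels = 1 and
 * it_level_size = 512 (shift = 9), idx bits 17..9 select the entry in
 * the root level and bits 8..0 select the TCE within the leaf level.
 *
 * Missing intermediate levels are allocated on demand when @alloc is
 * true. A concurrent allocation of the same level is resolved with
 * cmpxchg(): the loser frees its freshly allocated page and follows
 * the winner's entry instead.
 */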
static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
{
        __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
        int  level = tbl->it_indirect_levels;
        const long shift = ilog2(tbl->it_level_size);
        unsigned long mask = (tbl->it_level_size - 1) << (level * shift);

        while (level) {
                int n = (idx & mask) >> (level * shift);
                unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));

                if (!tce) {
                        __be64 *tmp2;

                        if (!alloc)
                                return NULL;

                        tmp2 = pnv_alloc_tce_level(tbl->it_nid,
                                        ilog2(tbl->it_level_size) + 3);
                        if (!tmp2)
                                return NULL;

                        tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
                        oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
                                        cpu_to_be64(tce)));
                        if (oldtce) {
                                /*
                                 * Lost the race: free our zeroed level page
                                 * (it has no children to descend into, hence
                                 * levels = 0) and use the winner's entry.
                                 */
                                pnv_pci_ioda2_table_do_free_pages(tmp2,
                                                tbl->it_level_size, 0);
                                tce = oldtce;
                        }
                }

                tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
                idx &= ~mask;
                mask >>= shift;
                --level;
        }

        return tmp + idx;
}

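/*
 * pnv_tce_build - Set up a run of TCEs for a kernel-owned mapping
 *
 * Converts the DMA direction to TCE permission bits and writes one TCE
 * per IOMMU page; writable mappings are made readable as well.
 */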
int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
                unsigned long uaddr, enum dma_data_direction direction,
                unsigned long attrs)
{
        u64 proto_tce = iommu_direction_to_tce_perm(direction);
        u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
        long i;

        if (proto_tce & TCE_PCI_WRITE)
                proto_tce |= TCE_PCI_READ;

        for (i = 0; i < npages; i++) {
                unsigned long newtce = proto_tce |
                        ((rpn + i) << tbl->it_page_shift);
                unsigned long idx = index - tbl->it_offset + i;
                __be64 *ptce = pnv_tce(tbl, false, idx, true);

                /* On-demand level allocation may fail under GFP_ATOMIC */
                if (!ptce)
                        return -ENOMEM;

                *ptce = cpu_to_be64(newtce);
        }

        return 0;
}

146 #ifdef CONFIG_IOMMU_API
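/*
 * pnv_tce_xchg - Atomically exchange a TCE, returning the old HPA/direction
 *
 * Used by the IOMMU API (e.g. VFIO) to replace a TCE in one shot. For a
 * DMA_NONE (clearing) request, a missing indirect level simply means the
 * entry is already empty, so no level allocation is done.
 */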
int pnv_tce_xchg(struct iommu_table *tbl, long index,
                unsigned long *hpa, enum dma_data_direction *direction,
                bool alloc)
{
        u64 proto_tce = iommu_direction_to_tce_perm(*direction);
        unsigned long newtce = *hpa | proto_tce, oldtce;
        unsigned long idx = index - tbl->it_offset;
        __be64 *ptce = NULL;

        BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));

        if (*direction == DMA_NONE) {
                ptce = pnv_tce(tbl, false, idx, false);
                if (!ptce) {
                        *hpa = 0;
                        return 0;
                }
        }

        if (!ptce) {
                ptce = pnv_tce(tbl, false, idx, alloc);
                if (!ptce)
                        return -ENOMEM;
        }

        if (newtce & TCE_PCI_WRITE)
                newtce |= TCE_PCI_READ;

        oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
        *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
        *direction = iommu_tce_direction(oldtce);

        return 0;
}

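/*
 * pnv_tce_useraddrptr - Return a pointer into the userspace view of the table
 *
 * The userspace view mirrors the hardware table and stores the userspace
 * address of each mapped page; it only exists when the table was created
 * with alloc_userspace_copy.
 */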
__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
{
        if (WARN_ON_ONCE(!tbl->it_userspace))
                return NULL;

        return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
}
#endif

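/*
 * pnv_tce_free - Clear a run of TCEs
 *
 * A NULL from pnv_tce() means the covering indirect level was never
 * allocated, so the whole level is already clear and can be skipped.
 */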
void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
{
        long i;

        for (i = 0; i < npages; i++) {
                unsigned long idx = index - tbl->it_offset + i;
                __be64 *ptce = pnv_tce(tbl, false, idx, false);

                if (ptce)
                        *ptce = cpu_to_be64(0);
                else
                        /* Skip the rest of the level */
                        i |= tbl->it_level_size - 1;
        }
}

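/*
 * pnv_tce_get - Read a TCE, returning 0 for never-allocated levels
 */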
unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
{
        __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);

        if (!ptce)
                return 0;

        return be64_to_cpu(*ptce);
}

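/*
 * pnv_pci_ioda2_table_do_free_pages - Recursively free a TCE table level
 *
 * @size is the number of entries per level, so each level occupies
 * size << 3 bytes (8 bytes per TCE). Intermediate entries carry the
 * TCE_PCI_READ/WRITE bits, which is how populated children are told
 * apart from empty slots.
 */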
static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
                unsigned long size, unsigned int levels)
{
        const unsigned long addr_ul = (unsigned long) addr &
                        ~(TCE_PCI_READ | TCE_PCI_WRITE);

        if (levels) {
                long i;
                __be64 *tmp = (__be64 *) addr_ul;

                for (i = 0; i < size; ++i) {
                        unsigned long hpa = be64_to_cpu(tmp[i]);

                        if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
                                continue;

                        pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
                                        levels - 1);
                }
        }

        free_pages(addr_ul, get_order(size << 3));
}

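/*
 * pnv_pci_ioda2_table_free_pages - Free the hardware table and its
 * userspace view, if any
 */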
void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
{
        const unsigned long size = tbl->it_indirect_levels ?
                        tbl->it_level_size : tbl->it_size;

        if (!tbl->it_size)
                return;

        pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
                        tbl->it_indirect_levels);
        if (tbl->it_userspace) {
                pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
                                tbl->it_indirect_levels);
        }
}

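/*
 * pnv_pci_ioda2_table_do_alloc_pages - Recursively allocate TCE table levels
 *
 * Allocates up to @levels levels of 1 << shift bytes each, stopping once
 * @current_offset (the amount of leaf-level space allocated so far)
 * reaches @limit. Leaf levels advance @current_offset; intermediate
 * entries point at child levels with TCE_PCI_READ/WRITE set.
 */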
static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
                unsigned int levels, unsigned long limit,
                unsigned long *current_offset, unsigned long *total_allocated)
{
        __be64 *addr, *tmp;
        unsigned long allocated = 1UL << shift;
        unsigned int entries = 1UL << (shift - 3);
        long i;

        addr = pnv_alloc_tce_level(nid, shift);
        if (!addr)
                return NULL;
        *total_allocated += allocated;

        --levels;
        if (!levels) {
                *current_offset += allocated;
                return addr;
        }

        for (i = 0; i < entries; ++i) {
                tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
                                levels, limit, current_offset, total_allocated);
                if (!tmp)
                        break;

                addr[i] = cpu_to_be64(__pa(tmp) |
                                TCE_PCI_READ | TCE_PCI_WRITE);

                if (*current_offset >= limit)
                        break;
        }

        return addr;
}

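/*
 * pnv_pci_ioda2_table_alloc_pages - Create a TCE table for a DMA window
 *
 * The table needs one 8-byte TCE per IOMMU page in the window, i.e.
 * 1 << (log2(window_size) - page_shift + 3) bytes in total, spread over
 * @levels levels. For example (numbers illustrative), a 1GB window of
 * 64K pages needs 16384 TCEs = 128K of table. Only the first level is
 * populated here; lower levels are allocated on demand by pnv_tce(),
 * except for levels == 1 where the whole table must fit immediately.
 */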
long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
                __u32 page_shift, __u64 window_size, __u32 levels,
                bool alloc_userspace_copy, struct iommu_table *tbl)
{
        void *addr, *uas = NULL;
        unsigned long offset = 0, level_shift, total_allocated = 0;
        unsigned long total_allocated_uas = 0;
        const unsigned int window_shift = ilog2(window_size);
        unsigned int entries_shift = window_shift - page_shift;
        unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
                        PAGE_SHIFT);
        const unsigned long tce_table_size = 1UL << table_shift;

        if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
                return -EINVAL;

        if (!is_power_of_2(window_size))
                return -EINVAL;

        /* Adjust direct table size from window_size and levels */
        entries_shift = (entries_shift + levels - 1) / levels;
        level_shift = entries_shift + 3;
        level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);

        if ((level_shift - 3) * levels + page_shift >= 55)
                return -EINVAL;

        /* Allocate TCE table */
        addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
                        1, tce_table_size, &offset, &total_allocated);

        /* addr == NULL means that the first level allocation failed */
        if (!addr)
                return -ENOMEM;

        /*
         * The first level was allocated but some lower level failed:
         * we did not get as much memory as requested, so release the
         * partially allocated table.
         */
        if (levels == 1 && offset < tce_table_size)
                goto free_tces_exit;

        /* Allocate the userspace view of the TCE table */
        if (alloc_userspace_copy) {
                offset = 0;
                uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
                                1, tce_table_size, &offset,
                                &total_allocated_uas);
                if (!uas)
                        goto free_tces_exit;
                if (levels == 1 && (offset < tce_table_size ||
                                total_allocated_uas != total_allocated))
                        goto free_uas_exit;
        }

        /* Setup the linux iommu table */
        pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
                        page_shift);
        tbl->it_level_size = 1ULL << (level_shift - 3);
        tbl->it_indirect_levels = levels - 1;
        tbl->it_userspace = uas;
        tbl->it_nid = nid;

        pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
                        window_size, tce_table_size, bus_offset, tbl->it_base,
                        tbl->it_userspace, 1, levels);

        return 0;

free_uas_exit:
        pnv_pci_ioda2_table_do_free_pages(uas,
                        1ULL << (level_shift - 3), levels - 1);
free_tces_exit:
        pnv_pci_ioda2_table_do_free_pages(addr,
                        1ULL << (level_shift - 3), levels - 1);

        return -ENOMEM;
}

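/*
 * pnv_pci_unlink_table_and_group - Detach a table from a table group
 *
 * Removes the group from the table's RCU-protected list of attached
 * groups and drops the group's reference to the table.
 */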
void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
                struct iommu_table_group *table_group)
{
        long i;
        bool found;
        struct iommu_table_group_link *tgl;

        if (!tbl || !table_group)
                return;

        /* Remove link to a group from table's list of attached groups */
        found = false;

        rcu_read_lock();
        list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
                if (tgl->table_group == table_group) {
                        list_del_rcu(&tgl->next);
                        kfree_rcu(tgl, rcu);
                        found = true;
                        break;
                }
        }
        rcu_read_unlock();

        if (WARN_ON(!found))
                return;

        /* Clean a pointer to iommu_table in iommu_table_group::tables[] */
        found = false;
        for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
                if (table_group->tables[i] == tbl) {
                        iommu_tce_table_put(tbl);
                        table_group->tables[i] = NULL;
                        found = true;
                        break;
                }
        }
        WARN_ON(!found);
}

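/*
 * pnv_pci_link_table_and_group - Attach a table to slot @num of a table group
 *
 * Takes a reference to the table on behalf of the group and adds the
 * group to the table's list of attached groups.
 */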
long pnv_pci_link_table_and_group(int node, int num,
                struct iommu_table *tbl,
                struct iommu_table_group *table_group)
{
        struct iommu_table_group_link *tgl = NULL;

        if (WARN_ON(!tbl || !table_group))
                return -EINVAL;

        tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
                        node);
        if (!tgl)
                return -ENOMEM;

        tgl->table_group = table_group;
        list_add_rcu(&tgl->next, &tbl->it_group_list);

        table_group->tables[num] = iommu_tce_table_get(tbl);

        return 0;
}