// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Christoph Hellwig.
 *
 * DMA operations that map physical memory directly without using an IOMMU.
 */
#include <linux/memblock.h> /* for max_pfn */
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/scatterlist.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-noncoherent.h>
#include <linux/pfn.h>
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
#include <linux/swiotlb.h>

/*
 * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use
 * it for entirely different regions. In that case the arch code needs to
 * override the variable below for dma-direct to work properly.
 */
unsigned int zone_dma_bits __ro_after_init = 24;

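/*
 * Translate a CPU physical address to a device DMA address.  When the device
 * has to use unencrypted memory (e.g. under AMD SME/SEV), use the variant
 * that does not set the memory encryption bit in the returned address.
 */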
static inline dma_addr_t phys_to_dma_direct(struct device *dev,
                phys_addr_t phys)
{
        if (force_dma_unencrypted(dev))
                return __phys_to_dma(dev, phys);
        return phys_to_dma(dev, phys);
}

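/* Return the struct page backing a direct-mapped DMA address. */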
static inline struct page *dma_direct_to_page(struct device *dev,
                dma_addr_t dma_addr)
{
        return pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_addr)));
}

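/*
 * Return the smallest DMA mask that still covers the last directly mapped
 * page of memory, rounded up to the next power of two minus one.
 */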
u64 dma_direct_get_required_mask(struct device *dev)
{
        u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);

        return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}

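/*
 * Pick the GFP zone flags to try first for an allocation that has to be
 * reachable under @dma_mask (and the bus DMA limit), and report the matching
 * physical address limit through @phys_limit.
 */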
static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
                u64 *phys_limit)
{
        u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit);

        if (force_dma_unencrypted(dev))
                *phys_limit = __dma_to_phys(dev, dma_limit);
        else
                *phys_limit = dma_to_phys(dev, dma_limit);

        /*
         * Optimistically try the zone that the physical address mask falls
         * into first.  If that returns memory that isn't actually addressable
         * we will fall back to the next lower zone and try again.
         *
         * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding
         * zones.
         */
        if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits))
                return GFP_DMA;
        if (*phys_limit <= DMA_BIT_MASK(32))
                return GFP_DMA32;
        return 0;
}

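/*
 * Check that the buffer at @phys is addressable under both the coherent DMA
 * mask and the bus DMA limit of @dev.
 */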
static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
        return phys_to_dma_direct(dev, phys) + size - 1 <=
                        min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
}

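/*
 * Allocate physically contiguous pages that the device can address, trying
 * CMA first and falling back to the page allocator, and retrying in lower
 * zones (DMA32, then DMA) when the returned memory turns out not to be
 * addressable.
 */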
struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
                gfp_t gfp, unsigned long attrs)
{
        size_t alloc_size = PAGE_ALIGN(size);
        int node = dev_to_node(dev);
        struct page *page = NULL;
        u64 phys_limit;

        if (attrs & DMA_ATTR_NO_WARN)
                gfp |= __GFP_NOWARN;

        /* we always manually zero the memory once we are done: */
        gfp &= ~__GFP_ZERO;
        gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
                        &phys_limit);
        page = dma_alloc_contiguous(dev, alloc_size, gfp);
        if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
                dma_free_contiguous(dev, page, alloc_size);
                page = NULL;
        }
again:
        if (!page)
                page = alloc_pages_node(node, gfp, get_order(alloc_size));
        if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
                dma_free_contiguous(dev, page, size);
                page = NULL;

                if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
                    phys_limit < DMA_BIT_MASK(64) &&
                    !(gfp & (GFP_DMA32 | GFP_DMA))) {
                        gfp |= GFP_DMA32;
                        goto again;
                }

                if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) {
                        gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
                        goto again;
                }
        }

        return page;
}

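/*
 * Allocate coherent memory and return a kernel address for it.  Depending on
 * the device and config the result may come from the atomic pool (for
 * non-blocking allocations that need an uncached mapping), be remapped
 * uncached, be returned as an opaque struct page cookie
 * (DMA_ATTR_NO_KERNEL_MAPPING), or simply be the linear-map address of the
 * allocated pages.
 */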
void *dma_direct_alloc_pages(struct device *dev, size_t size,
                dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
        struct page *page;
        void *ret;

        if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
            dma_alloc_need_uncached(dev, attrs) &&
            !gfpflags_allow_blocking(gfp)) {
                ret = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp);
                if (!ret)
                        return NULL;
                goto done;
        }

        page = __dma_direct_alloc_pages(dev, size, gfp, attrs);
        if (!page)
                return NULL;

        if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
            !force_dma_unencrypted(dev)) {
                /* remove any dirty cache lines on the kernel alias */
                if (!PageHighMem(page))
                        arch_dma_prep_coherent(page, size);
                /* return the page pointer as the opaque cookie */
                ret = page;
                goto done;
        }

        if ((IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
             dma_alloc_need_uncached(dev, attrs)) ||
            (IS_ENABLED(CONFIG_DMA_REMAP) && PageHighMem(page))) {
                /* remove any dirty cache lines on the kernel alias */
                arch_dma_prep_coherent(page, PAGE_ALIGN(size));

                /* create a coherent mapping */
                ret = dma_common_contiguous_remap(page, PAGE_ALIGN(size),
                                dma_pgprot(dev, PAGE_KERNEL, attrs),
                                __builtin_return_address(0));
                if (!ret) {
                        dma_free_contiguous(dev, page, size);
                        return ret;
                }

                memset(ret, 0, size);
                goto done;
        }

        if (PageHighMem(page)) {
                /*
                 * Depending on the cma= arguments and per-arch setup
                 * dma_alloc_contiguous could return highmem pages.
                 * Without remapping there is no way to return them here,
                 * so log an error and fail.
                 */
                dev_info(dev, "Rejecting highmem page from CMA.\n");
                dma_free_contiguous(dev, page, size);
                return NULL;
        }

        ret = page_address(page);
        if (force_dma_unencrypted(dev))
                set_memory_decrypted((unsigned long)ret, 1 << get_order(size));

        memset(ret, 0, size);

        if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
            dma_alloc_need_uncached(dev, attrs)) {
                arch_dma_prep_coherent(page, size);
                ret = uncached_kernel_address(ret);
        }
done:
        if (force_dma_unencrypted(dev))
                *dma_handle = __phys_to_dma(dev, page_to_phys(page));
        else
                *dma_handle = phys_to_dma(dev, page_to_phys(page));
        return ret;
}

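/*
 * Free memory obtained from dma_direct_alloc_pages(): hand atomic pool and
 * page-cookie allocations back, re-encrypt the memory if it was decrypted,
 * drop any vmalloc remapping, and finally release the pages.
 */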
void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
                dma_addr_t dma_addr, unsigned long attrs)
{
        unsigned int page_order = get_order(size);

        if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
            !force_dma_unencrypted(dev)) {
                /* cpu_addr is a struct page cookie, not a kernel address */
                dma_free_contiguous(dev, cpu_addr, size);
                return;
        }

        if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
            dma_free_from_pool(cpu_addr, PAGE_ALIGN(size)))
                return;

        if (force_dma_unencrypted(dev))
                set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);

        if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr))
                vunmap(cpu_addr);

        dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size);
}

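/*
 * Entry point for dma_alloc_coherent() on the direct mapping.  Architectures
 * that can neither remap nor provide an uncached segment handle uncached
 * allocations in arch_dma_alloc() instead.
 */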
void *dma_direct_alloc(struct device *dev, size_t size,
                dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
        if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
            !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
            dma_alloc_need_uncached(dev, attrs))
                return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
        return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
}

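/* Counterpart to dma_direct_alloc(), mirroring the arch_dma_alloc() fallback. */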
void dma_direct_free(struct device *dev, size_t size,
                void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
{
        if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
            !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
            dma_alloc_need_uncached(dev, attrs))
                arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
        else
                dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
}

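/*
 * Ownership transfer to the device: copy data into any swiotlb bounce buffer
 * and perform the architecture's cache maintenance before the device accesses
 * the buffer.
 */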
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
    defined(CONFIG_SWIOTLB)
void dma_direct_sync_single_for_device(struct device *dev,
                dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
        phys_addr_t paddr = dma_to_phys(dev, addr);

        if (unlikely(is_swiotlb_buffer(paddr)))
                swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);

        if (!dev_is_dma_coherent(dev))
                arch_sync_dma_for_device(paddr, size, dir);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_device);

void dma_direct_sync_sg_for_device(struct device *dev,
                struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nents, i) {
                phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));

                if (unlikely(is_swiotlb_buffer(paddr)))
                        swiotlb_tbl_sync_single(dev, paddr, sg->length,
                                        dir, SYNC_FOR_DEVICE);

                if (!dev_is_dma_coherent(dev))
                        arch_sync_dma_for_device(paddr, sg->length,
                                        dir);
        }
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_device);
#endif

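/*
 * Ownership transfer back to the CPU: invalidate caches and copy data out of
 * any swiotlb bounce buffer so the CPU sees what the device wrote.
 */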
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
    defined(CONFIG_SWIOTLB)
void dma_direct_sync_single_for_cpu(struct device *dev,
                dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
        phys_addr_t paddr = dma_to_phys(dev, addr);

        if (!dev_is_dma_coherent(dev)) {
                arch_sync_dma_for_cpu(paddr, size, dir);
                arch_sync_dma_for_cpu_all();
        }

        if (unlikely(is_swiotlb_buffer(paddr)))
                swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_cpu);

void dma_direct_sync_sg_for_cpu(struct device *dev,
                struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nents, i) {
                phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));

                if (!dev_is_dma_coherent(dev))
                        arch_sync_dma_for_cpu(paddr, sg->length, dir);

                if (unlikely(is_swiotlb_buffer(paddr)))
                        swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
                                        SYNC_FOR_CPU);
        }

        if (!dev_is_dma_coherent(dev))
                arch_sync_dma_for_cpu_all();
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu);

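/*
 * Tear down a streaming mapping: transfer ownership back to the CPU and
 * release the swiotlb bounce buffer if one was used.
 */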
void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
                size_t size, enum dma_data_direction dir, unsigned long attrs)
{
        phys_addr_t phys = dma_to_phys(dev, addr);

        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                dma_direct_sync_single_for_cpu(dev, addr, size, dir);

        if (unlikely(is_swiotlb_buffer(phys)))
                swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_page);

void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
                int nents, enum dma_data_direction dir, unsigned long attrs)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nents, i)
                dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
                             attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_sg);
#endif

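/*
 * Set up a streaming mapping for a single page: bounce through swiotlb when
 * forced or when the address is not reachable by the device, and do cache
 * maintenance for non-coherent devices.
 */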
dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
                unsigned long offset, size_t size, enum dma_data_direction dir,
                unsigned long attrs)
{
        phys_addr_t phys = page_to_phys(page) + offset;
        dma_addr_t dma_addr = phys_to_dma(dev, phys);

        if (unlikely(swiotlb_force == SWIOTLB_FORCE))
                return swiotlb_map(dev, phys, size, dir, attrs);

        if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
                if (swiotlb_force != SWIOTLB_NO_FORCE)
                        return swiotlb_map(dev, phys, size, dir, attrs);

                dev_WARN_ONCE(dev, 1,
                             "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
                             &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
                return DMA_MAPPING_ERROR;
        }

        if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                arch_sync_dma_for_device(phys, size, dir);
        return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_page);

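/*
 * Map a scatterlist entry by entry; on failure unmap everything mapped so
 * far and return 0 as required by the DMA API.
 */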
int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
                enum dma_data_direction dir, unsigned long attrs)
{
        int i;
        struct scatterlist *sg;

        for_each_sg(sgl, sg, nents, i) {
                sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
                                sg->offset, sg->length, dir, attrs);
                if (sg->dma_address == DMA_MAPPING_ERROR)
                        goto out_unmap;
                sg_dma_len(sg) = sg->length;
        }

        return nents;

out_unmap:
        dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
        return 0;
}
EXPORT_SYMBOL(dma_direct_map_sg);

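/*
 * Map a physical resource (e.g. MMIO) for DMA.  No bouncing or cache
 * maintenance is possible here, only an addressability check.
 */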
dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
                size_t size, enum dma_data_direction dir, unsigned long attrs)
{
        dma_addr_t dma_addr = paddr;

        if (unlikely(!dma_capable(dev, dma_addr, size, false))) {
                dev_err_once(dev,
                             "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
                             &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
                WARN_ON_ONCE(1);
                return DMA_MAPPING_ERROR;
        }

        return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_resource);

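/* Describe a coherent allocation as a single-entry scatter-gather table. */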
int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt,
                void *cpu_addr, dma_addr_t dma_addr, size_t size,
                unsigned long attrs)
{
        struct page *page = dma_direct_to_page(dev, dma_addr);
        int ret;

        ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
        if (!ret)
                sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
        return ret;
}

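/*
 * Userspace mapping of coherent allocations: only possible with an MMU, and
 * only when a cached mapping is coherent or non-coherent mmap is allowed.
 */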
#ifdef CONFIG_MMU
bool dma_direct_can_mmap(struct device *dev)
{
        return dev_is_dma_coherent(dev) ||
                IS_ENABLED(CONFIG_DMA_NONCOHERENT_MMAP);
}

int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
                void *cpu_addr, dma_addr_t dma_addr, size_t size,
                unsigned long attrs)
{
        unsigned long user_count = vma_pages(vma);
        unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
        unsigned long pfn = PHYS_PFN(dma_to_phys(dev, dma_addr));
        int ret = -ENXIO;

        vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);

        if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
                return ret;

        if (vma->vm_pgoff >= count || user_count > count - vma->vm_pgoff)
                return -ENXIO;
        return remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
                        user_count << PAGE_SHIFT, vma->vm_page_prot);
}
#else /* CONFIG_MMU */
bool dma_direct_can_mmap(struct device *dev)
{
        return false;
}

int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
                void *cpu_addr, dma_addr_t dma_addr, size_t size,
                unsigned long attrs)
{
        return -ENXIO;
}
#endif /* CONFIG_MMU */

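/*
 * Check whether a DMA mask can be supported by the direct mapping, i.e.
 * whether all of memory (or at least ZONE_DMA/ZONE_DMA32) is reachable
 * under that mask.
 */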
int dma_direct_supported(struct device *dev, u64 mask)
{
        u64 min_mask = (max_pfn - 1) << PAGE_SHIFT;

        /*
         * Because 32-bit DMA masks are so common we expect every architecture
         * to be able to satisfy them - either by not supporting more physical
         * memory, or by providing a ZONE_DMA32.  If neither is the case, the
         * architecture needs to use an IOMMU instead of the direct mapping.
         */
        if (mask >= DMA_BIT_MASK(32))
                return 1;

        /*
         * This check needs to be against the actual bit mask value, so
         * use __phys_to_dma() here so that the SME encryption mask isn't
         * part of the check.
         */
        if (IS_ENABLED(CONFIG_ZONE_DMA))
                min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits));
        return mask >= __phys_to_dma(dev, min_mask);
}

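/*
 * Report the largest size a single mapping may have; limited by the swiotlb
 * bounce buffer size when bouncing may be needed for this device.
 */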
size_t dma_direct_max_mapping_size(struct device *dev)
{
        /* If SWIOTLB is active, use its maximum mapping size */
        if (is_swiotlb_active() &&
            (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
                return swiotlb_max_mapping_size(dev);
        return SIZE_MAX;
}