// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Christoph Hellwig.
 *
 * DMA operations that map physical memory directly without using an IOMMU.
 */
#include <linux/memblock.h> /* for max_pfn */
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/scatterlist.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-noncoherent.h>
#include <linux/pfn.h>
#include <linux/set_memory.h>
#include <linux/swiotlb.h>

/*
 * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use
 * it for entirely different regions. In that case the arch code needs to
 * override the variable below for dma-direct to work properly.
 */
unsigned int zone_dma_bits __ro_after_init = 24;
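
/*
 * Illustrative example (not taken from any particular architecture): an arch
 * whose DMA zone covers the first 1 GiB of physical memory would set
 *
 *	zone_dma_bits = 30;
 *
 * from its early setup code, so that __dma_direct_optimal_gfp_mask() below
 * maps coherent masks of up to 30 bits to GFP_DMA.
 */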

static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
{
	if (!dev->dma_mask) {
		dev_err_once(dev, "DMA map on device without dma_mask\n");
	} else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) {
		dev_err_once(dev,
			"overflow %pad+%zu of DMA mask %llx bus mask %llx\n",
			&dma_addr, size, *dev->dma_mask, dev->bus_dma_mask);
	}
	WARN_ON_ONCE(1);
}

static inline dma_addr_t phys_to_dma_direct(struct device *dev,
		phys_addr_t phys)
{
	if (force_dma_unencrypted(dev))
		return __phys_to_dma(dev, phys);
	return phys_to_dma(dev, phys);
}

u64 dma_direct_get_required_mask(struct device *dev)
{
	u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);

	return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}
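
/*
 * Worked example (illustrative, assuming an identity phys-to-dma mapping):
 * with 4 GiB of RAM and 4 KiB pages, the highest page starts at 0xfffff000,
 * fls64(0xfffff000) is 32, and the expression above yields
 * (1ULL << 31) * 2 - 1 == 0xffffffff, i.e. a 32-bit required mask.  The
 * result is always a power-of-two-minus-one mask wide enough to cover the
 * highest DMA address.
 */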

static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
		u64 *phys_mask)
{
	if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask)
		dma_mask = dev->bus_dma_mask;

	if (force_dma_unencrypted(dev))
		*phys_mask = __dma_to_phys(dev, dma_mask);
	else
		*phys_mask = dma_to_phys(dev, dma_mask);

	/*
	 * Optimistically try the zone that the physical address mask falls
	 * into first.  If that returns memory that isn't actually addressable
	 * we will fall back to the next lower zone and try again.
	 *
	 * Note that GFP_DMA32 and GFP_DMA are no-ops without the corresponding
	 * zones.
	 */
	if (*phys_mask <= DMA_BIT_MASK(zone_dma_bits))
		return GFP_DMA;
	if (*phys_mask <= DMA_BIT_MASK(32))
		return GFP_DMA32;
	return 0;
}
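
/*
 * Illustrative example of the fallback order (assuming an identity dma
 * offset and the default zone_dma_bits == 24): a device with a 28-bit
 * coherent mask gets GFP_DMA32 first, because 0x0fffffff is above
 * DMA_BIT_MASK(24) but within DMA_BIT_MASK(32).  If the page that comes back
 * still fails dma_coherent_ok(), the retry loop in
 * __dma_direct_alloc_pages() below drops to GFP_DMA, provided
 * CONFIG_ZONE_DMA is enabled.
 */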

static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
	return phys_to_dma_direct(dev, phys) + size - 1 <=
			min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask);
}

struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	size_t alloc_size = PAGE_ALIGN(size);
	int node = dev_to_node(dev);
	struct page *page = NULL;
	u64 phys_mask;

	if (attrs & DMA_ATTR_NO_WARN)
		gfp |= __GFP_NOWARN;

	/* we always manually zero the memory once we are done: */
	gfp &= ~__GFP_ZERO;
	gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
			&phys_mask);
	page = dma_alloc_contiguous(dev, alloc_size, gfp);
	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
		dma_free_contiguous(dev, page, alloc_size);
		page = NULL;
	}
again:
	if (!page)
		page = alloc_pages_node(node, gfp, get_order(alloc_size));
	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
		dma_free_contiguous(dev, page, size);
		page = NULL;

		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
		    phys_mask < DMA_BIT_MASK(64) &&
		    !(gfp & (GFP_DMA32 | GFP_DMA))) {
			gfp |= GFP_DMA32;
			goto again;
		}

		if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) {
			gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
			goto again;
		}
	}

	return page;
}

void *dma_direct_alloc_pages(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	struct page *page;
	void *ret;

	page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
	if (!page)
		return NULL;

	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
	    !force_dma_unencrypted(dev)) {
		/* remove any dirty cache lines on the kernel alias */
		if (!PageHighMem(page))
			arch_dma_prep_coherent(page, size);
		*dma_handle = phys_to_dma(dev, page_to_phys(page));
		/* return the page pointer as the opaque cookie */
		return page;
	}

	if (PageHighMem(page)) {
		/*
		 * Depending on the cma= arguments and per-arch setup
		 * dma_alloc_contiguous could return highmem pages.
		 * Without remapping there is no way to return them here,
		 * so log an error and fail.
		 */
		dev_info(dev, "Rejecting highmem page from CMA.\n");
		__dma_direct_free_pages(dev, size, page);
		return NULL;
	}

	ret = page_address(page);
	if (force_dma_unencrypted(dev)) {
		set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
		*dma_handle = __phys_to_dma(dev, page_to_phys(page));
	} else {
		*dma_handle = phys_to_dma(dev, page_to_phys(page));
	}
	memset(ret, 0, size);

	if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs)) {
		arch_dma_prep_coherent(page, size);
		ret = uncached_kernel_address(ret);
	}

	return ret;
}

void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page)
{
	dma_free_contiguous(dev, page, size);
}

void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
		dma_addr_t dma_addr, unsigned long attrs)
{
	unsigned int page_order = get_order(size);

	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
	    !force_dma_unencrypted(dev)) {
		/* cpu_addr is a struct page cookie, not a kernel address */
		__dma_direct_free_pages(dev, size, cpu_addr);
		return;
	}

	if (force_dma_unencrypted(dev))
		set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);

	if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs))
		cpu_addr = cached_kernel_address(cpu_addr);
	__dma_direct_free_pages(dev, size, virt_to_page(cpu_addr));
}

void *dma_direct_alloc(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs))
		return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
	return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
}

void dma_direct_free(struct device *dev, size_t size,
		void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
{
	if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs))
		arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
	else
		dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
}
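
/*
 * Usage sketch (illustrative only, not part of this file's API surface):
 * drivers normally reach dma_direct_alloc()/dma_direct_free() through the
 * generic wrappers in kernel/dma/mapping.c rather than calling them
 * directly, e.g.:
 *
 *	dma_addr_t dma;
 *	void *cpu = dma_alloc_coherent(dev, SZ_4K, &dma, GFP_KERNEL);
 *
 *	if (!cpu)
 *		return -ENOMEM;
 *	...
 *	dma_free_coherent(dev, SZ_4K, cpu, dma);
 *
 * When the device has no IOMMU-backed dma_map_ops, those wrappers dispatch
 * here.
 */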

#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
    defined(CONFIG_SWIOTLB)
void dma_direct_sync_single_for_device(struct device *dev,
		dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = dma_to_phys(dev, addr);

	if (unlikely(is_swiotlb_buffer(paddr)))
		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_device(dev, paddr, size, dir);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_device);

void dma_direct_sync_sg_for_device(struct device *dev,
		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));

		if (unlikely(is_swiotlb_buffer(paddr)))
			swiotlb_tbl_sync_single(dev, paddr, sg->length,
					dir, SYNC_FOR_DEVICE);

		if (!dev_is_dma_coherent(dev))
			arch_sync_dma_for_device(dev, paddr, sg->length,
					dir);
	}
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_device);
#endif

#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
    defined(CONFIG_SWIOTLB)
void dma_direct_sync_single_for_cpu(struct device *dev,
		dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = dma_to_phys(dev, addr);

	if (!dev_is_dma_coherent(dev)) {
		arch_sync_dma_for_cpu(dev, paddr, size, dir);
		arch_sync_dma_for_cpu_all(dev);
	}

	if (unlikely(is_swiotlb_buffer(paddr)))
		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_cpu);

void dma_direct_sync_sg_for_cpu(struct device *dev,
		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));

		if (!dev_is_dma_coherent(dev))
			arch_sync_dma_for_cpu(dev, paddr, sg->length, dir);

		if (unlikely(is_swiotlb_buffer(paddr)))
			swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
					SYNC_FOR_CPU);
	}

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu_all(dev);
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu);

void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	phys_addr_t phys = dma_to_phys(dev, addr);

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		dma_direct_sync_single_for_cpu(dev, addr, size, dir);

	if (unlikely(is_swiotlb_buffer(phys)))
		swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_page);

void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i)
		dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
				attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_sg);
#endif

static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr,
		size_t size)
{
	return swiotlb_force != SWIOTLB_FORCE &&
		dma_capable(dev, dma_addr, size);
}

dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	dma_addr_t dma_addr = phys_to_dma(dev, phys);

	if (unlikely(!dma_direct_possible(dev, dma_addr, size)) &&
	    !swiotlb_map(dev, &phys, &dma_addr, size, dir, attrs)) {
		report_addr(dev, dma_addr, size);
		return DMA_MAPPING_ERROR;
	}

	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		arch_sync_dma_for_device(dev, phys, size, dir);
	return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_page);
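
/*
 * Usage sketch (illustrative only): streaming mappings normally arrive here
 * via the dma_map_single()/dma_map_page() wrappers declared in
 * include/linux/dma-mapping.h, e.g.:
 *
 *	dma_addr_t dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
 *
 *	if (dma_mapping_error(dev, dma))
 *		return -ENOMEM;
 *	...
 *	dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
 *
 * If the address is not reachable by the device, the code above bounces the
 * buffer through swiotlb before giving up.
 */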

int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
		enum dma_data_direction dir, unsigned long attrs)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sgl, sg, nents, i) {
		sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
				sg->offset, sg->length, dir, attrs);
		if (sg->dma_address == DMA_MAPPING_ERROR)
			goto out_unmap;
		sg_dma_len(sg) = sg->length;
	}

	return nents;

out_unmap:
	dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
	return 0;
}
EXPORT_SYMBOL(dma_direct_map_sg);

dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	dma_addr_t dma_addr = paddr;

	if (unlikely(!dma_direct_possible(dev, dma_addr, size))) {
		report_addr(dev, dma_addr, size);
		return DMA_MAPPING_ERROR;
	}

	return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_resource);

/*
 * Because 32-bit DMA masks are so common we expect every architecture to be
 * able to satisfy them - either by not supporting more physical memory, or by
 * providing a ZONE_DMA32.  If neither is the case, the architecture needs to
 * use an IOMMU instead of the direct mapping.
 */
int dma_direct_supported(struct device *dev, u64 mask)
{
	u64 min_mask;

	if (IS_ENABLED(CONFIG_ZONE_DMA))
		min_mask = DMA_BIT_MASK(zone_dma_bits);
	else
		min_mask = DMA_BIT_MASK(32);

	min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT);

	/*
	 * This check needs to be against the actual bit mask value, so
	 * use __phys_to_dma() here so that the SME encryption mask isn't
	 * part of the check.
	 */
	return mask >= __phys_to_dma(dev, min_mask);
}
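
/*
 * Worked example (illustrative, assuming an identity dma offset): with
 * CONFIG_ZONE_DMA and the default zone_dma_bits == 24, min_mask starts at
 * DMA_BIT_MASK(24).  On a machine with only 8 MiB of RAM it is then clamped
 * to roughly 0x7ff000, so even a device limited to 23 address bits is
 * reported as supported, because all of memory is reachable for it.
 */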

size_t dma_direct_max_mapping_size(struct device *dev)
{
	/* If SWIOTLB is active, use its maximum mapping size */
	if (is_swiotlb_active() &&
	    (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
		return swiotlb_max_mapping_size(dev);
	return SIZE_MAX;
}