// SPDX-License-Identifier: GPL-2.0-only
/*
 * MMU-based software IOTLB.
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/vdpa.h>

#include "iova_domain.h"
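
/*
 * Each IOTLB entry carries a struct vdpa_map_file as its opaque
 * context: a held reference on the backing file plus an offset into
 * it. This is what lets userspace mmap() the domain file and fault in
 * the pages behind any mapped IOVA range.
 */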
static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
				 u64 start, u64 last,
				 u64 addr, unsigned int perm,
				 struct file *file, u64 offset)
{
	struct vdpa_map_file *map_file;
	int ret;

	map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC);
	if (!map_file)
		return -ENOMEM;

	map_file->file = get_file(file);
	map_file->offset = offset;

	ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
					addr, perm, map_file);
	if (ret) {
		fput(map_file->file);
		kfree(map_file);
		return ret;
	}
	return 0;
}
static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
				  u64 start, u64 last)
{
	struct vdpa_map_file *map_file;
	struct vhost_iotlb_map *map;

	while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) {
		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_iotlb_map_free(domain->iotlb, map);
	}
}
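
/*
 * Replace the domain's entire IOTLB with the contents of @iotlb. The
 * old entries (and their file references) are dropped first; if any
 * insertion fails, everything added so far is rolled back so the
 * domain is left empty rather than half-populated.
 */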
int vduse_domain_set_map(struct vduse_iova_domain *domain,
			 struct vhost_iotlb *iotlb)
{
	struct vdpa_map_file *map_file;
	struct vhost_iotlb_map *map;
	u64 start = 0ULL, last = ULLONG_MAX;
	int ret;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, start, last);

	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		map_file = (struct vdpa_map_file *)map->opaque;
		ret = vduse_iotlb_add_range(domain, map->start, map->last,
					    map->addr, map->perm,
					    map_file->file,
					    map_file->offset);
		if (ret)
			goto err;
	}
	spin_unlock(&domain->iotlb_lock);

	return 0;
err:
	vduse_iotlb_del_range(domain, start, last);
	spin_unlock(&domain->iotlb_lock);
	return ret;
}
void vduse_domain_clear_map(struct vduse_iova_domain *domain,
			    struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	u64 start = 0ULL, last = ULLONG_MAX;

	spin_lock(&domain->iotlb_lock);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		vduse_iotlb_del_range(domain, map->start, map->last);
	}
	spin_unlock(&domain->iotlb_lock);
}
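
/*
 * Bounce-buffer bookkeeping: bounce_maps[] is indexed by IOVA page
 * frame number. Each slot tracks a lazily allocated bounce page and
 * the physical address of the original buffer currently mapped there
 * (INVALID_PHYS_ADDR when the slot is unmapped).
 */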
static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
					u64 iova, u64 size, u64 paddr)
{
	struct vduse_bounce_map *map;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		if (!map->bounce_page) {
			map->bounce_page = alloc_page(GFP_ATOMIC);
			if (!map->bounce_page)
				return -ENOMEM;
		}
		map->orig_phys = paddr;
		paddr += PAGE_SIZE;
		iova += PAGE_SIZE;
	}
	return 0;
}
static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
					   u64 iova, u64 size)
{
	struct vduse_bounce_map *map;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		map->orig_phys = INVALID_PHYS_ADDR;
		iova += PAGE_SIZE;
	}
}
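
/*
 * Copy one stretch of data between the original buffer (given by its
 * physical address, possibly in highmem, hence the kmap_atomic()) and
 * a bounce buffer. For DMA_TO_DEVICE the data flows from the original
 * pages into @addr; for DMA_FROM_DEVICE it flows back out.
 */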
static void do_bounce(phys_addr_t orig, void *addr, size_t size,
		      enum dma_data_direction dir)
{
	unsigned long pfn = PFN_DOWN(orig);
	unsigned int offset = offset_in_page(orig);
	char *buffer;
	unsigned int sz = 0;

	while (size) {
		sz = min_t(size_t, PAGE_SIZE - offset, size);

		buffer = kmap_atomic(pfn_to_page(pfn));
		if (dir == DMA_TO_DEVICE)
			memcpy(addr, buffer + offset, sz);
		else
			memcpy(buffer + offset, addr, sz);
		kunmap_atomic(buffer);

		size -= sz;
		pfn++;
		addr += sz;
		offset = 0;
	}
}
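
/*
 * Sync a range of bounce pages with the original buffers backing
 * them, one page at a time: neither side is guaranteed to be
 * physically contiguous beyond a page boundary.
 */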
static void vduse_domain_bounce(struct vduse_iova_domain *domain,
				dma_addr_t iova, size_t size,
				enum dma_data_direction dir)
{
	struct vduse_bounce_map *map;
	unsigned int offset;
	void *addr;
	size_t sz;

	if (iova >= domain->bounce_size)
		return;

	while (size) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		offset = offset_in_page(iova);
		sz = min_t(size_t, PAGE_SIZE - offset, size);

		if (WARN_ON(!map->bounce_page ||
			    map->orig_phys == INVALID_PHYS_ADDR))
			return;

		addr = page_address(map->bounce_page) + offset;
		do_bounce(map->orig_phys + offset, addr, sz, dir);
		size -= sz;
		iova += sz;
	}
}
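
/*
 * Resolve an IOVA in the coherent range to its backing page for the
 * mmap fault path. The reference taken here is handed to the fault
 * handler along with the page.
 */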
static struct page *
vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
{
	u64 start = iova & PAGE_MASK;
	u64 last = start + PAGE_SIZE - 1;
	struct vhost_iotlb_map *map;
	struct page *page = NULL;

	spin_lock(&domain->iotlb_lock);
	map = vhost_iotlb_itree_first(domain->iotlb, start, last);
	if (!map)
		goto out;

	page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
	get_page(page);
out:
	spin_unlock(&domain->iotlb_lock);

	return page;
}
static struct page *
vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
{
	struct vduse_bounce_map *map;
	struct page *page = NULL;

	spin_lock(&domain->iotlb_lock);
	map = &domain->bounce_maps[iova >> PAGE_SHIFT];
	if (!map->bounce_page)
		goto out;

	page = map->bounce_page;
	get_page(page);
out:
	spin_unlock(&domain->iotlb_lock);

	return page;
}
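
/*
 * Free all bounce pages at teardown. A slot whose orig_phys is still
 * valid indicates a mapping that was never unmapped; warn and skip it
 * rather than free a page that may still be in use.
 */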
static void
vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain)
{
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;

	bounce_pfns = domain->bounce_size >> PAGE_SHIFT;

	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR))
			continue;

		if (!map->bounce_page)
			continue;

		__free_page(map->bounce_page);
		map->bounce_page = NULL;
	}
}
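
/*
 * The bounce range is registered in the IOTLB at most once per
 * domain. bounce_map is tested both before and after taking
 * iotlb_lock so the common already-initialized (or already-reset)
 * case stays lock-free.
 */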
void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
{
	if (!domain->bounce_map)
		return;

	spin_lock(&domain->iotlb_lock);
	if (!domain->bounce_map)
		goto unlock;

	vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
	domain->bounce_map = 0;
unlock:
	spin_unlock(&domain->iotlb_lock);
}
static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
{
	int ret = 0;

	if (domain->bounce_map)
		return 0;

	spin_lock(&domain->iotlb_lock);
	if (domain->bounce_map)
		goto unlock;

	ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
				    0, VHOST_MAP_RW, domain->file, 0);
	if (ret)
		goto unlock;

	domain->bounce_map = 1;
unlock:
	spin_unlock(&domain->iotlb_lock);
	return ret;
}
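
/*
 * Thin wrappers around the kernel's IOVA allocator. Lengths are
 * converted to IOVA-granule units via iova_shift()/iova_align();
 * a returned address of 0 means allocation failure.
 */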
static dma_addr_t
vduse_domain_alloc_iova(struct iova_domain *iovad,
			unsigned long size, unsigned long limit)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;
	unsigned long iova_pfn;

	/*
	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
	 * will come back to bite us badly, so we have to waste a bit of space
	 * rounding up anything cacheable to make sure that can't happen. The
	 * order of the unadjusted size will still match upon freeing.
	 */
	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
		iova_len = roundup_pow_of_two(iova_len);
	iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);

	return iova_pfn << shift;
}
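
/*
 * Worked example of the rounding above, assuming 4 KiB granules: a
 * 3-page request is allocated as 4 pages (roundup_pow_of_two(3) == 4).
 * vduse_domain_free_iova() below recomputes the length from the
 * unadjusted size, which is safe because the IOVA range caches bucket
 * by allocation order and order_base_2(3) == order_base_2(4).
 */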
static void vduse_domain_free_iova(struct iova_domain *iovad,
				   dma_addr_t iova, size_t size)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;

	free_iova_fast(iovad, iova >> shift, iova_len);
}
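
/*
 * Streaming DMA path: map a kernel page into the bounce region. The
 * IOVA is capped at bounce_size - 1, since it doubles as the index
 * into bounce_maps[].
 */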
dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
				 struct page *page, unsigned long offset,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;
	unsigned long limit = domain->bounce_size - 1;
	phys_addr_t pa = page_to_phys(page) + offset;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);

	if (!iova)
		return DMA_MAPPING_ERROR;

	if (vduse_domain_init_bounce_map(domain))
		goto err;

	if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
		goto err;

	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
		vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);

	return iova;
err:
	vduse_domain_free_iova(iovad, iova, size);
	return DMA_MAPPING_ERROR;
}
void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
			     dma_addr_t dma_addr, size_t size,
			     enum dma_data_direction dir, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;

	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);

	vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
	vduse_domain_free_iova(iovad, dma_addr, size);
}
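
/*
 * Coherent DMA path: no bouncing. The backing pages come straight
 * from alloc_pages_exact() and are exposed to userspace through an
 * IOTLB entry that reuses the IOVA as the file offset.
 */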
void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
				  size_t size, dma_addr_t *dma_addr,
				  gfp_t flag, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	unsigned long limit = domain->iova_limit;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
	void *orig = alloc_pages_exact(size, flag);

	if (!iova || !orig)
		goto err;

	spin_lock(&domain->iotlb_lock);
	if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
				  virt_to_phys(orig), VHOST_MAP_RW,
				  domain->file, (u64)iova)) {
		spin_unlock(&domain->iotlb_lock);
		goto err;
	}
	spin_unlock(&domain->iotlb_lock);

	*dma_addr = iova;

	return orig;
err:
	*dma_addr = DMA_MAPPING_ERROR;
	if (orig)
		free_pages_exact(orig, size);
	if (iova)
		vduse_domain_free_iova(iovad, iova, size);

	return NULL;
}
void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
				void *vaddr, dma_addr_t dma_addr,
				unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	struct vhost_iotlb_map *map;
	struct vdpa_map_file *map_file;
	phys_addr_t pa;

	spin_lock(&domain->iotlb_lock);
	map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
				      (u64)dma_addr + size - 1);
	if (WARN_ON(!map)) {
		spin_unlock(&domain->iotlb_lock);
		return;
	}
	map_file = (struct vdpa_map_file *)map->opaque;
	fput(map_file->file);
	kfree(map_file);
	pa = map->addr;
	vhost_iotlb_map_free(domain->iotlb, map);
	spin_unlock(&domain->iotlb_lock);

	vduse_domain_free_iova(iovad, dma_addr, size);
	free_pages_exact(phys_to_virt(pa), size);
}
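
/*
 * Page fault handler for userspace mappings of the domain file:
 * offsets below bounce_size resolve to bounce pages, everything
 * above to coherent pages.
 */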
static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
{
	struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
	unsigned long iova = vmf->pgoff << PAGE_SHIFT;
	struct page *page;

	if (!domain)
		return VM_FAULT_SIGBUS;

	if (iova < domain->bounce_size)
		page = vduse_domain_get_bounce_page(domain, iova);
	else
		page = vduse_domain_get_coherent_page(domain, iova);

	if (!page)
		return VM_FAULT_SIGBUS;

	vmf->page = page;

	return 0;
}
static const struct vm_operations_struct vduse_domain_mmap_ops = {
	.fault = vduse_domain_mmap_fault,
};
static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vduse_iova_domain *domain = file->private_data;

	vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND;
	vma->vm_private_data = domain;
	vma->vm_ops = &vduse_domain_mmap_ops;

	return 0;
}
static int vduse_domain_release(struct inode *inode, struct file *file)
{
	struct vduse_iova_domain *domain = file->private_data;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
	vduse_domain_free_bounce_pages(domain);
	spin_unlock(&domain->iotlb_lock);
	put_iova_domain(&domain->stream_iovad);
	put_iova_domain(&domain->consistent_iovad);
	vhost_iotlb_free(domain->iotlb);
	vfree(domain->bounce_maps);
	kfree(domain);

	return 0;
}
static const struct file_operations vduse_domain_fops = {
	.owner = THIS_MODULE,
	.mmap = vduse_domain_mmap,
	.release = vduse_domain_release,
};
void vduse_domain_destroy(struct vduse_iova_domain *domain)
{
	fput(domain->file);
}
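
/*
 * A domain's lifetime is tied to its anon inode file:
 * vduse_domain_destroy() above only drops the file reference, and the
 * real teardown runs in vduse_domain_release() once the last user
 * (including any userspace mmap of the file) is gone. A caller would
 * pair the two roughly like this (sketch, not taken from this file):
 *
 *	domain = vduse_domain_create(iova_limit, bounce_size);
 *	if (!domain)
 *		return -ENOMEM;
 *	...
 *	vduse_domain_destroy(domain);
 */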
struct vduse_iova_domain *
vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
{
	struct vduse_iova_domain *domain;
	struct file *file;
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;

	bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
	if (iova_limit <= bounce_size)
		return NULL;

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return NULL;

	domain->iotlb = vhost_iotlb_alloc(0, 0);
	if (!domain->iotlb)
		goto err_iotlb;

	domain->iova_limit = iova_limit;
	domain->bounce_size = PAGE_ALIGN(bounce_size);
	domain->bounce_maps = vzalloc(bounce_pfns *
				sizeof(struct vduse_bounce_map));
	if (!domain->bounce_maps)
		goto err_map;

	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		map->orig_phys = INVALID_PHYS_ADDR;
	}
	file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
				  domain, O_RDWR);
	if (IS_ERR(file))
		goto err_file;

	domain->file = file;
	spin_lock_init(&domain->iotlb_lock);
	init_iova_domain(&domain->stream_iovad,
			 PAGE_SIZE, IOVA_START_PFN);
	init_iova_domain(&domain->consistent_iovad,
			 PAGE_SIZE, bounce_pfns);

	return domain;
err_file:
	vfree(domain->bounce_maps);
err_map:
	vhost_iotlb_free(domain->iotlb);
err_iotlb:
	kfree(domain);
	return NULL;
}
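
/*
 * Module-level init/exit only pin and release the global IOVA cache
 * via iova_cache_get()/iova_cache_put().
 */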
int vduse_domain_init(void)
{
	return iova_cache_get();
}

void vduse_domain_exit(void)
{
	iova_cache_put();
}