/*
 * Copyright 2018 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
22 #include "nouveau_dmem.h"
23 #include "nouveau_drv.h"
24 #include "nouveau_chan.h"
25 #include "nouveau_dma.h"
26 #include "nouveau_mem.h"
27 #include "nouveau_bo.h"
28 #include "nouveau_svm.h"
30 #include <nvif/class.h>
31 #include <nvif/object.h>
32 #include <nvif/push906f.h>
33 #include <nvif/if000c.h>
34 #include <nvif/if500b.h>
35 #include <nvif/if900b.h>
36 #include <nvif/if000c.h>
38 #include <nvhw/class/cla0b5.h>
40 #include <linux/sched/mm.h>
41 #include <linux/hmm.h>
/*
 * FIXME: this is ugly. Right now we are using TTM to allocate vram and we pin
 * it in vram while in use. We likely want to overhaul memory management for
 * nouveau to be more page like (not necessarily with system page size but a
 * bigger page size) at the lowest level, and have some shim layer on top that
 * would provide the same functionality as TTM.
 */
#define DMEM_CHUNK_SIZE (2UL << 20)
#define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT)
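/*
 * Device memory is managed in fixed-size chunks: each chunk is backed by a
 * pinned TTM buffer object in VRAM and exposed to the CPU as a range of
 * device-private struct pages.  The aperture values below name the address
 * spaces the copy engine can read from or write to.
 */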
enum nouveau_aper {
	NOUVEAU_APER_VIRT,
	NOUVEAU_APER_VRAM,
	NOUVEAU_APER_HOST,
};

typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages,
				      enum nouveau_aper, u64 dst_addr,
				      enum nouveau_aper, u64 src_addr);
typedef int (*nouveau_clear_page_t)(struct nouveau_drm *drm, u32 length,
				    enum nouveau_aper, u64 dst_addr);
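/*
 * One chunk of device memory.  Each chunk carries its own dev_pagemap, so
 * container_of() on a page's pgmap pointer recovers the owning chunk (see
 * nouveau_page_to_chunk()); callocated counts pages handed out from it.
 */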
struct nouveau_dmem_chunk {
	struct list_head list;
	struct nouveau_bo *bo;
	struct nouveau_drm *drm;
	unsigned long callocated;
	struct dev_pagemap pagemap;
};
struct nouveau_dmem_migrate {
	nouveau_migrate_copy_t copy_func;
	nouveau_clear_page_t clear_func;
	struct nouveau_channel *chan;
};
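/*
 * Top-level per-device state.  The mutex serialises the chunk list; the
 * spinlock protects the free_pages stack, which threads free device pages
 * together through page->zone_device_data.
 */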
struct nouveau_dmem {
	struct nouveau_drm *drm;
	struct nouveau_dmem_migrate migrate;
	struct list_head chunks;
	struct mutex mutex;
	struct page *free_pages;
	spinlock_t lock;
};
static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page)
{
	return container_of(page->pgmap, struct nouveau_dmem_chunk, pagemap);
}
static struct nouveau_drm *page_to_drm(struct page *page)
{
	struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);

	return chunk->drm;
}
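/*
 * Translate a device-private page to its physical VRAM address: the page's
 * offset within the chunk's pagemap resource plus the backing buffer
 * object's offset in VRAM.  This is the address the copy engine uses.
 */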
unsigned long nouveau_dmem_page_addr(struct page *page)
{
	struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
	unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) -
				chunk->pagemap.res.start;

	return chunk->bo->offset + off;
}
static void nouveau_dmem_page_free(struct page *page)
{
	struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
	struct nouveau_dmem *dmem = chunk->drm->dmem;

	spin_lock(&dmem->lock);
	page->zone_device_data = dmem->free_pages;
	dmem->free_pages = page;

	WARN_ON(!chunk->callocated);
	chunk->callocated--;
	/*
	 * FIXME when chunk->callocated reaches 0 we should add the chunk to
	 * a reclaim list so that it can be freed in case of memory pressure.
	 */
	spin_unlock(&dmem->lock);
}
static void nouveau_dmem_fence_done(struct nouveau_fence **fence)
{
	if (fence) {
		nouveau_fence_wait(*fence, true, false);
		nouveau_fence_unref(fence);
	} else {
		/*
		 * FIXME wait for the channel to be IDLE before finalizing.
		 */
	}
}
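/*
 * Copy a single faulting page back to system memory: allocate an anonymous
 * destination page in the faulting VMA, DMA-map it, and ask the copy engine
 * to pull the data out of VRAM.  On success the destination pfn is stored
 * in the migrate_vma dst array for migrate_vma_pages() to install.
 */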
static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm,
		struct vm_fault *vmf, struct migrate_vma *args,
		dma_addr_t *dma_addr)
{
	struct device *dev = drm->dev->dev;
	struct page *dpage, *spage;

	spage = migrate_pfn_to_page(args->src[0]);
	if (!spage || !(args->src[0] & MIGRATE_PFN_MIGRATE))
		return 0;

	dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
	if (!dpage)
		return VM_FAULT_SIGBUS;
	lock_page(dpage);

	*dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, *dma_addr))
		goto error_free_page;

	if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr,
			NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage)))
		goto error_dma_unmap;

	args->dst[0] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
	return 0;

error_dma_unmap:
	dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
error_free_page:
	__free_page(dpage);
	return VM_FAULT_SIGBUS;
}
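/*
 * CPU fault handler for device-private pages.  A one-page migrate_vma
 * window around the faulting address is set up, the data is copied back
 * to system RAM, and the migration is finalized once the copy fence has
 * signalled.
 */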
static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
{
	struct nouveau_drm *drm = page_to_drm(vmf->page);
	struct nouveau_dmem *dmem = drm->dmem;
	struct nouveau_fence *fence;
	unsigned long src = 0, dst = 0;
	dma_addr_t dma_addr = 0;
	vm_fault_t ret;
	struct migrate_vma args = {
		.vma		= vmf->vma,
		.start		= vmf->address,
		.end		= vmf->address + PAGE_SIZE,
		.src		= &src,
		.dst		= &dst,
		.src_owner	= drm->dev,
	};

	/*
	 * FIXME what we really want is to find some heuristic to migrate more
	 * than just one page on CPU fault. When such a fault happens it is
	 * very likely that more surrounding pages will CPU fault too.
	 */
	if (migrate_vma_setup(&args) < 0)
		return VM_FAULT_SIGBUS;
	if (!args.cpages)
		return 0;

	ret = nouveau_dmem_fault_copy_one(drm, vmf, &args, &dma_addr);
	if (ret || dst == 0)
		goto done;

	nouveau_fence_new(dmem->migrate.chan, false, &fence);
	migrate_vma_pages(&args);
	nouveau_dmem_fence_done(&fence);
	dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
done:
	migrate_vma_finalize(&args);
	return ret;
}
static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
	.page_free		= nouveau_dmem_page_free,
	.migrate_to_ram		= nouveau_dmem_migrate_to_ram,
};
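/*
 * Grow the pool by one chunk: reserve an unused physical address range for
 * the device-private pages, back it with a pinned VRAM buffer object,
 * register the range with memremap_pages(), and thread all but the last
 * page onto the free list.  The last page is returned to the caller.
 */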
static int
nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
{
	struct nouveau_dmem_chunk *chunk;
	struct resource *res;
	struct page *page;
	void *ptr;
	unsigned long i, pfn_first;
	int ret;

	chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
	if (chunk == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	/* Allocate unused physical address space for device private pages. */
	res = request_free_mem_region(&iomem_resource, DMEM_CHUNK_SIZE,
				      "nouveau_dmem");
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		goto out_free;
	}

	chunk->drm = drm;
	chunk->pagemap.type = MEMORY_DEVICE_PRIVATE;
	chunk->pagemap.res = *res;
	chunk->pagemap.ops = &nouveau_dmem_pagemap_ops;
	chunk->pagemap.owner = drm->dev;

	ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0,
			     TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL,
			     &chunk->bo);
	if (ret)
		goto out_release;

	ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
	if (ret)
		goto out_bo_free;

	ptr = memremap_pages(&chunk->pagemap, numa_node_id());
	if (IS_ERR(ptr)) {
		ret = PTR_ERR(ptr);
		goto out_bo_unpin;
	}

	mutex_lock(&drm->dmem->mutex);
	list_add(&chunk->list, &drm->dmem->chunks);
	mutex_unlock(&drm->dmem->mutex);

	pfn_first = chunk->pagemap.res.start >> PAGE_SHIFT;
	page = pfn_to_page(pfn_first);
	spin_lock(&drm->dmem->lock);
	for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) {
		page->zone_device_data = drm->dmem->free_pages;
		drm->dmem->free_pages = page;
	}
	*ppage = page;
	chunk->callocated++;
	spin_unlock(&drm->dmem->lock);

	NV_INFO(drm, "DMEM: registered %ldMB of device memory\n",
		DMEM_CHUNK_SIZE >> 20);

	return 0;

out_bo_unpin:
	nouveau_bo_unpin(chunk->bo);
out_bo_free:
	nouveau_bo_ref(NULL, &chunk->bo);
out_release:
	release_mem_region(chunk->pagemap.res.start,
			   resource_size(&chunk->pagemap.res));
out_free:
	kfree(chunk);
out:
	return ret;
}
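/*
 * Pop a page off the free list, falling back to allocating a fresh chunk
 * when the list is empty.  The returned page is referenced and locked,
 * ready to be handed to migrate_vma as a destination page.
 */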
static struct page *
nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;
	struct page *page = NULL;
	int ret;

	spin_lock(&drm->dmem->lock);
	if (drm->dmem->free_pages) {
		page = drm->dmem->free_pages;
		drm->dmem->free_pages = page->zone_device_data;
		chunk = nouveau_page_to_chunk(page);
		chunk->callocated++;
		spin_unlock(&drm->dmem->lock);
	} else {
		spin_unlock(&drm->dmem->lock);
		ret = nouveau_dmem_chunk_alloc(drm, &page);
		if (ret)
			return NULL;
	}

	get_page(page);
	lock_page(page);
	return page;
}
static void
nouveau_dmem_page_free_locked(struct nouveau_drm *drm, struct page *page)
{
	unlock_page(page);
	put_page(page);
}
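/*
 * Suspend and resume do not tear chunks down: the backing buffer objects
 * are simply unpinned on suspend and pinned back into VRAM on resume.
 */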
void
nouveau_dmem_resume(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;
	int ret;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);
	list_for_each_entry(chunk, &drm->dmem->chunks, list) {
		ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
		/* FIXME handle pin failure */
		WARN_ON(ret);
	}
	mutex_unlock(&drm->dmem->mutex);
}
void
nouveau_dmem_suspend(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);
	list_for_each_entry(chunk, &drm->dmem->chunks, list)
		nouveau_bo_unpin(chunk->bo);
	mutex_unlock(&drm->dmem->mutex);
}
void
nouveau_dmem_fini(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk, *tmp;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);

	list_for_each_entry_safe(chunk, tmp, &drm->dmem->chunks, list) {
		nouveau_bo_unpin(chunk->bo);
		nouveau_bo_ref(NULL, &chunk->bo);
		list_del(&chunk->list);
		memunmap_pages(&chunk->pagemap);
		release_mem_region(chunk->pagemap.res.start,
				   resource_size(&chunk->pagemap.res));
		kfree(chunk);
	}

	mutex_unlock(&drm->dmem->mutex);
}
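/*
 * Issue a copy on the DMA copy engine (class A0B5 and compatible): select
 * physical source/destination apertures where requested, program the
 * transfer as npages lines of PAGE_SIZE bytes, and launch it as a
 * non-pipelined multi-line copy with a flush.
 */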
static int
nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages,
		    enum nouveau_aper dst_aper, u64 dst_addr,
		    enum nouveau_aper src_aper, u64 src_addr)
{
	struct nvif_push *push = drm->dmem->migrate.chan->chan.push;
	u32 launch_dma = 0;
	int ret;

	ret = PUSH_WAIT(push, 13);
	if (ret)
		return ret;

	if (src_aper != NOUVEAU_APER_VIRT) {
		switch (src_aper) {
		case NOUVEAU_APER_VRAM:
			PUSH_IMMD(push, NVA0B5, SET_SRC_PHYS_MODE,
				  NVDEF(NVA0B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB));
			break;
		case NOUVEAU_APER_HOST:
			PUSH_IMMD(push, NVA0B5, SET_SRC_PHYS_MODE,
				  NVDEF(NVA0B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM));
			break;
		default:
			return -EINVAL;
		}

		launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL);
	}

	if (dst_aper != NOUVEAU_APER_VIRT) {
		switch (dst_aper) {
		case NOUVEAU_APER_VRAM:
			PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE,
				  NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
			break;
		case NOUVEAU_APER_HOST:
			PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE,
				  NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
			break;
		default:
			return -EINVAL;
		}

		launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
	}

	PUSH_MTHD(push, NVA0B5, OFFSET_IN_UPPER,
		  NVVAL(NVA0B5, OFFSET_IN_UPPER, UPPER, upper_32_bits(src_addr)),

				OFFSET_IN_LOWER, lower_32_bits(src_addr),

				OFFSET_OUT_UPPER,
		  NVVAL(NVA0B5, OFFSET_OUT_UPPER, UPPER, upper_32_bits(dst_addr)),

				OFFSET_OUT_LOWER, lower_32_bits(dst_addr),
				PITCH_IN, PAGE_SIZE,
				PITCH_OUT, PAGE_SIZE,
				LINE_LENGTH_IN, PAGE_SIZE,
				LINE_COUNT, npages);

	PUSH_MTHD(push, NVA0B5, LAUNCH_DMA, launch_dma |
		  NVDEF(NVA0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED) |
		  NVDEF(NVA0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, SEMAPHORE_TYPE, NONE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, INTERRUPT_TYPE, NONE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
		  NVDEF(NVA0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
		  NVDEF(NVA0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, TRUE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, REMAP_ENABLE, FALSE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, BYPASS_L2, USE_PTE_SETTING));
	return 0;
}
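/*
 * Zero-fill VRAM via the copy engine's remap path: with remap enabled and
 * no source, two 32-bit constant components (CONST_A/CONST_B) are written
 * per element, so the transfer length is programmed in 8-byte units
 * (length >> 3).  The raw method offsets below correspond to the named
 * A0B5 methods used in nvc0b5_migrate_copy() above.
 */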
static int
nvc0b5_migrate_clear(struct nouveau_drm *drm, u32 length,
		     enum nouveau_aper dst_aper, u64 dst_addr)
{
	struct nvif_push *push = drm->dmem->migrate.chan->chan.push;
	u32 launch_dma = (1 << 10) /* REMAP_ENABLE_TRUE. */ |
			 (1 << 8) /* DST_MEMORY_LAYOUT_PITCH. */ |
			 (1 << 7) /* SRC_MEMORY_LAYOUT_PITCH. */ |
			 (1 << 2) /* FLUSH_ENABLE_TRUE. */ |
			 (2 << 0) /* DATA_TRANSFER_TYPE_NON_PIPELINED. */;
	u32 remap = (4 << 0) /* DST_X_CONST_A. */ |
		    (5 << 4) /* DST_Y_CONST_B. */ |
		    (3 << 16) /* COMPONENT_SIZE_FOUR. */ |
		    (1 << 24) /* NUM_DST_COMPONENTS_TWO. */;
	int ret;

	ret = PUSH_WAIT(push, 12);
	if (ret)
		return ret;

	switch (dst_aper) {
	case NOUVEAU_APER_VRAM:
		PUSH_NVIM(push, NVA0B5, 0x0264, 0);
		break;
	case NOUVEAU_APER_HOST:
		PUSH_NVIM(push, NVA0B5, 0x0264, 1);
		break;
	default:
		return -EINVAL;
	}
	launch_dma |= 0x00002000; /* DST_TYPE_PHYSICAL. */

	PUSH_NVSQ(push, NVA0B5, 0x0700, 0,
				0x0704, 0,
				0x0708, remap);
	PUSH_NVSQ(push, NVA0B5, 0x0408, upper_32_bits(dst_addr),
				0x040c, lower_32_bits(dst_addr));
	PUSH_NVSQ(push, NVA0B5, 0x0418, length >> 3);
	PUSH_NVSQ(push, NVA0B5, 0x0300, launch_dma);
	return 0;
}
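/*
 * Bind the copy/clear helpers to the device's copy-engine class.  All the
 * supported engines (Pascal through Turing DMA copy) speak the same A0B5
 * method interface, so one implementation covers them all.
 */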
static int
nouveau_dmem_migrate_init(struct nouveau_drm *drm)
{
	switch (drm->ttm.copy.oclass) {
	case PASCAL_DMA_COPY_A:
	case PASCAL_DMA_COPY_B:
	case VOLTA_DMA_COPY_A:
	case TURING_DMA_COPY_A:
		drm->dmem->migrate.copy_func = nvc0b5_migrate_copy;
		drm->dmem->migrate.clear_func = nvc0b5_migrate_clear;
		drm->dmem->migrate.chan = drm->ttm.chan;
		return 0;
	default:
		break;
	}
	return -ENODEV;
}
void
nouveau_dmem_init(struct nouveau_drm *drm)
{
	int ret;

	/* This only makes sense on PASCAL or newer. */
	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL)
		return;

	if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL)))
		return;

	drm->dmem->drm = drm;
	mutex_init(&drm->dmem->mutex);
	INIT_LIST_HEAD(&drm->dmem->chunks);
	spin_lock_init(&drm->dmem->lock);

	/* Initialize migration dma helpers before registering memory. */
	ret = nouveau_dmem_migrate_init(drm);
	if (ret) {
		kfree(drm->dmem);
		drm->dmem = NULL;
	}
}
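/*
 * Mirror of the fault path, in the other direction: copy one page from
 * system memory into a freshly allocated VRAM page.  When there is no
 * source page (the range was never populated), the destination is
 * zero-filled with the clear helper instead.  On success the GPU pfn for
 * nouveau_pfns_map() is composed and the migrate pfn of the device page
 * is returned.
 */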
static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
		unsigned long src, dma_addr_t *dma_addr, u64 *pfn)
{
	struct device *dev = drm->dev->dev;
	struct page *dpage, *spage;
	unsigned long paddr;

	spage = migrate_pfn_to_page(src);
	if (!(src & MIGRATE_PFN_MIGRATE))
		goto out;

	dpage = nouveau_dmem_page_alloc_locked(drm);
	if (!dpage)
		goto out;

	paddr = nouveau_dmem_page_addr(dpage);
	if (spage) {
		*dma_addr = dma_map_page(dev, spage, 0, page_size(spage),
					 DMA_BIDIRECTIONAL);
		if (dma_mapping_error(dev, *dma_addr))
			goto out_free_page;
		if (drm->dmem->migrate.copy_func(drm, 1,
			NOUVEAU_APER_VRAM, paddr, NOUVEAU_APER_HOST, *dma_addr))
			goto out_dma_unmap;
	} else {
		*dma_addr = DMA_MAPPING_ERROR;
		if (drm->dmem->migrate.clear_func(drm, page_size(dpage),
			NOUVEAU_APER_VRAM, paddr))
			goto out_free_page;
	}

	*pfn = NVIF_VMM_PFNMAP_V0_V | NVIF_VMM_PFNMAP_V0_VRAM |
		((paddr >> PAGE_SHIFT) << NVIF_VMM_PFNMAP_V0_ADDR_SHIFT);
	if (src & MIGRATE_PFN_WRITE)
		*pfn |= NVIF_VMM_PFNMAP_V0_W;
	return migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;

out_dma_unmap:
	dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
out_free_page:
	nouveau_dmem_page_free_locked(drm, dpage);
out:
	*pfn = NVIF_VMM_PFNMAP_V0_NONE;
	return 0;
}
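/*
 * Migrate one window of pages to VRAM: queue a copy per page, emit a
 * single fence behind the whole batch, wait for it, then update the GPU
 * page tables via nouveau_pfns_map() and unmap the DMA addresses.  Only
 * successfully mapped pages advance nr_dma, so dma_addrs stays densely
 * packed.
 */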
static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
		struct nouveau_svmm *svmm, struct migrate_vma *args,
		dma_addr_t *dma_addrs, u64 *pfns)
{
	struct nouveau_fence *fence;
	unsigned long addr = args->start, nr_dma = 0, i;

	for (i = 0; addr < args->end; i++) {
		args->dst[i] = nouveau_dmem_migrate_copy_one(drm, args->src[i],
				dma_addrs + nr_dma, pfns + i);
		if (!dma_mapping_error(drm->dev->dev, dma_addrs[nr_dma]))
			nr_dma++;
		addr += PAGE_SIZE;
	}

	nouveau_fence_new(drm->dmem->migrate.chan, false, &fence);
	migrate_vma_pages(args);
	nouveau_dmem_fence_done(&fence);
	nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);

	while (nr_dma--) {
		dma_unmap_page(drm->dev->dev, dma_addrs[nr_dma], PAGE_SIZE,
				DMA_BIDIRECTIONAL);
	}
	migrate_vma_finalize(args);
}
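/*
 * Migrate an arbitrary VMA range to VRAM in windows of at most
 * SG_MAX_SINGLE_ALLOC pages, which bounds the size of the src/dst/pfn
 * arrays allocated below while still handling ranges of any length.
 */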
int
nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
			 struct nouveau_svmm *svmm,
			 struct vm_area_struct *vma,
			 unsigned long start,
			 unsigned long end)
{
	unsigned long npages = (end - start) >> PAGE_SHIFT;
	unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages);
	dma_addr_t *dma_addrs;
	struct migrate_vma args = {
		.vma		= vma,
		.start		= start,
	};
	unsigned long i;
	u64 *pfns;
	int ret = -ENOMEM;

	if (drm->dmem == NULL)
		return -ENODEV;

	args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL);
	if (!args.src)
		goto out;
	args.dst = kcalloc(max, sizeof(*args.dst), GFP_KERNEL);
	if (!args.dst)
		goto out_free_src;

	dma_addrs = kmalloc_array(max, sizeof(*dma_addrs), GFP_KERNEL);
	if (!dma_addrs)
		goto out_free_dst;

	pfns = nouveau_pfns_alloc(max);
	if (!pfns)
		goto out_free_dma;

	for (i = 0; i < npages; i += max) {
		args.end = min(end, args.start + (max << PAGE_SHIFT));
		ret = migrate_vma_setup(&args);
		if (ret)
			goto out_free_pfns;

		if (args.cpages)
			nouveau_dmem_migrate_chunk(drm, svmm, &args, dma_addrs,
						   pfns);
		args.start = args.end;
	}

	ret = 0;
out_free_pfns:
	nouveau_pfns_free(pfns);
out_free_dma:
	kfree(dma_addrs);
out_free_dst:
	kfree(args.dst);
out_free_src:
	kfree(args.src);
out:
	return ret;
}