1 // SPDX-License-Identifier: MIT
3 * Copyright © 2021 Intel Corporation
8 #include <linux/dma-buf.h>
10 #include <drm/drm_drv.h>
11 #include <drm/drm_gem_ttm_helper.h>
12 #include <drm/drm_managed.h>
13 #include <drm/ttm/ttm_device.h>
14 #include <drm/ttm/ttm_placement.h>
15 #include <drm/ttm/ttm_tt.h>
16 #include <drm/xe_drm.h>
18 #include "xe_device.h"
19 #include "xe_dma_buf.h"
20 #include "xe_drm_client.h"
24 #include "xe_migrate.h"
25 #include "xe_preempt_fence.h"
26 #include "xe_res_cursor.h"
28 #include "xe_ttm_stolen_mgr.h"
31 static const struct ttm_place sys_placement_flags = {
34 .mem_type = XE_PL_SYSTEM,
38 static struct ttm_placement sys_placement = {
40 .placement = &sys_placement_flags,
41 .num_busy_placement = 1,
42 .busy_placement = &sys_placement_flags,
45 static const struct ttm_place tt_placement_flags = {
52 static struct ttm_placement tt_placement = {
54 .placement = &tt_placement_flags,
55 .num_busy_placement = 1,
56 .busy_placement = &sys_placement_flags,
59 bool mem_type_is_vram(u32 mem_type)
61 return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
64 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
66 return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
69 static bool resource_is_vram(struct ttm_resource *res)
71 return mem_type_is_vram(res->mem_type);
74 bool xe_bo_is_vram(struct xe_bo *bo)
76 return resource_is_vram(bo->ttm.resource) ||
77 resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
80 bool xe_bo_is_stolen(struct xe_bo *bo)
82 return bo->ttm.resource->mem_type == XE_PL_STOLEN;
86 * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
89 * The stolen memory is accessed through the PCI BAR for both DGFX and some
90 * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
92 * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
94 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
96 return xe_bo_is_stolen(bo) &&
97 GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
100 static bool xe_bo_is_user(struct xe_bo *bo)
102 return bo->flags & XE_BO_CREATE_USER_BIT;
105 static struct xe_migrate *
106 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
108 struct xe_tile *tile;
110 xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
111 tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
112 return tile->migrate;
115 static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
117 struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
118 struct ttm_resource_manager *mgr;
120 xe_assert(xe, resource_is_vram(res));
121 mgr = ttm_manager_type(&xe->ttm, res->mem_type);
122 return to_xe_ttm_vram_mgr(mgr)->vram;
125 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
126 u32 bo_flags, u32 *c)
128 xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
130 if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
131 bo->placements[*c] = (struct ttm_place) {
132 .mem_type = XE_PL_TT,
136 if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
137 bo->props.preferred_mem_type = XE_PL_TT;
141 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
142 struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
144 struct ttm_place place = { .mem_type = mem_type };
145 struct xe_mem_region *vram;
148 vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
149 xe_assert(xe, vram && vram->usable_size);
150 io_size = vram->io_size;
153 * For eviction / restore on suspend / resume, objects
154 * pinned in VRAM must be contiguous
156 if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
157 XE_BO_CREATE_GGTT_BIT))
158 place.flags |= TTM_PL_FLAG_CONTIGUOUS;
160 if (io_size < vram->usable_size) {
161 if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
163 place.lpfn = io_size >> PAGE_SHIFT;
165 place.flags |= TTM_PL_FLAG_TOPDOWN;
171 if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
172 bo->props.preferred_mem_type = mem_type;
175 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
176 u32 bo_flags, u32 *c)
178 xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
180 if (bo->props.preferred_gt == XE_GT1) {
181 if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
182 add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
183 if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
184 add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
186 if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
187 add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
188 if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
189 add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
193 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
194 u32 bo_flags, u32 *c)
196 xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
198 if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
199 bo->placements[*c] = (struct ttm_place) {
200 .mem_type = XE_PL_STOLEN,
201 .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
202 XE_BO_CREATE_GGTT_BIT) ?
203 TTM_PL_FLAG_CONTIGUOUS : 0,
209 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
214 bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
216 /* The order of placements should indicate preferred location */
218 if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) {
219 try_add_system(xe, bo, bo_flags, &c);
220 try_add_vram(xe, bo, bo_flags, &c);
222 try_add_vram(xe, bo, bo_flags, &c);
223 try_add_system(xe, bo, bo_flags, &c);
225 try_add_stolen(xe, bo, bo_flags, &c);
230 bo->placement = (struct ttm_placement) {
232 .placement = bo->placements,
233 .num_busy_placement = c,
234 .busy_placement = bo->placements,
240 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
243 xe_bo_assert_held(bo);
244 return __xe_bo_placement_for_flags(xe, bo, bo_flags);
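/*
 * Illustrative sketch only (not driver code): how a caller already holding
 * the bo lock might request a VRAM-preferred placement list with a system
 * fallback using the flags above; error handling is elided.
 *
 *	u32 bo_flags = XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_SYSTEM_BIT;
 *	int err = xe_bo_placement_for_flags(xe, bo, bo_flags);
 *
 * With the default preferred_mem_class this puts XE_PL_VRAM0 first and
 * XE_PL_TT after it, matching the "order indicates preference" rule in
 * __xe_bo_placement_for_flags().
 */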
247 static void xe_evict_flags(struct ttm_buffer_object *tbo,
248 struct ttm_placement *placement)
250 if (!xe_bo_is_xe_bo(tbo)) {
251 /* Don't handle scatter gather BOs */
252 if (tbo->type == ttm_bo_type_sg) {
253 placement->num_placement = 0;
254 placement->num_busy_placement = 0;
258 *placement = sys_placement;
263 * For xe, sg bos that are evicted to system just trigger a
264 * rebind of the sg list upon subsequent validation to XE_PL_TT.
266 switch (tbo->resource->mem_type) {
270 *placement = tt_placement;
274 *placement = sys_placement;
286 static int xe_tt_map_sg(struct ttm_tt *tt)
288 struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
289 unsigned long num_pages = tt->num_pages;
292 XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
297 ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
299 (u64)num_pages << PAGE_SHIFT,
300 xe_sg_segment_size(xe_tt->dev),
305 xe_tt->sg = &xe_tt->sgt;
306 ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
307 DMA_ATTR_SKIP_CPU_SYNC);
309 sg_free_table(xe_tt->sg);
317 struct sg_table *xe_bo_sg(struct xe_bo *bo)
319 struct ttm_tt *tt = bo->ttm.ttm;
320 struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
325 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
328 struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
329 struct xe_device *xe = xe_bo_device(bo);
330 struct xe_ttm_tt *tt;
331 unsigned long extra_pages;
332 enum ttm_caching caching;
335 tt = kzalloc(sizeof(*tt), GFP_KERNEL);
339 tt->dev = xe->drm.dev;
342 if (xe_bo_needs_ccs_pages(bo))
343 extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
346 switch (bo->cpu_caching) {
347 case DRM_XE_GEM_CPU_CACHING_WC:
348 caching = ttm_write_combined;
351 caching = ttm_cached;
355 WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching);
358 * Display scanout is always non-coherent with the CPU cache.
360 * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
361 * require a CPU:WC mapping.
363 if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) ||
364 (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE))
365 caching = ttm_write_combined;
367 err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
376 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
377 struct ttm_operation_ctx *ctx)
382 * dma-bufs are not populated with pages, and the dma-
383 * addresses are set up when moved to XE_PL_TT.
385 if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
388 err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
392 /* A follow-up may move this to xe_bo_move() when the BO is moved to XE_PL_TT */
393 err = xe_tt_map_sg(tt);
395 ttm_pool_free(&ttm_dev->pool, tt);
400 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
402 struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
404 if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
408 dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
409 DMA_BIDIRECTIONAL, 0);
410 sg_free_table(xe_tt->sg);
414 return ttm_pool_free(&ttm_dev->pool, tt);
417 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
423 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
424 struct ttm_resource *mem)
426 struct xe_device *xe = ttm_to_xe_device(bdev);
428 switch (mem->mem_type) {
434 struct xe_ttm_vram_mgr_resource *vres =
435 to_xe_ttm_vram_mgr_resource(mem);
436 struct xe_mem_region *vram = res_to_mem_region(mem);
438 if (vres->used_visible_size < mem->size)
441 mem->bus.offset = mem->start << PAGE_SHIFT;
444 mem->placement & TTM_PL_FLAG_CONTIGUOUS)
445 mem->bus.addr = (u8 __force *)vram->mapping +
448 mem->bus.offset += vram->io_start;
449 mem->bus.is_iomem = true;
451 #if !defined(CONFIG_X86)
452 mem->bus.caching = ttm_write_combined;
456 return xe_ttm_stolen_io_mem_reserve(xe, mem);
462 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
463 const struct ttm_operation_ctx *ctx)
465 struct dma_resv_iter cursor;
466 struct dma_fence *fence;
467 struct drm_gem_object *obj = &bo->ttm.base;
468 struct drm_gpuvm_bo *vm_bo;
472 dma_resv_assert_held(bo->ttm.base.resv);
474 if (!list_empty(&bo->ttm.base.gpuva.list)) {
475 dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
476 DMA_RESV_USAGE_BOOKKEEP);
477 dma_resv_for_each_fence_unlocked(&cursor, fence)
478 dma_fence_enable_sw_signaling(fence);
479 dma_resv_iter_end(&cursor);
482 drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
483 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
484 struct drm_gpuva *gpuva;
486 if (!xe_vm_in_fault_mode(vm)) {
487 drm_gpuvm_bo_evict(vm_bo, true);
494 if (ctx->no_wait_gpu &&
495 !dma_resv_test_signaled(bo->ttm.base.resv,
496 DMA_RESV_USAGE_BOOKKEEP))
499 timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
500 DMA_RESV_USAGE_BOOKKEEP,
502 MAX_SCHEDULE_TIMEOUT);
511 drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
512 struct xe_vma *vma = gpuva_to_vma(gpuva);
514 trace_xe_vma_evict(vma);
515 ret = xe_vm_invalidate_vma(vma);
525 * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
526 * Note that unmapping the attachment is deferred to the next
527 * map_attachment time, or to bo destroy (after idling), whichever comes first.
528 * This is to avoid syncing before unmap_attachment(), assuming that the
529 * caller relies on idling the reservation object before moving the
530 * backing store out. Should that assumption not hold, then we will be able
531 * to unconditionally call unmap_attachment() when moving out to system.
533 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
534 struct ttm_resource *new_res)
536 struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
537 struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
539 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
542 xe_assert(xe, attach);
543 xe_assert(xe, ttm_bo->ttm);
545 if (new_res->mem_type == XE_PL_SYSTEM)
549 dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
553 sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
561 ttm_bo_move_null(ttm_bo, new_res);
567 * xe_bo_move_notify - Notify subsystems of a pending move
568 * @bo: The buffer object
569 * @ctx: The struct ttm_operation_ctx controlling locking and waits.
571 * This function notifies subsystems of an upcoming buffer move.
572 * Upon receiving such a notification, subsystems should schedule
573 * halting access to the underlying pages and optionally add a fence
574 * to the buffer object's dma_resv object that signals when access is
575 * stopped. The caller will wait on all dma_resv fences before proceeding with the move.
578 * A subsystem may commence access to the object after obtaining
579 * bindings to the new backing memory under the object lock.
581 * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
582 * negative error code on error.
584 static int xe_bo_move_notify(struct xe_bo *bo,
585 const struct ttm_operation_ctx *ctx)
587 struct ttm_buffer_object *ttm_bo = &bo->ttm;
588 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
589 struct ttm_resource *old_mem = ttm_bo->resource;
590 u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
594 * If this starts to call into many components, consider
595 * using a notification chain here.
598 if (xe_bo_is_pinned(bo))
602 ret = xe_bo_trigger_rebind(xe, bo, ctx);
606 /* Don't call move_notify() for imported dma-bufs. */
607 if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
608 dma_buf_move_notify(ttm_bo->base.dma_buf);
611 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
612 * so if we moved from VRAM, make sure to unlink this from the userfault tracking.
615 if (mem_type_is_vram(old_mem_type)) {
616 mutex_lock(&xe->mem_access.vram_userfault.lock);
617 if (!list_empty(&bo->vram_userfault_link))
618 list_del_init(&bo->vram_userfault_link);
619 mutex_unlock(&xe->mem_access.vram_userfault.lock);
625 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
626 struct ttm_operation_ctx *ctx,
627 struct ttm_resource *new_mem,
628 struct ttm_place *hop)
630 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
631 struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
632 struct ttm_resource *old_mem = ttm_bo->resource;
633 u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
634 struct ttm_tt *ttm = ttm_bo->ttm;
635 struct xe_migrate *migrate = NULL;
636 struct dma_fence *fence;
637 bool move_lacks_source;
640 bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
641 ttm && ttm_tt_is_populated(ttm)) ? true : false;
643 /* Bo creation path, moving to system or TT. */
644 if ((!old_mem && ttm) && !handle_system_ccs) {
645 ttm_bo_move_null(ttm_bo, new_mem);
649 if (ttm_bo->type == ttm_bo_type_sg) {
650 ret = xe_bo_move_notify(bo, ctx);
652 ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
656 tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
657 (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
659 move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) :
660 (!mem_type_is_vram(old_mem_type) && !tt_has_data);
662 needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
663 (!ttm && ttm_bo->type == ttm_bo_type_device);
665 if ((move_lacks_source && !needs_clear)) {
666 ttm_bo_move_null(ttm_bo, new_mem);
670 if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
671 ttm_bo_move_null(ttm_bo, new_mem);
676 * Failed multi-hop where the old_mem is still marked as
677 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
679 if (old_mem_type == XE_PL_TT &&
680 new_mem->mem_type == XE_PL_TT) {
681 ttm_bo_move_null(ttm_bo, new_mem);
685 if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
686 ret = xe_bo_move_notify(bo, ctx);
691 if (old_mem_type == XE_PL_TT &&
692 new_mem->mem_type == XE_PL_SYSTEM) {
693 long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
694 DMA_RESV_USAGE_BOOKKEEP,
696 MAX_SCHEDULE_TIMEOUT);
702 if (!handle_system_ccs) {
703 ttm_bo_move_null(ttm_bo, new_mem);
708 if (!move_lacks_source &&
709 ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
710 (mem_type_is_vram(old_mem_type) &&
711 new_mem->mem_type == XE_PL_SYSTEM))) {
714 hop->mem_type = XE_PL_TT;
715 hop->flags = TTM_PL_FLAG_TEMPORARY;
721 migrate = bo->tile->migrate;
722 else if (resource_is_vram(new_mem))
723 migrate = mem_type_to_migrate(xe, new_mem->mem_type);
724 else if (mem_type_is_vram(old_mem_type))
725 migrate = mem_type_to_migrate(xe, old_mem_type);
727 migrate = xe->tiles[0].migrate;
729 xe_assert(xe, migrate);
731 trace_xe_bo_move(bo);
732 xe_device_mem_access_get(xe);
734 if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
736 * Kernel memory that is pinned should only be moved on suspend
737 * / resume; some of the pinned memory is required for the
738 * device to resume / use the GPU to move other evicted memory
739 * (user memory) around. This likely could be optimized a bit
740 * further by finding the minimum set of pinned memory
741 * required for resume, but for simplicity we do a memcpy for all pinned memory.
744 ret = xe_bo_vmap(bo);
746 ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
748 /* Create a new VMAP once the kernel BO is back in VRAM */
749 if (!ret && resource_is_vram(new_mem)) {
750 struct xe_mem_region *vram = res_to_mem_region(new_mem);
751 void __iomem *new_addr = vram->mapping +
752 (new_mem->start << PAGE_SHIFT);
754 if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
756 xe_device_mem_access_put(xe);
760 xe_assert(xe, new_mem->start ==
761 bo->placements->fpfn);
763 iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
767 if (move_lacks_source)
768 fence = xe_migrate_clear(migrate, bo, new_mem);
770 fence = xe_migrate_copy(migrate, bo, bo, old_mem,
771 new_mem, handle_system_ccs);
773 ret = PTR_ERR(fence);
774 xe_device_mem_access_put(xe);
777 if (!move_lacks_source) {
778 ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
781 dma_fence_wait(fence, false);
782 ttm_bo_move_null(ttm_bo, new_mem);
787 * ttm_bo_move_accel_cleanup() may blow up if
788 * bo->resource == NULL, so just attach the
789 * fence and set the new resource.
791 dma_resv_add_fence(ttm_bo->base.resv, fence,
792 DMA_RESV_USAGE_KERNEL);
793 ttm_bo_move_null(ttm_bo, new_mem);
796 dma_fence_put(fence);
799 xe_device_mem_access_put(xe);
807 * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
808 * @bo: The buffer object to move.
810 * On successful completion, the object memory will be moved to system memory.
811 * This function blocks until the object has been fully moved.
813 * This is needed for special handling of pinned VRAM objects during suspend / resume.
816 * Return: 0 on success. Negative error code on failure.
818 int xe_bo_evict_pinned(struct xe_bo *bo)
820 struct ttm_place place = {
821 .mem_type = XE_PL_TT,
823 struct ttm_placement placement = {
827 struct ttm_operation_ctx ctx = {
828 .interruptible = false,
830 struct ttm_resource *new_mem;
833 xe_bo_assert_held(bo);
835 if (WARN_ON(!bo->ttm.resource))
838 if (WARN_ON(!xe_bo_is_pinned(bo)))
841 if (WARN_ON(!xe_bo_is_vram(bo)))
844 ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
849 bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
856 ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
860 ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
864 ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
868 dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
869 false, MAX_SCHEDULE_TIMEOUT);
874 ttm_resource_free(&bo->ttm, &new_mem);
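/*
 * Minimal suspend-path sketch, assuming the caller owns a bo reference;
 * the actual suspend/resume callers live outside this file:
 *
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_evict_pinned(bo);
 *	xe_bo_unlock(bo);
 *
 * xe_bo_restore_pinned() below performs the inverse move on resume.
 */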
879 * xe_bo_restore_pinned() - Restore a pinned VRAM object
880 * @bo: The buffer object to move.
882 * On successful completion, the object memory will be moved back to VRAM.
883 * This function blocks until the object has been fully moved.
885 * This is needed for special handling of pinned VRAM objects during suspend / resume.
888 * Return: 0 on success. Negative error code on failure.
890 int xe_bo_restore_pinned(struct xe_bo *bo)
892 struct ttm_operation_ctx ctx = {
893 .interruptible = false,
895 struct ttm_resource *new_mem;
898 xe_bo_assert_held(bo);
900 if (WARN_ON(!bo->ttm.resource))
903 if (WARN_ON(!xe_bo_is_pinned(bo)))
906 if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
909 ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
913 ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
917 ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
921 ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
925 dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
926 false, MAX_SCHEDULE_TIMEOUT);
931 ttm_resource_free(&bo->ttm, &new_mem);
935 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
936 unsigned long page_offset)
938 struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
939 struct xe_res_cursor cursor;
940 struct xe_mem_region *vram;
942 if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
943 return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
945 vram = res_to_mem_region(ttm_bo->resource);
946 xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
947 return (vram->io_start + cursor.start) >> PAGE_SHIFT;
950 static void __xe_bo_vunmap(struct xe_bo *bo);
953 * TODO: Move this function to TTM so we don't rely on how TTM does its
954 * locking, thereby abusing TTM internals.
956 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
958 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
961 xe_assert(xe, !kref_read(&ttm_bo->kref));
964 * We can typically only race with TTM trylocking under the
965 * lru_lock, which will immediately be unlocked again since
966 * the ttm_bo refcount is zero at this point. So trylocking *should*
967 * always succeed here, as long as we hold the lru lock.
969 spin_lock(&ttm_bo->bdev->lru_lock);
970 locked = dma_resv_trylock(ttm_bo->base.resv);
971 spin_unlock(&ttm_bo->bdev->lru_lock);
972 xe_assert(xe, locked);
977 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
979 struct dma_resv_iter cursor;
980 struct dma_fence *fence;
981 struct dma_fence *replacement = NULL;
984 if (!xe_bo_is_xe_bo(ttm_bo))
987 bo = ttm_to_xe_bo(ttm_bo);
988 xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
991 * Corner case where TTM fails to allocate memory and this BO's resv
992 * still points to the VM's resv
994 if (ttm_bo->base.resv != &ttm_bo->base._resv)
997 if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
1001 * Scrub the preempt fences if any. The unbind fence is already
1002 * attached to the resv.
1003 * TODO: Don't do this for external bos once we scrub them after unbind.
1006 dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1007 DMA_RESV_USAGE_BOOKKEEP, fence) {
1008 if (xe_fence_is_xe_preempt(fence) &&
1009 !dma_fence_is_signaled(fence)) {
1011 replacement = dma_fence_get_stub();
1013 dma_resv_replace_fences(ttm_bo->base.resv,
1016 DMA_RESV_USAGE_BOOKKEEP);
1019 dma_fence_put(replacement);
1021 dma_resv_unlock(ttm_bo->base.resv);
1024 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1026 if (!xe_bo_is_xe_bo(ttm_bo))
1030 * Object is idle and about to be destroyed. Release the
1031 * dma-buf attachment.
1033 if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1034 struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1035 struct xe_ttm_tt, ttm);
1037 dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1044 struct ttm_device_funcs xe_ttm_funcs = {
1045 .ttm_tt_create = xe_ttm_tt_create,
1046 .ttm_tt_populate = xe_ttm_tt_populate,
1047 .ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1048 .ttm_tt_destroy = xe_ttm_tt_destroy,
1049 .evict_flags = xe_evict_flags,
1051 .io_mem_reserve = xe_ttm_io_mem_reserve,
1052 .io_mem_pfn = xe_ttm_io_mem_pfn,
1053 .release_notify = xe_ttm_bo_release_notify,
1054 .eviction_valuable = ttm_bo_eviction_valuable,
1055 .delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1058 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1060 struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1061 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1063 if (bo->ttm.base.import_attach)
1064 drm_prime_gem_destroy(&bo->ttm.base, NULL);
1065 drm_gem_object_release(&bo->ttm.base);
1067 xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1069 if (bo->ggtt_node.size)
1070 xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
1072 #ifdef CONFIG_PROC_FS
1074 xe_drm_client_remove_bo(bo);
1077 if (bo->vm && xe_bo_is_user(bo))
1080 mutex_lock(&xe->mem_access.vram_userfault.lock);
1081 if (!list_empty(&bo->vram_userfault_link))
1082 list_del(&bo->vram_userfault_link);
1083 mutex_unlock(&xe->mem_access.vram_userfault.lock);
1088 static void xe_gem_object_free(struct drm_gem_object *obj)
1090 /* Our BO reference counting scheme works as follows:
1092 * The gem object kref is typically used throughout the driver,
1093 * and the gem object holds a ttm_buffer_object refcount, so
1094 * that when the last gem object reference is put, which is when
1095 * we end up in this function, we also put that ttm_buffer_object
1096 * refcount. Anything using gem interfaces is then no longer
1097 * allowed to access the object in a way that requires a gem
1098 * refcount, including locking the object.
1100 * driver TTM callbacks are allowed to use the ttm_buffer_object
1101 * refcount directly if needed.
1103 __xe_bo_vunmap(gem_to_xe_bo(obj));
1104 ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1107 static void xe_gem_object_close(struct drm_gem_object *obj,
1108 struct drm_file *file_priv)
1110 struct xe_bo *bo = gem_to_xe_bo(obj);
1112 if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1113 xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1115 xe_bo_lock(bo, false);
1116 ttm_bo_set_bulk_move(&bo->ttm, NULL);
1121 static bool should_migrate_to_system(struct xe_bo *bo)
1123 struct xe_device *xe = xe_bo_device(bo);
1125 return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic;
1128 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1130 struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1131 struct drm_device *ddev = tbo->base.dev;
1132 struct xe_device *xe = to_xe_device(ddev);
1133 struct xe_bo *bo = ttm_to_xe_bo(tbo);
1134 bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK;
1139 xe_device_mem_access_get(xe);
1141 ret = ttm_bo_vm_reserve(tbo, vmf);
1145 if (drm_dev_enter(ddev, &idx)) {
1146 trace_xe_bo_cpu_fault(bo);
1148 if (should_migrate_to_system(bo)) {
1149 r = xe_bo_migrate(bo, XE_PL_TT);
1150 if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
1151 ret = VM_FAULT_NOPAGE;
1153 ret = VM_FAULT_SIGBUS;
1156 ret = ttm_bo_vm_fault_reserved(vmf,
1157 vmf->vma->vm_page_prot,
1158 TTM_BO_VM_NUM_PREFAULT);
1161 ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1164 if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1167 * ttm_bo_vm_reserve() already has dma_resv_lock.
1169 if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
1170 mutex_lock(&xe->mem_access.vram_userfault.lock);
1171 if (list_empty(&bo->vram_userfault_link))
1172 list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
1173 mutex_unlock(&xe->mem_access.vram_userfault.lock);
1176 dma_resv_unlock(tbo->base.resv);
1179 xe_device_mem_access_put(xe);
1184 static const struct vm_operations_struct xe_gem_vm_ops = {
1185 .fault = xe_gem_fault,
1186 .open = ttm_bo_vm_open,
1187 .close = ttm_bo_vm_close,
1188 .access = ttm_bo_vm_access
1191 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1192 .free = xe_gem_object_free,
1193 .close = xe_gem_object_close,
1194 .mmap = drm_gem_ttm_mmap,
1195 .export = xe_gem_prime_export,
1196 .vm_ops = &xe_gem_vm_ops,
1200 * xe_bo_alloc - Allocate storage for a struct xe_bo
1202 * This function is intended to allocate storage to be used as input
1203 * to __xe_bo_create_locked(), in case a pointer to the bo to be
1204 * created is needed before the call to __xe_bo_create_locked().
1205 * If __xe_bo_create_locked() ends up never being called, then the
1206 * storage allocated with this function needs to be freed using xe_bo_free().
1209 * Return: A pointer to an uninitialized struct xe_bo on success,
1210 * ERR_PTR(-ENOMEM) on error.
1212 struct xe_bo *xe_bo_alloc(void)
1214 struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1217 return ERR_PTR(-ENOMEM);
1223 * xe_bo_free - Free storage allocated using xe_bo_alloc()
1224 * @bo: The buffer object storage.
1226 * Refer to xe_bo_alloc() documentation for valid use-cases.
1228 void xe_bo_free(struct xe_bo *bo)
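/*
 * Minimal sketch of the pairing described above; the surrounding logic is
 * hypothetical:
 *
 *	struct xe_bo *bo = xe_bo_alloc();
 *
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *
 *	If __xe_bo_create_locked() then never gets called:
 *
 *	xe_bo_free(bo);
 */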
1233 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1234 struct xe_tile *tile, struct dma_resv *resv,
1235 struct ttm_lru_bulk_move *bulk, size_t size,
1236 u16 cpu_caching, enum ttm_bo_type type,
1239 struct ttm_operation_ctx ctx = {
1240 .interruptible = true,
1241 .no_wait_gpu = false,
1243 struct ttm_placement *placement;
1245 size_t aligned_size;
1248 /* Only kernel objects should set GT */
1249 xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1251 if (XE_WARN_ON(!size)) {
1253 return ERR_PTR(-EINVAL);
1256 if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) &&
1257 !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
1258 xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
1259 aligned_size = ALIGN(size, SZ_64K);
1260 if (type != ttm_bo_type_device)
1261 size = ALIGN(size, SZ_64K);
1262 flags |= XE_BO_INTERNAL_64K;
1263 alignment = SZ_64K >> PAGE_SHIFT;
1266 aligned_size = ALIGN(size, SZ_4K);
1267 flags &= ~XE_BO_INTERNAL_64K;
1268 alignment = SZ_4K >> PAGE_SHIFT;
1271 if (type == ttm_bo_type_device && aligned_size != size)
1272 return ERR_PTR(-EINVAL);
1280 bo->ccs_cleared = false;
1284 bo->cpu_caching = cpu_caching;
1285 bo->ttm.base.funcs = &xe_gem_object_funcs;
1286 bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
1287 bo->props.preferred_gt = XE_BO_PROPS_INVALID;
1288 bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
1289 bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1290 INIT_LIST_HEAD(&bo->pinned_link);
1291 #ifdef CONFIG_PROC_FS
1292 INIT_LIST_HEAD(&bo->client_link);
1294 INIT_LIST_HEAD(&bo->vram_userfault_link);
1296 drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1299 ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT);
1303 if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) {
1304 err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1306 xe_ttm_bo_destroy(&bo->ttm);
1307 return ERR_PTR(err);
1311 /* Defer populating type_sg bos */
1312 placement = (type == ttm_bo_type_sg ||
1313 bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement :
1315 err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1316 placement, alignment,
1317 &ctx, NULL, resv, xe_ttm_bo_destroy);
1319 return ERR_PTR(err);
1322 * The VRAM pages underneath are potentially still being accessed by the
1323 * GPU, as per async GPU clearing and async evictions. However TTM makes
1324 * sure to add any corresponding move/clear fences into the object's
1325 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1327 * For KMD internal buffers we don't care about GPU clearing; however, we
1328 * still need to handle async evictions, where the VRAM is still being
1329 * accessed by the GPU. Most internal callers are not expecting this,
1330 * since they are missing the required synchronisation before accessing
1331 * the memory. To keep things simple just sync wait any kernel fences
1332 * here, if the buffer is designated KMD internal.
1334 * For normal userspace objects we should already have the required
1335 * pipelining or sync waiting elsewhere, since we already have to deal
1336 * with things like async GPU clearing.
1338 if (type == ttm_bo_type_kernel) {
1339 long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1340 DMA_RESV_USAGE_KERNEL,
1342 MAX_SCHEDULE_TIMEOUT);
1346 dma_resv_unlock(bo->ttm.base.resv);
1348 return ERR_PTR(timeout);
1354 ttm_bo_set_bulk_move(&bo->ttm, bulk);
1356 ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1361 static int __xe_bo_fixed_placement(struct xe_device *xe,
1364 u64 start, u64 end, u64 size)
1366 struct ttm_place *place = bo->placements;
1368 if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT))
1371 place->flags = TTM_PL_FLAG_CONTIGUOUS;
1372 place->fpfn = start >> PAGE_SHIFT;
1373 place->lpfn = end >> PAGE_SHIFT;
1375 switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) {
1376 case XE_BO_CREATE_VRAM0_BIT:
1377 place->mem_type = XE_PL_VRAM0;
1379 case XE_BO_CREATE_VRAM1_BIT:
1380 place->mem_type = XE_PL_VRAM1;
1382 case XE_BO_CREATE_STOLEN_BIT:
1383 place->mem_type = XE_PL_STOLEN;
1387 /* 0 or multiple of the above set */
1391 bo->placement = (struct ttm_placement) {
1394 .num_busy_placement = 1,
1395 .busy_placement = place,
1401 static struct xe_bo *
1402 __xe_bo_create_locked(struct xe_device *xe,
1403 struct xe_tile *tile, struct xe_vm *vm,
1404 size_t size, u64 start, u64 end,
1405 u16 cpu_caching, enum ttm_bo_type type, u32 flags)
1407 struct xe_bo *bo = NULL;
1411 xe_vm_assert_held(vm);
1413 if (start || end != ~0ULL) {
1418 flags |= XE_BO_FIXED_PLACEMENT_BIT;
1419 err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
1422 return ERR_PTR(err);
1426 bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
1427 vm && !xe_vm_in_fault_mode(vm) &&
1428 flags & XE_BO_CREATE_USER_BIT ?
1429 &vm->lru_bulk_move : NULL, size,
1430 cpu_caching, type, flags);
1435 * Note that instead of taking a reference to the drm_gpuvm_resv_bo(),
1436 * to ensure the shared resv doesn't disappear under the bo, the bo
1437 * will keep a reference to the vm, and avoid circular references
1438 * by having all the vm's bo references released at vm close time.
1441 if (vm && xe_bo_is_user(bo))
1445 if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
1446 if (!tile && flags & XE_BO_CREATE_STOLEN_BIT)
1447 tile = xe_device_get_root_tile(xe);
1449 xe_assert(xe, tile);
1451 if (flags & XE_BO_FIXED_PLACEMENT_BIT) {
1452 err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
1453 start + bo->size, U64_MAX);
1455 err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
1458 goto err_unlock_put_bo;
1464 __xe_bo_unset_bulk_move(bo);
1465 xe_bo_unlock_vm_held(bo);
1467 return ERR_PTR(err);
1471 xe_bo_create_locked_range(struct xe_device *xe,
1472 struct xe_tile *tile, struct xe_vm *vm,
1473 size_t size, u64 start, u64 end,
1474 enum ttm_bo_type type, u32 flags)
1476 return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
1479 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
1480 struct xe_vm *vm, size_t size,
1481 enum ttm_bo_type type, u32 flags)
1483 return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
1486 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
1487 struct xe_vm *vm, size_t size,
1489 enum ttm_bo_type type,
1492 struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
1494 flags | XE_BO_CREATE_USER_BIT);
1496 xe_bo_unlock_vm_held(bo);
1501 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
1502 struct xe_vm *vm, size_t size,
1503 enum ttm_bo_type type, u32 flags)
1505 struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
1508 xe_bo_unlock_vm_held(bo);
1513 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
1515 size_t size, u64 offset,
1516 enum ttm_bo_type type, u32 flags)
1520 u64 start = offset == ~0ull ? 0 : offset;
1521 u64 end = offset == ~0ull ? offset : start + size;
1523 if (flags & XE_BO_CREATE_STOLEN_BIT &&
1524 xe_ttm_stolen_cpu_access_needs_ggtt(xe))
1525 flags |= XE_BO_CREATE_GGTT_BIT;
1527 bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
1528 flags | XE_BO_NEEDS_CPU_ACCESS);
1532 err = xe_bo_pin(bo);
1536 err = xe_bo_vmap(bo);
1540 xe_bo_unlock_vm_held(bo);
1547 xe_bo_unlock_vm_held(bo);
1549 return ERR_PTR(err);
1552 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1553 struct xe_vm *vm, size_t size,
1554 enum ttm_bo_type type, u32 flags)
1556 return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
1559 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1560 const void *data, size_t size,
1561 enum ttm_bo_type type, u32 flags)
1563 struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
1564 ALIGN(size, PAGE_SIZE),
1569 xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1574 static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg)
1576 xe_bo_unpin_map_no_vm(arg);
1579 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1580 size_t size, u32 flags)
1585 bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
1589 ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo);
1591 return ERR_PTR(ret);
1596 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1597 const void *data, size_t size, u32 flags)
1599 struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
1604 xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1610 * XXX: This is in the VM bind data path, likely should calculate this once and
1611 * store, with a recalculation if the BO is moved.
1613 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
1615 struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
1617 if (res->mem_type == XE_PL_STOLEN)
1618 return xe_ttm_stolen_gpu_offset(xe);
1620 return res_to_mem_region(res)->dpa_base;
1624 * xe_bo_pin_external - pin an external BO
1625 * @bo: buffer object to be pinned
1627 * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1628 * BO. Unique call compared to xe_bo_pin as this function has its own set of
1629 * asserts and code to ensure evict / restore on suspend / resume.
1631 * Returns 0 for success, negative error code otherwise.
1633 int xe_bo_pin_external(struct xe_bo *bo)
1635 struct xe_device *xe = xe_bo_device(bo);
1638 xe_assert(xe, !bo->vm);
1639 xe_assert(xe, xe_bo_is_user(bo));
1641 if (!xe_bo_is_pinned(bo)) {
1642 err = xe_bo_validate(bo, NULL, false);
1646 if (xe_bo_is_vram(bo)) {
1647 spin_lock(&xe->pinned.lock);
1648 list_add_tail(&bo->pinned_link,
1649 &xe->pinned.external_vram);
1650 spin_unlock(&xe->pinned.lock);
1654 ttm_bo_pin(&bo->ttm);
1657 * FIXME: If we always use the reserve / unreserve functions for locking
1658 * we do not need this.
1660 ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1665 int xe_bo_pin(struct xe_bo *bo)
1667 struct xe_device *xe = xe_bo_device(bo);
1670 /* We currently don't expect user BO to be pinned */
1671 xe_assert(xe, !xe_bo_is_user(bo));
1673 /* Pinned object must be in GGTT or have pinned flag */
1674 xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT |
1675 XE_BO_CREATE_GGTT_BIT));
1678 * No reason we can't support pinning imported dma-bufs, we just don't
1679 * expect to pin an imported dma-buf.
1681 xe_assert(xe, !bo->ttm.base.import_attach);
1683 /* We only expect at most 1 pin */
1684 xe_assert(xe, !xe_bo_is_pinned(bo));
1686 err = xe_bo_validate(bo, NULL, false);
1691 * For pinned objects on DGFX, which are also in VRAM, we expect
1692 * these to be in contiguous VRAM memory. This is required for eviction /
1693 * restore during suspend / resume (force restore to the same physical address).
1695 if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1696 bo->flags & XE_BO_INTERNAL_TEST)) {
1697 struct ttm_place *place = &(bo->placements[0]);
1699 if (mem_type_is_vram(place->mem_type)) {
1700 xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
1702 place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
1703 vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
1704 place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
1706 spin_lock(&xe->pinned.lock);
1707 list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
1708 spin_unlock(&xe->pinned.lock);
1712 ttm_bo_pin(&bo->ttm);
1715 * FIXME: If we always use the reserve / unreserve functions for locking
1716 * we do not need this.
1718 ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1724 * xe_bo_unpin_external - unpin an external BO
1725 * @bo: buffer object to be unpinned
1727 * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1728 * BO. Unique call compared to xe_bo_unpin as this function has its own set of
1729 * asserts and code to ensure evict / restore on suspend / resume.
1733 void xe_bo_unpin_external(struct xe_bo *bo)
1735 struct xe_device *xe = xe_bo_device(bo);
1737 xe_assert(xe, !bo->vm);
1738 xe_assert(xe, xe_bo_is_pinned(bo));
1739 xe_assert(xe, xe_bo_is_user(bo));
1741 if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
1742 spin_lock(&xe->pinned.lock);
1743 list_del_init(&bo->pinned_link);
1744 spin_unlock(&xe->pinned.lock);
1747 ttm_bo_unpin(&bo->ttm);
1750 * FIXME: If we always use the reserve / unreserve functions for locking
1751 * we do not need this.
1753 ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1756 void xe_bo_unpin(struct xe_bo *bo)
1758 struct xe_device *xe = xe_bo_device(bo);
1760 xe_assert(xe, !bo->ttm.base.import_attach);
1761 xe_assert(xe, xe_bo_is_pinned(bo));
1763 if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1764 bo->flags & XE_BO_INTERNAL_TEST)) {
1765 struct ttm_place *place = &(bo->placements[0]);
1767 if (mem_type_is_vram(place->mem_type)) {
1768 xe_assert(xe, !list_empty(&bo->pinned_link));
1770 spin_lock(&xe->pinned.lock);
1771 list_del_init(&bo->pinned_link);
1772 spin_unlock(&xe->pinned.lock);
1776 ttm_bo_unpin(&bo->ttm);
1780 * xe_bo_validate() - Make sure the bo is in an allowed placement
1782 * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
1783 * NULL. Used together with @allow_res_evict.
1784 * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
1785 * reservation object.
1787 * Make sure the bo is in an allowed placement, migrating it if necessary. If
1788 * needed, other bos will be evicted. If bos selected for eviction share
1789 * the @vm's reservation object, they can be evicted only if @allow_res_evict is
1790 * set to true; otherwise they will be bypassed.
1792 * Return: 0 on success, negative error code on failure. May return
1793 * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
1795 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
1797 struct ttm_operation_ctx ctx = {
1798 .interruptible = true,
1799 .no_wait_gpu = false,
1803 lockdep_assert_held(&vm->lock);
1804 xe_vm_assert_held(vm);
1806 ctx.allow_res_evict = allow_res_evict;
1807 ctx.resv = xe_vm_resv(vm);
1810 return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
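/*
 * Minimal sketch, assuming the bo's dma_resv is already locked: a VM-less
 * bo, as in xe_bo_pin_external() above, passes a NULL vm and
 * allow_res_evict = false:
 *
 *	err = xe_bo_validate(bo, NULL, false);
 */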
1813 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
1815 if (bo->destroy == &xe_ttm_bo_destroy)
1822 * Resolve a BO address. There is no assert to check if the proper lock is held,
1823 * so it should only be used in cases where it is not fatal to get the wrong
1824 * address, such as printing debug information, but not in cases where memory is
1825 * written based on this result.
1827 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1829 struct xe_device *xe = xe_bo_device(bo);
1830 struct xe_res_cursor cur;
1833 xe_assert(xe, page_size <= PAGE_SIZE);
1834 page = offset >> PAGE_SHIFT;
1835 offset &= (PAGE_SIZE - 1);
1837 if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
1838 xe_assert(xe, bo->ttm.ttm);
1840 xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
1842 return xe_res_dma(&cur) + offset;
1844 struct xe_res_cursor cur;
1846 xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
1848 return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
1852 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1854 if (!READ_ONCE(bo->ttm.pin_count))
1855 xe_bo_assert_held(bo);
1856 return __xe_bo_addr(bo, offset, page_size);
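/*
 * Minimal sketch: resolving the device address of the first page of a
 * pinned bo, as xe_bo_pin() above does when fixing its VRAM placement:
 *
 *	dma_addr_t addr = xe_bo_addr(bo, 0, PAGE_SIZE);
 */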
1859 int xe_bo_vmap(struct xe_bo *bo)
1865 xe_bo_assert_held(bo);
1867 if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
1870 if (!iosys_map_is_null(&bo->vmap))
1874 * We use this more or less deprecated interface for now since
1875 * ttm_bo_vmap() doesn't offer the optimization of kmapping
1876 * single page bos, which is done here.
1877 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
1878 * to use struct iosys_map.
1880 ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
1884 virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
1886 iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
1888 iosys_map_set_vaddr(&bo->vmap, virtual);
1893 static void __xe_bo_vunmap(struct xe_bo *bo)
1895 if (!iosys_map_is_null(&bo->vmap)) {
1896 iosys_map_clear(&bo->vmap);
1897 ttm_bo_kunmap(&bo->kmap);
1901 void xe_bo_vunmap(struct xe_bo *bo)
1903 xe_bo_assert_held(bo);
1907 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
1908 struct drm_file *file)
1910 struct xe_device *xe = to_xe_device(dev);
1911 struct xe_file *xef = to_xe_file(file);
1912 struct drm_xe_gem_create *args = data;
1913 struct xe_vm *vm = NULL;
1915 unsigned int bo_flags;
1919 if (XE_IOCTL_DBG(xe, args->extensions) ||
1920 XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
1921 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1924 /* at least one valid memory placement must be specified */
1925 if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
1929 if (XE_IOCTL_DBG(xe, args->flags &
1930 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
1931 DRM_XE_GEM_CREATE_FLAG_SCANOUT |
1932 DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
1935 if (XE_IOCTL_DBG(xe, args->handle))
1938 if (XE_IOCTL_DBG(xe, !args->size))
1941 if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
1944 if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
1948 if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
1949 bo_flags |= XE_BO_DEFER_BACKING;
1951 if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
1952 bo_flags |= XE_BO_SCANOUT_BIT;
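	/*
	 * The uAPI placement mask uses bit 0 for sysmem and the following bits
	 * for VRAM instances, mirroring the order of the internal
	 * XE_BO_CREATE_SYSTEM/VRAM0/VRAM1 bits, so shifting by
	 * ffs(XE_BO_CREATE_SYSTEM_BIT) - 1 maps it straight into bo_flags
	 * (this reading of the bit layout is inferred from xe_bo.h).
	 */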
1954 bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
1956 if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
1957 if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
1960 bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
1963 if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
1964 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
1967 if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK &&
1968 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
1971 if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT &&
1972 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
1976 vm = xe_vm_lookup(xef, args->vm_id);
1977 if (XE_IOCTL_DBG(xe, !vm))
1979 err = xe_vm_lock(vm, true);
1984 bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
1985 ttm_bo_type_device, bo_flags);
1995 err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
1999 args->handle = handle;
2003 if (vm && !xe_vm_in_fault_mode(vm)) {
2004 xe_vm_lock(vm, false);
2005 __xe_bo_unset_bulk_move(bo);
2017 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
2018 struct drm_file *file)
2020 struct xe_device *xe = to_xe_device(dev);
2021 struct drm_xe_gem_mmap_offset *args = data;
2022 struct drm_gem_object *gem_obj;
2024 if (XE_IOCTL_DBG(xe, args->extensions) ||
2025 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2028 if (XE_IOCTL_DBG(xe, args->flags))
2031 gem_obj = drm_gem_object_lookup(file, args->handle);
2032 if (XE_IOCTL_DBG(xe, !gem_obj))
2035 /* The mmap offset was set up at BO allocation time. */
2036 args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
2038 xe_bo_put(gem_to_xe_bo(gem_obj));
2043 * xe_bo_lock() - Lock the buffer object's dma_resv object
2044 * @bo: The struct xe_bo whose lock is to be taken
2045 * @intr: Whether to perform any wait interruptible
2047 * Locks the buffer object's dma_resv object. If the buffer object is
2048 * pointing to a shared dma_resv object, that shared lock is locked.
2050 * Return: 0 on success, -EINTR if @intr is true and the wait for a
2051 * contended lock was interrupted. If @intr is set to false, the
2052 * function always returns 0.
2054 int xe_bo_lock(struct xe_bo *bo, bool intr)
2057 return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2059 dma_resv_lock(bo->ttm.base.resv, NULL);
2065 * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2066 * @bo: The struct xe_bo whose lock is to be released.
2068 * Unlock a buffer object lock that was locked by xe_bo_lock().
2070 void xe_bo_unlock(struct xe_bo *bo)
2072 dma_resv_unlock(bo->ttm.base.resv);
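/*
 * Minimal sketch of the interruptible locking pattern; the work done under
 * the lock is elided:
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *	...
 *	xe_bo_unlock(bo);
 */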
2076 * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2077 * @bo: The buffer object to migrate
2078 * @mem_type: The TTM memory type intended to migrate to
2080 * Check whether the buffer object supports migration to the
2081 * given memory type. Note that pinning may affect the ability to migrate as
2082 * returned by this function.
2084 * This function is primarily intended as a helper for checking the
2085 * possibility to migrate buffer objects and can be called without
2086 * the object lock held.
2088 * Return: true if migration is possible, false otherwise.
2090 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2092 unsigned int cur_place;
2094 if (bo->ttm.type == ttm_bo_type_kernel)
2097 if (bo->ttm.type == ttm_bo_type_sg)
2100 for (cur_place = 0; cur_place < bo->placement.num_placement;
2102 if (bo->placements[cur_place].mem_type == mem_type)
2109 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2111 memset(place, 0, sizeof(*place));
2112 place->mem_type = mem_type;
2116 * xe_bo_migrate - Migrate an object to the desired region id
2117 * @bo: The buffer object to migrate.
2118 * @mem_type: The TTM region type to migrate to.
2120 * Attempt to migrate the buffer object to the desired memory region. The
2121 * buffer object may not be pinned, and must be locked.
2122 * On successful completion, the object memory type will be updated,
2123 * but an async migration task may not have completed yet, and to
2124 * accomplish that, the object's kernel fences must be signaled with
2125 * the object lock held.
2127 * Return: 0 on success. Negative error code on failure. In particular may
2128 * return -EINTR or -ERESTARTSYS if signal pending.
2130 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2132 struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2133 struct ttm_operation_ctx ctx = {
2134 .interruptible = true,
2135 .no_wait_gpu = false,
2137 struct ttm_placement placement;
2138 struct ttm_place requested;
2140 xe_bo_assert_held(bo);
2142 if (bo->ttm.resource->mem_type == mem_type)
2145 if (xe_bo_is_pinned(bo))
2148 if (!xe_bo_can_migrate(bo, mem_type))
2151 xe_place_from_ttm_type(mem_type, &requested);
2152 placement.num_placement = 1;
2153 placement.num_busy_placement = 1;
2154 placement.placement = &requested;
2155 placement.busy_placement = &requested;
2158 * Stolen needs to be handled like the VRAM handling below if we ever need to support it.
2161 drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2163 if (mem_type_is_vram(mem_type)) {
2166 add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2169 return ttm_bo_validate(&bo->ttm, &placement, &ctx);
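/*
 * Minimal sketch: migrating a bo to system-backed XE_PL_TT; the CPU-fault
 * path above does this with the resv already held by ttm_bo_vm_reserve():
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *	if (xe_bo_can_migrate(bo, XE_PL_TT))
 *		err = xe_bo_migrate(bo, XE_PL_TT);
 *	xe_bo_unlock(bo);
 */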
2173 * xe_bo_evict - Evict an object to evict placement
2174 * @bo: The buffer object to migrate.
2175 * @force_alloc: Set force_alloc in ttm_operation_ctx
2177 * On successful completion, the object memory will be moved to evict
2178 * placement. This function blocks until the object has been fully moved.
2180 * Return: 0 on success. Negative error code on failure.
2182 int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
2184 struct ttm_operation_ctx ctx = {
2185 .interruptible = false,
2186 .no_wait_gpu = false,
2187 .force_alloc = force_alloc,
2189 struct ttm_placement placement;
2192 xe_evict_flags(&bo->ttm, &placement);
2193 ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
2197 dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
2198 false, MAX_SCHEDULE_TIMEOUT);
2204 * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
2205 * placed in system memory.
2208 * Return: true if extra pages need to be allocated, false otherwise.
2210 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
2212 struct xe_device *xe = xe_bo_device(bo);
2214 if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
2217 /* On discrete GPUs, if the GPU can access this buffer from
2218 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
2219 * can't be used since there's no CCS storage associated with
2220 * non-VRAM addresses.
2222 if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT))
2229 * __xe_bo_release_dummy() - Dummy kref release function
2230 * @kref: The embedded struct kref.
2232 * Dummy release function for xe_bo_put_deferred(). Keep off.
2234 void __xe_bo_release_dummy(struct kref *kref)
2239 * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
2240 * @deferred: The lockless list used for the call to xe_bo_put_deferred().
2242 * Puts all bos whose put was deferred by xe_bo_put_deferred().
2243 * The @deferred list can be either an onstack local list or a global
2244 * shared list used by a workqueue.
2246 void xe_bo_put_commit(struct llist_head *deferred)
2248 struct llist_node *freed;
2249 struct xe_bo *bo, *next;
2254 freed = llist_del_all(deferred);
2258 llist_for_each_entry_safe(bo, next, freed, freed)
2259 drm_gem_object_free(&bo->ttm.base.refcount);
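/*
 * Minimal sketch of the deferred-put pattern, assuming the
 * xe_bo_put_deferred() helper declared alongside this function in xe_bo.h;
 * puts are collected while locks are held and committed later:
 *
 *	LLIST_HEAD(deferred);
 *
 *	xe_bo_put_deferred(bo, &deferred);
 *	...
 *	xe_bo_put_commit(&deferred);
 */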
2263 * xe_bo_dumb_create - Create a dumb bo as backing for a fb
2268 * See dumb_create() hook in include/drm/drm_drv.h
2272 int xe_bo_dumb_create(struct drm_file *file_priv,
2273 struct drm_device *dev,
2274 struct drm_mode_create_dumb *args)
2276 struct xe_device *xe = to_xe_device(dev);
2279 int cpp = DIV_ROUND_UP(args->bpp, 8);
2281 u32 page_size = max_t(u32, PAGE_SIZE,
2282 xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
2284 args->pitch = ALIGN(args->width * cpp, 64);
2285 args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
2288 bo = xe_bo_create_user(xe, NULL, NULL, args->size,
2289 DRM_XE_GEM_CPU_CACHING_WC,
2291 XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
2292 XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
2293 XE_BO_NEEDS_CPU_ACCESS);
2297 err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
2298 /* drop reference from allocate - handle holds it now */
2299 drm_gem_object_put(&bo->ttm.base);
2301 args->handle = handle;
2305 void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
2307 struct ttm_buffer_object *tbo = &bo->ttm;
2308 struct ttm_device *bdev = tbo->bdev;
2310 drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
2312 list_del_init(&bo->vram_userfault_link);
2315 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
2316 #include "tests/xe_bo.c"