1 // SPDX-License-Identifier: MIT
3 * Copyright © 2021 Intel Corporation
8 #include <linux/dma-fence-array.h>
10 #include <drm/drm_exec.h>
11 #include <drm/drm_print.h>
12 #include <drm/ttm/ttm_execbuf_util.h>
13 #include <drm/ttm/ttm_tt.h>
14 #include <drm/xe_drm.h>
15 #include <linux/delay.h>
16 #include <linux/kthread.h>
18 #include <linux/swap.h>
20 #include "xe_assert.h"
22 #include "xe_device.h"
23 #include "xe_drm_client.h"
24 #include "xe_exec_queue.h"
26 #include "xe_gt_pagefault.h"
27 #include "xe_gt_tlb_invalidation.h"
28 #include "xe_migrate.h"
30 #include "xe_preempt_fence.h"
32 #include "xe_res_cursor.h"
35 #include "generated/xe_wa_oob.h"
38 #define TEST_VM_ASYNC_OPS_ERROR
40 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
42 return vm->gpuvm.r_obj;
46 * xe_vma_userptr_check_repin() - Advisory check for repin needed
47 * @vma: The userptr vma
49 * Check if the userptr vma has been invalidated since last successful
50 * repin. The check is advisory only and the function can be called
51 * without the vm->userptr.notifier_lock held. There is no guarantee that the
52 * vma userptr will remain valid after a lockless check, so typically
53 * the call needs to be followed by a proper check under the notifier_lock.
55 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
57 int xe_vma_userptr_check_repin(struct xe_vma *vma)
59 return mmu_interval_check_retry(&vma->userptr.notifier,
60 vma->userptr.notifier_seq) ?
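/*
 * Illustrative sketch, not part of the driver: the advisory check above is
 * typically paired with a repin and then a proper re-check under the
 * notifier lock before the result is trusted, roughly:
 *
 *	if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
 *		err = xe_vma_userptr_pin_pages(vma);	(repin outside the lock)
 *
 *	down_read(&vm->userptr.notifier_lock);
 *	if (__xe_vm_userptr_needs_repin(vm)) {
 *		up_read(&vm->userptr.notifier_lock);
 *		(drop locks and retry from the top; error handling elided)
 *	}
 */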
64 int xe_vma_userptr_pin_pages(struct xe_vma *vma)
66 struct xe_vm *vm = xe_vma_vm(vma);
67 struct xe_device *xe = vm->xe;
68 const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
70 bool in_kthread = !current->mm;
71 unsigned long notifier_seq;
73 bool read_only = xe_vma_read_only(vma);
75 lockdep_assert_held(&vm->lock);
76 xe_assert(xe, xe_vma_is_userptr(vma));
78 if (vma->gpuva.flags & XE_VMA_DESTROYED)
81 notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
82 if (notifier_seq == vma->userptr.notifier_seq)
85 pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
89 if (vma->userptr.sg) {
90 dma_unmap_sgtable(xe->drm.dev,
92 read_only ? DMA_TO_DEVICE :
93 DMA_BIDIRECTIONAL, 0);
94 sg_free_table(vma->userptr.sg);
95 vma->userptr.sg = NULL;
100 if (!mmget_not_zero(vma->userptr.notifier.mm)) {
104 kthread_use_mm(vma->userptr.notifier.mm);
107 while (pinned < num_pages) {
108 ret = get_user_pages_fast(xe_vma_userptr(vma) +
111 read_only ? 0 : FOLL_WRITE,
124 kthread_unuse_mm(vma->userptr.notifier.mm);
125 mmput(vma->userptr.notifier.mm);
131 ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages,
133 (u64)pinned << PAGE_SHIFT,
134 xe_sg_segment_size(xe->drm.dev),
137 vma->userptr.sg = NULL;
140 vma->userptr.sg = &vma->userptr.sgt;
142 ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg,
143 read_only ? DMA_TO_DEVICE :
145 DMA_ATTR_SKIP_CPU_SYNC |
146 DMA_ATTR_NO_KERNEL_MAPPING);
148 sg_free_table(vma->userptr.sg);
149 vma->userptr.sg = NULL;
153 for (i = 0; i < pinned; ++i) {
156 set_page_dirty(pages[i]);
157 unlock_page(pages[i]);
160 mark_page_accessed(pages[i]);
164 release_pages(pages, pinned);
168 vma->userptr.notifier_seq = notifier_seq;
169 if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
173 return ret < 0 ? ret : 0;
176 static bool preempt_fences_waiting(struct xe_vm *vm)
178 struct xe_exec_queue *q;
180 lockdep_assert_held(&vm->lock);
181 xe_vm_assert_held(vm);
183 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
184 if (!q->compute.pfence ||
185     test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
186              &q->compute.pfence->flags)) {
194 static void free_preempt_fences(struct list_head *list)
196 struct list_head *link, *next;
198 list_for_each_safe(link, next, list)
199 xe_preempt_fence_free(to_preempt_fence_from_link(link));
202 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
205 lockdep_assert_held(&vm->lock);
206 xe_vm_assert_held(vm);
208 if (*count >= vm->preempt.num_exec_queues)
211 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
212 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
215 return PTR_ERR(pfence);
217 list_move_tail(xe_preempt_fence_link(pfence), list);
223 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
225 struct xe_exec_queue *q;
227 xe_vm_assert_held(vm);
229 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
230 if (q->compute.pfence) {
231 long timeout = dma_fence_wait(q->compute.pfence, false);
235 dma_fence_put(q->compute.pfence);
236 q->compute.pfence = NULL;
243 static bool xe_vm_is_idle(struct xe_vm *vm)
245 struct xe_exec_queue *q;
247 xe_vm_assert_held(vm);
248 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
249 if (!xe_exec_queue_is_idle(q))
256 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
258 struct list_head *link;
259 struct xe_exec_queue *q;
261 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
262 struct dma_fence *fence;
265 xe_assert(vm->xe, link != list);
267 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
268 q, q->compute.context,
270 dma_fence_put(q->compute.pfence);
271 q->compute.pfence = fence;
275 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
277 struct xe_exec_queue *q;
280 err = xe_bo_lock(bo, true);
284 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
288 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
289 if (q->compute.pfence) {
290 dma_resv_add_fence(bo->ttm.base.resv,
292 DMA_RESV_USAGE_BOOKKEEP);
301 * xe_vm_fence_all_extobjs() - Add a fence to vm's external objects' resv
303 * @fence: The fence to add.
304 * @usage: The resv usage for the fence.
306 * Loops over all of the vm's external object bindings and adds a @fence
307 * with the given @usage to all of the external object's reservation
310 void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
311 enum dma_resv_usage usage)
315 list_for_each_entry(vma, &vm->extobj.list, extobj.link)
316 dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, usage);
319 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
321 struct xe_exec_queue *q;
323 lockdep_assert_held(&vm->lock);
324 xe_vm_assert_held(vm);
326 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
329 dma_resv_add_fence(xe_vm_resv(vm), q->compute.pfence,
330 DMA_RESV_USAGE_BOOKKEEP);
331 xe_vm_fence_all_extobjs(vm, q->compute.pfence,
332 DMA_RESV_USAGE_BOOKKEEP);
336 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
338 struct drm_exec exec;
339 struct dma_fence *pfence;
343 xe_assert(vm->xe, xe_vm_in_compute_mode(vm));
345 down_write(&vm->lock);
346 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
347 drm_exec_until_all_locked(&exec) {
348 err = xe_vm_lock_dma_resv(vm, &exec, 1, true);
349 drm_exec_retry_on_contention(&exec);
354 pfence = xe_preempt_fence_create(q, q->compute.context,
361 list_add(&q->compute.link, &vm->preempt.exec_queues);
362 ++vm->preempt.num_exec_queues;
363 q->compute.pfence = pfence;
365 down_read(&vm->userptr.notifier_lock);
367 dma_resv_add_fence(xe_vm_resv(vm), pfence,
368 DMA_RESV_USAGE_BOOKKEEP);
370 xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP);
373 * Check to see if a preemption on the VM or a userptr invalidation is in
374 * flight; if so, trigger this preempt fence to sync state with the other
375 * preempt fences on the VM.
377 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
379 dma_fence_enable_sw_signaling(pfence);
381 up_read(&vm->userptr.notifier_lock);
384 drm_exec_fini(&exec);
391 * __xe_vm_userptr_needs_repin() - Check whether the VM has userptrs
392 * that need repinning.
395 * This function checks whether the VM has userptrs that need repinning,
396 * and provides a release-type barrier on the userptr.notifier_lock after
399 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
401 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
403 lockdep_assert_held_read(&vm->userptr.notifier_lock);
405 return (list_empty(&vm->userptr.repin_list) &&
406 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
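/*
 * Illustrative sketch, not part of the driver: callers sample this under the
 * notifier lock in read mode and keep holding it while arming or installing
 * fences, as the rebind worker below does:
 *
 *	down_read(&vm->userptr.notifier_lock);
 *	if (__xe_vm_userptr_needs_repin(vm)) {
 *		up_read(&vm->userptr.notifier_lock);
 *		(drop locks and retry the pin/rebind loop)
 *	}
 *	(arm/install preempt fences while the lock is still held)
 *	up_read(&vm->userptr.notifier_lock);
 */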
410 * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv
411 * objects of the vm's external buffer objects.
413 * @exec: Pointer to a struct drm_exec locking context.
414 * @num_shared: Number of dma-fence slots to reserve in the locked objects.
415 * @lock_vm: Lock also the vm's dma_resv.
417 * Locks the vm dma-resv objects and all the dma-resv objects of the
418 * buffer objects on the vm external object list.
420 * Return: 0 on success, negative error code on error. In particular, if the
421 * @exec transaction uses interruptible waits, -EINTR or -ERESTARTSYS may be returned.
423 int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec,
424 unsigned int num_shared, bool lock_vm)
426 struct xe_vma *vma, *next;
429 lockdep_assert_held(&vm->lock);
432 err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
437 list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
438 err = drm_exec_prepare_obj(exec, &xe_vma_bo(vma)->ttm.base, num_shared);
443 spin_lock(&vm->notifier.list_lock);
444 list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list,
445 notifier.rebind_link) {
446 xe_bo_assert_held(xe_vma_bo(vma));
448 list_del_init(&vma->notifier.rebind_link);
449 if (vma->tile_present && !(vma->gpuva.flags & XE_VMA_DESTROYED))
450 list_move_tail(&vma->combined_links.rebind,
453 spin_unlock(&vm->notifier.list_lock);
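/*
 * Illustrative sketch, not part of the driver: xe_vm_lock_dma_resv() is
 * intended to be called from within a drm_exec locking loop so that
 * contention can be retried, mirroring xe_vm_add_compute_exec_queue() above:
 *
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
 *	drm_exec_until_all_locked(&exec) {
 *		err = xe_vm_lock_dma_resv(vm, &exec, 1, true);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *	}
 *	(do the work under the locks, then drm_exec_fini(&exec))
 */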
458 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
460 static void xe_vm_kill(struct xe_vm *vm)
462 struct xe_exec_queue *q;
464 lockdep_assert_held(&vm->lock);
466 xe_vm_lock(vm, false);
467 vm->flags |= XE_VM_FLAG_BANNED;
468 trace_xe_vm_kill(vm);
470 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
474 /* TODO: Inform user the VM is banned */
478 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
479 * @exec: The drm_exec object used for locking before validation.
480 * @err: The error returned from ttm_bo_validate().
481 * @end: A ktime_t cookie that should be set to 0 before first use and
482 * that should be reused on subsequent calls.
484 * With multiple active VMs, under memory pressure, it is possible that
485 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
486 * Until ttm properly handles locking in such scenarios, the best thing the
487 * driver can do is retry with a timeout. Check if that is necessary, and
488 * if so, unlock the drm_exec's objects while keeping the ticket to prepare
491 * Return: true if a retry after drm_exec_init() is recommended;
494 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
502 *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
503 if (!ktime_before(cur, *end))
507 * We would like to keep the ticket here with
508 * drm_exec_unlock_all(), but WW mutex asserts currently
509 * stop us from that. In any case this function could go away
510 * with proper TTM -EDEADLK handling.
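/*
 * Illustrative sketch, not part of the driver: the ktime cookie and the
 * retry helper wrap a drm_exec loop, as in preempt_rebind_work_func() below
 * ("prepare_and_validate" is a hypothetical stand-in for the locking and
 * xe_bo_validate() calls):
 *
 *	ktime_t end = 0;
 *
 * retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
 *	drm_exec_until_all_locked(&exec) {
 *		err = prepare_and_validate(&exec);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *			err = -EAGAIN;
 *		if (err)
 *			break;
 *	}
 *	drm_exec_fini(&exec);
 *	if (err == -EAGAIN)
 *		goto retry;
 */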
518 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
525 * 1 fence for each preempt fence plus a fence for each tile from a
528 err = drm_exec_prepare_obj(exec, xe_vm_obj(vm),
529 vm->preempt.num_exec_queues +
530 vm->xe->info.tile_count);
534 if (xe_vm_is_idle(vm)) {
535 vm->preempt.rebind_deactivated = true;
540 if (!preempt_fences_waiting(vm)) {
545 err = xe_vm_lock_dma_resv(vm, exec, vm->preempt.num_exec_queues, false);
549 err = wait_for_existing_preempt_fences(vm);
553 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
554 if (xe_vma_has_no_bo(vma) ||
555 vma->gpuva.flags & XE_VMA_DESTROYED)
558 err = xe_bo_validate(xe_vma_bo(vma), vm, false);
566 static void preempt_rebind_work_func(struct work_struct *w)
568 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
569 struct drm_exec exec;
570 struct dma_fence *rebind_fence;
571 unsigned int fence_count = 0;
572 LIST_HEAD(preempt_fences);
576 int __maybe_unused tries = 0;
578 xe_assert(vm->xe, xe_vm_in_compute_mode(vm));
579 trace_xe_vm_rebind_worker_enter(vm);
581 down_write(&vm->lock);
583 if (xe_vm_is_closed_or_banned(vm)) {
585 trace_xe_vm_rebind_worker_exit(vm);
590 if (vm->async_ops.error)
591 goto out_unlock_outer;
594 * Extreme corner case where we exit a VM error state with a munmap style VM
595 * unbind in flight which requires a rebind. In this case the rebind needs
596 * to install some fences into the dma-resv slots. The worker to do this is
597 * queued; let that worker make progress by dropping vm->lock and trying
598 * this again.
600 if (vm->async_ops.munmap_rebind_inflight) {
602 flush_work(&vm->async_ops.work);
606 if (xe_vm_userptr_check_repin(vm)) {
607 err = xe_vm_userptr_pin(vm);
609 goto out_unlock_outer;
612 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
614 drm_exec_until_all_locked(&exec) {
617 err = xe_preempt_work_begin(&exec, vm, &done);
618 drm_exec_retry_on_contention(&exec);
619 if (err && xe_vm_validate_should_retry(&exec, err, &end)) {
621 goto out_unlock_outer;
627 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
631 rebind_fence = xe_vm_rebind(vm, true);
632 if (IS_ERR(rebind_fence)) {
633 err = PTR_ERR(rebind_fence);
638 dma_fence_wait(rebind_fence, false);
639 dma_fence_put(rebind_fence);
642 /* Wait on munmap style VM unbinds */
643 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
644 DMA_RESV_USAGE_KERNEL,
645 false, MAX_SCHEDULE_TIMEOUT);
651 #define retry_required(__tries, __vm) \
652 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
653 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
654 __xe_vm_userptr_needs_repin(__vm))
656 down_read(&vm->userptr.notifier_lock);
657 if (retry_required(tries, vm)) {
658 up_read(&vm->userptr.notifier_lock);
663 #undef retry_required
665 spin_lock(&vm->xe->ttm.lru_lock);
666 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
667 spin_unlock(&vm->xe->ttm.lru_lock);
669 /* Point of no return. */
670 arm_preempt_fences(vm, &preempt_fences);
671 resume_and_reinstall_preempt_fences(vm);
672 up_read(&vm->userptr.notifier_lock);
675 drm_exec_fini(&exec);
677 if (err == -EAGAIN) {
678 trace_xe_vm_rebind_worker_retry(vm);
683 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
688 free_preempt_fences(&preempt_fences);
690 trace_xe_vm_rebind_worker_exit(vm);
693 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
694 const struct mmu_notifier_range *range,
695 unsigned long cur_seq)
697 struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
698 struct xe_vm *vm = xe_vma_vm(vma);
699 struct dma_resv_iter cursor;
700 struct dma_fence *fence;
703 xe_assert(vm->xe, xe_vma_is_userptr(vma));
704 trace_xe_vma_userptr_invalidate(vma);
706 if (!mmu_notifier_range_blockable(range))
709 down_write(&vm->userptr.notifier_lock);
710 mmu_interval_set_seq(mni, cur_seq);
712 /* No need to stop gpu access if the userptr is not yet bound. */
713 if (!vma->userptr.initial_bind) {
714 up_write(&vm->userptr.notifier_lock);
719 * Tell exec and rebind worker they need to repin and rebind this
722 if (!xe_vm_in_fault_mode(vm) &&
723 !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
724 spin_lock(&vm->userptr.invalidated_lock);
725 list_move_tail(&vma->userptr.invalidate_link,
726 &vm->userptr.invalidated);
727 spin_unlock(&vm->userptr.invalidated_lock);
730 up_write(&vm->userptr.notifier_lock);
733 * Preempt fences turn into schedule disables, pipeline these.
734 * Note that even in fault mode, we need to wait for binds and
735 * unbinds to complete, and those are attached as BOOKKEEP fences
738 dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
739 DMA_RESV_USAGE_BOOKKEEP);
740 dma_resv_for_each_fence_unlocked(&cursor, fence)
741 dma_fence_enable_sw_signaling(fence);
742 dma_resv_iter_end(&cursor);
744 err = dma_resv_wait_timeout(xe_vm_resv(vm),
745 DMA_RESV_USAGE_BOOKKEEP,
746 false, MAX_SCHEDULE_TIMEOUT);
747 XE_WARN_ON(err <= 0);
749 if (xe_vm_in_fault_mode(vm)) {
750 err = xe_vm_invalidate_vma(vma);
754 trace_xe_vma_userptr_invalidate_complete(vma);
759 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
760 .invalidate = vma_userptr_invalidate,
763 int xe_vm_userptr_pin(struct xe_vm *vm)
765 struct xe_vma *vma, *next;
767 LIST_HEAD(tmp_evict);
769 lockdep_assert_held_write(&vm->lock);
771 /* Collect invalidated userptrs */
772 spin_lock(&vm->userptr.invalidated_lock);
773 list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
774 userptr.invalidate_link) {
775 list_del_init(&vma->userptr.invalidate_link);
776 if (list_empty(&vma->combined_links.userptr))
777 list_move_tail(&vma->combined_links.userptr,
778 &vm->userptr.repin_list);
780 spin_unlock(&vm->userptr.invalidated_lock);
782 /* Pin and move to temporary list */
783 list_for_each_entry_safe(vma, next, &vm->userptr.repin_list,
784 combined_links.userptr) {
785 err = xe_vma_userptr_pin_pages(vma);
789 list_move_tail(&vma->combined_links.userptr, &tmp_evict);
792 /* Take lock and move to rebind_list for rebinding. */
793 err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
797 list_for_each_entry_safe(vma, next, &tmp_evict, combined_links.userptr)
798 list_move_tail(&vma->combined_links.rebind, &vm->rebind_list);
800 dma_resv_unlock(xe_vm_resv(vm));
805 list_splice_tail(&tmp_evict, &vm->userptr.repin_list);
811 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
812 * that need repinning.
815 * This function does an advisory check for whether the VM has userptrs that
818 * Return: 0 if there are no indications of userptrs needing repinning,
819 * -EAGAIN if there are.
821 int xe_vm_userptr_check_repin(struct xe_vm *vm)
823 return (list_empty_careful(&vm->userptr.repin_list) &&
824 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
827 static struct dma_fence *
828 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
829 struct xe_sync_entry *syncs, u32 num_syncs,
830 bool first_op, bool last_op);
832 struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
834 struct dma_fence *fence = NULL;
835 struct xe_vma *vma, *next;
837 lockdep_assert_held(&vm->lock);
838 if (xe_vm_no_dma_fences(vm) && !rebind_worker)
841 xe_vm_assert_held(vm);
842 list_for_each_entry_safe(vma, next, &vm->rebind_list,
843 combined_links.rebind) {
844 xe_assert(vm->xe, vma->tile_present);
846 list_del_init(&vma->combined_links.rebind);
847 dma_fence_put(fence);
849 trace_xe_vma_rebind_worker(vma);
851 trace_xe_vma_rebind_exec(vma);
852 fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
860 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
862 u64 bo_offset_or_userptr,
869 struct xe_tile *tile;
872 xe_assert(vm->xe, start < end);
873 xe_assert(vm->xe, end < vm->size);
875 if (!bo && !is_null) /* userptr */
876 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
878 vma = kzalloc(sizeof(*vma) - sizeof(struct xe_userptr),
881 vma = ERR_PTR(-ENOMEM);
885 INIT_LIST_HEAD(&vma->combined_links.rebind);
886 INIT_LIST_HEAD(&vma->notifier.rebind_link);
887 INIT_LIST_HEAD(&vma->extobj.link);
889 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
890 vma->gpuva.vm = &vm->gpuvm;
891 vma->gpuva.va.addr = start;
892 vma->gpuva.va.range = end - start + 1;
894 vma->gpuva.flags |= XE_VMA_READ_ONLY;
896 vma->gpuva.flags |= DRM_GPUVA_SPARSE;
899 vma->tile_mask = tile_mask;
901 for_each_tile(tile, vm->xe, id)
902 vma->tile_mask |= 0x1 << id;
905 if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
906 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
909 struct drm_gpuvm_bo *vm_bo;
911 xe_bo_assert_held(bo);
913 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
916 return ERR_CAST(vm_bo);
919 drm_gem_object_get(&bo->ttm.base);
920 vma->gpuva.gem.obj = &bo->ttm.base;
921 vma->gpuva.gem.offset = bo_offset_or_userptr;
922 drm_gpuva_link(&vma->gpuva, vm_bo);
923 drm_gpuvm_bo_put(vm_bo);
924 } else /* userptr or null */ {
926 u64 size = end - start + 1;
929 INIT_LIST_HEAD(&vma->userptr.invalidate_link);
930 vma->gpuva.gem.offset = bo_offset_or_userptr;
932 err = mmu_interval_notifier_insert(&vma->userptr.notifier,
934 xe_vma_userptr(vma), size,
935 &vma_userptr_notifier_ops);
942 vma->userptr.notifier_seq = LONG_MAX;
951 static bool vm_remove_extobj(struct xe_vma *vma)
953 if (!list_empty(&vma->extobj.link)) {
954 xe_vma_vm(vma)->extobj.entries--;
955 list_del_init(&vma->extobj.link);
961 static void xe_vma_destroy_late(struct xe_vma *vma)
963 struct xe_vm *vm = xe_vma_vm(vma);
964 struct xe_device *xe = vm->xe;
965 bool read_only = xe_vma_read_only(vma);
967 if (xe_vma_is_userptr(vma)) {
968 if (vma->userptr.sg) {
969 dma_unmap_sgtable(xe->drm.dev,
971 read_only ? DMA_TO_DEVICE :
972 DMA_BIDIRECTIONAL, 0);
973 sg_free_table(vma->userptr.sg);
974 vma->userptr.sg = NULL;
978 * Since userptr pages are not pinned, we can't remove
979 * the notifier until we're sure the GPU is not accessing
982 mmu_interval_notifier_remove(&vma->userptr.notifier);
984 } else if (xe_vma_is_null(vma)) {
987 xe_bo_put(xe_vma_bo(vma));
993 static void vma_destroy_work_func(struct work_struct *w)
996 container_of(w, struct xe_vma, destroy_work);
998 xe_vma_destroy_late(vma);
1001 static struct xe_vma *
1002 bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
1003 struct xe_vma *ignore)
1005 struct drm_gpuvm_bo *vm_bo;
1006 struct drm_gpuva *va;
1007 struct drm_gem_object *obj = &bo->ttm.base;
1009 xe_bo_assert_held(bo);
1011 drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
1012 drm_gpuvm_bo_for_each_va(va, vm_bo) {
1013 struct xe_vma *vma = gpuva_to_vma(va);
1015 if (vma != ignore && xe_vma_vm(vma) == vm)
1023 static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm,
1024 struct xe_vma *ignore)
1028 xe_bo_lock(bo, false);
1029 ret = !!bo_has_vm_references_locked(bo, vm, ignore);
1035 static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
1037 lockdep_assert_held_write(&vm->lock);
1039 list_add(&vma->extobj.link, &vm->extobj.list);
1040 vm->extobj.entries++;
1043 static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
1045 struct xe_bo *bo = xe_vma_bo(vma);
1047 lockdep_assert_held_write(&vm->lock);
1049 if (bo_has_vm_references(bo, vm, vma))
1052 __vm_insert_extobj(vm, vma);
1055 static void vma_destroy_cb(struct dma_fence *fence,
1056 struct dma_fence_cb *cb)
1058 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1060 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1061 queue_work(system_unbound_wq, &vma->destroy_work);
1064 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1066 struct xe_vm *vm = xe_vma_vm(vma);
1068 lockdep_assert_held_write(&vm->lock);
1069 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1071 if (xe_vma_is_userptr(vma)) {
1072 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1074 spin_lock(&vm->userptr.invalidated_lock);
1075 list_del(&vma->userptr.invalidate_link);
1076 spin_unlock(&vm->userptr.invalidated_lock);
1077 } else if (!xe_vma_is_null(vma)) {
1078 xe_bo_assert_held(xe_vma_bo(vma));
1080 spin_lock(&vm->notifier.list_lock);
1081 list_del(&vma->notifier.rebind_link);
1082 spin_unlock(&vm->notifier.list_lock);
1084 drm_gpuva_unlink(&vma->gpuva);
1086 if (!xe_vma_bo(vma)->vm && vm_remove_extobj(vma)) {
1087 struct xe_vma *other;
1089 other = bo_has_vm_references_locked(xe_vma_bo(vma), vm, NULL);
1092 __vm_insert_extobj(vm, other);
1096 xe_vm_assert_held(vm);
1098 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1102 XE_WARN_ON(ret != -ENOENT);
1103 xe_vma_destroy_late(vma);
1106 xe_vma_destroy_late(vma);
1111 * xe_vm_prepare_vma() - drm_exec utility to lock a vma
1112 * @exec: The drm_exec object we're currently locking for.
1113 * @vma: The vma for which we want to lock the vm resv and any attached
1115 * @num_shared: The number of dma-fence slots to pre-allocate in the
1116 * objects' reservation objects.
1118 * Return: 0 on success, negative error code on error. In particular
1119 * may return -EDEADLK on WW transaction contention and -EINTR if
1120 * an interruptible wait is terminated by a signal.
1122 int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
1123 unsigned int num_shared)
1125 struct xe_vm *vm = xe_vma_vm(vma);
1126 struct xe_bo *bo = xe_vma_bo(vma);
1130 err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
1131 if (!err && bo && !bo->vm)
1132 err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
1137 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1139 struct drm_exec exec;
1142 drm_exec_init(&exec, 0);
1143 drm_exec_until_all_locked(&exec) {
1144 err = xe_vm_prepare_vma(&exec, vma, 0);
1145 drm_exec_retry_on_contention(&exec);
1146 if (XE_WARN_ON(err))
1150 xe_vma_destroy(vma, NULL);
1152 drm_exec_fini(&exec);
1156 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1158 struct drm_gpuva *gpuva;
1160 lockdep_assert_held(&vm->lock);
1162 if (xe_vm_is_closed_or_banned(vm))
1165 xe_assert(vm->xe, start + range <= vm->size);
1167 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1169 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1172 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1176 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1177 lockdep_assert_held(&vm->lock);
1179 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1180 XE_WARN_ON(err); /* Shouldn't be possible */
1185 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1187 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1188 lockdep_assert_held(&vm->lock);
1190 drm_gpuva_remove(&vma->gpuva);
1191 if (vm->usm.last_fault_vma == vma)
1192 vm->usm.last_fault_vma = NULL;
1195 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1197 struct xe_vma_op *op;
1199 op = kzalloc(sizeof(*op), GFP_KERNEL);
1207 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1209 static struct drm_gpuvm_ops gpuvm_ops = {
1210 .op_alloc = xe_vm_op_alloc,
1211 .vm_free = xe_vm_free,
1214 static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
1218 if (pat_index & BIT(0))
1219 pte |= XE_PPGTT_PTE_PAT0;
1221 if (pat_index & BIT(1))
1222 pte |= XE_PPGTT_PTE_PAT1;
1227 static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index)
1231 if (pat_index & BIT(0))
1232 pte |= XE_PPGTT_PTE_PAT0;
1234 if (pat_index & BIT(1))
1235 pte |= XE_PPGTT_PTE_PAT1;
1237 if (pat_index & BIT(2))
1238 pte |= XE_PPGTT_PTE_PAT2;
1240 if (pat_index & BIT(3))
1241 pte |= XELPG_PPGTT_PTE_PAT3;
1243 if (pat_index & (BIT(4)))
1244 pte |= XE2_PPGTT_PTE_PAT4;
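/*
 * Worked example, for illustration only: a pat_index of 6 (0b00110) sets
 * XE_PPGTT_PTE_PAT1 | XE_PPGTT_PTE_PAT2 in the PTE, while the PDE encoder
 * above only looks at the two low PAT bits and would set just
 * XE_PPGTT_PTE_PAT1 for the same index.
 */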
1249 static u64 pte_encode_ps(u32 pt_level)
1251 XE_WARN_ON(pt_level > 2);
1254 return XE_PDE_PS_2M;
1255 else if (pt_level == 2)
1256 return XE_PDPE_PS_1G;
1261 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
1262 const u16 pat_index)
1264 struct xe_device *xe = xe_bo_device(bo);
1267 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1268 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1269 pde |= pde_encode_pat_index(xe, pat_index);
1274 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1275 u16 pat_index, u32 pt_level)
1277 struct xe_device *xe = xe_bo_device(bo);
1280 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1281 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1282 pte |= pte_encode_pat_index(xe, pat_index);
1283 pte |= pte_encode_ps(pt_level);
1285 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1286 pte |= XE_PPGTT_PTE_DM;
1291 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1292 u16 pat_index, u32 pt_level)
1294 struct xe_device *xe = xe_vma_vm(vma)->xe;
1296 pte |= XE_PAGE_PRESENT;
1298 if (likely(!xe_vma_read_only(vma)))
1301 pte |= pte_encode_pat_index(xe, pat_index);
1302 pte |= pte_encode_ps(pt_level);
1304 if (unlikely(xe_vma_is_null(vma)))
1310 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1312 u32 pt_level, bool devmem, u64 flags)
1316 /* Avoid passing random bits directly as flags */
1317 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1320 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1321 pte |= pte_encode_pat_index(xe, pat_index);
1322 pte |= pte_encode_ps(pt_level);
1325 pte |= XE_PPGTT_PTE_DM;
1332 static const struct xe_pt_ops xelp_pt_ops = {
1333 .pte_encode_bo = xelp_pte_encode_bo,
1334 .pte_encode_vma = xelp_pte_encode_vma,
1335 .pte_encode_addr = xelp_pte_encode_addr,
1336 .pde_encode_bo = xelp_pde_encode_bo,
1339 static void xe_vma_op_work_func(struct work_struct *w);
1340 static void vm_destroy_work_func(struct work_struct *w);
1342 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1344 struct drm_gem_object *vm_resv_obj;
1346 int err, number_tiles = 0;
1347 struct xe_tile *tile;
1350 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1352 return ERR_PTR(-ENOMEM);
1356 vm->size = 1ull << xe->info.va_bits;
1360 init_rwsem(&vm->lock);
1362 INIT_LIST_HEAD(&vm->rebind_list);
1364 INIT_LIST_HEAD(&vm->userptr.repin_list);
1365 INIT_LIST_HEAD(&vm->userptr.invalidated);
1366 init_rwsem(&vm->userptr.notifier_lock);
1367 spin_lock_init(&vm->userptr.invalidated_lock);
1369 INIT_LIST_HEAD(&vm->notifier.rebind_list);
1370 spin_lock_init(&vm->notifier.list_lock);
1372 INIT_LIST_HEAD(&vm->async_ops.pending);
1373 INIT_WORK(&vm->async_ops.work, xe_vma_op_work_func);
1374 spin_lock_init(&vm->async_ops.lock);
1376 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1378 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1379 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
1381 for_each_tile(tile, xe, id)
1382 xe_range_fence_tree_init(&vm->rftree[id]);
1384 INIT_LIST_HEAD(&vm->extobj.list);
1386 vm->pt_ops = &xelp_pt_ops;
1388 if (!(flags & XE_VM_FLAG_MIGRATION))
1389 xe_device_mem_access_get(xe);
1391 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1397 drm_gpuvm_init(&vm->gpuvm, "Xe VM", 0, &xe->drm, vm_resv_obj,
1398 0, vm->size, 0, 0, &gpuvm_ops);
1400 drm_gem_object_put(vm_resv_obj);
1402 err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
1406 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1407 vm->flags |= XE_VM_FLAG_64K;
1409 for_each_tile(tile, xe, id) {
1410 if (flags & XE_VM_FLAG_MIGRATION &&
1411 tile->id != XE_VM_FLAG_TILE_ID(flags))
1414 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1415 if (IS_ERR(vm->pt_root[id])) {
1416 err = PTR_ERR(vm->pt_root[id]);
1417 vm->pt_root[id] = NULL;
1418 goto err_unlock_close;
1422 if (flags & XE_VM_FLAG_SCRATCH_PAGE) {
1423 for_each_tile(tile, xe, id) {
1424 if (!vm->pt_root[id])
1427 err = xe_pt_create_scratch(xe, tile, vm);
1429 goto err_unlock_close;
1431 vm->batch_invalidate_tlb = true;
1434 if (flags & XE_VM_FLAG_COMPUTE_MODE) {
1435 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1436 vm->flags |= XE_VM_FLAG_COMPUTE_MODE;
1437 vm->batch_invalidate_tlb = false;
1440 if (flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
1441 vm->async_ops.fence.context = dma_fence_context_alloc(1);
1442 vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
1445 /* Fill pt_root after allocating scratch tables */
1446 for_each_tile(tile, xe, id) {
1447 if (!vm->pt_root[id])
1450 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1452 dma_resv_unlock(xe_vm_resv(vm));
1454 /* Kernel migration VM shouldn't have a circular loop. */
1455 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1456 for_each_tile(tile, xe, id) {
1457 struct xe_gt *gt = tile->primary_gt;
1458 struct xe_vm *migrate_vm;
1459 struct xe_exec_queue *q;
1461 if (!vm->pt_root[id])
1464 migrate_vm = xe_migrate_get_vm(tile->migrate);
1465 q = xe_exec_queue_create_class(xe, gt, migrate_vm,
1466 XE_ENGINE_CLASS_COPY,
1467 EXEC_QUEUE_FLAG_VM);
1468 xe_vm_put(migrate_vm);
1478 if (number_tiles > 1)
1479 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1481 mutex_lock(&xe->usm.lock);
1482 if (flags & XE_VM_FLAG_FAULT_MODE)
1483 xe->usm.num_vm_in_fault_mode++;
1484 else if (!(flags & XE_VM_FLAG_MIGRATION))
1485 xe->usm.num_vm_in_non_fault_mode++;
1486 mutex_unlock(&xe->usm.lock);
1488 trace_xe_vm_create(vm);
1493 dma_resv_unlock(xe_vm_resv(vm));
1495 xe_vm_close_and_put(vm);
1496 return ERR_PTR(err);
1499 for_each_tile(tile, xe, id)
1500 xe_range_fence_tree_fini(&vm->rftree[id]);
1502 if (!(flags & XE_VM_FLAG_MIGRATION))
1503 xe_device_mem_access_put(xe);
1504 return ERR_PTR(err);
1507 static void flush_async_ops(struct xe_vm *vm)
1509 queue_work(system_unbound_wq, &vm->async_ops.work);
1510 flush_work(&vm->async_ops.work);
1513 static void vm_error_capture(struct xe_vm *vm, int err,
1514 u32 op, u64 addr, u64 size)
1516 struct drm_xe_vm_bind_op_error_capture capture;
1517 u64 __user *address =
1518 u64_to_user_ptr(vm->async_ops.error_capture.addr);
1519 bool in_kthread = !current->mm;
1521 capture.error = err;
1523 capture.addr = addr;
1524 capture.size = size;
1527 if (!mmget_not_zero(vm->async_ops.error_capture.mm))
1529 kthread_use_mm(vm->async_ops.error_capture.mm);
1532 if (copy_to_user(address, &capture, sizeof(capture)))
1533 drm_warn(&vm->xe->drm, "Copy to user failed");
1536 kthread_unuse_mm(vm->async_ops.error_capture.mm);
1537 mmput(vm->async_ops.error_capture.mm);
1541 wake_up_all(&vm->async_ops.error_capture.wq);
1544 static void xe_vm_close(struct xe_vm *vm)
1546 down_write(&vm->lock);
1548 up_write(&vm->lock);
1551 void xe_vm_close_and_put(struct xe_vm *vm)
1553 LIST_HEAD(contested);
1554 struct xe_device *xe = vm->xe;
1555 struct xe_tile *tile;
1556 struct xe_vma *vma, *next_vma;
1557 struct drm_gpuva *gpuva, *next;
1560 xe_assert(xe, !vm->preempt.num_exec_queues);
1563 flush_async_ops(vm);
1564 if (xe_vm_in_compute_mode(vm))
1565 flush_work(&vm->preempt.rebind_work);
1567 for_each_tile(tile, xe, id) {
1569 xe_exec_queue_kill(vm->q[id]);
1570 xe_exec_queue_put(vm->q[id]);
1575 down_write(&vm->lock);
1576 xe_vm_lock(vm, false);
1577 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1578 vma = gpuva_to_vma(gpuva);
1580 if (xe_vma_has_no_bo(vma)) {
1581 down_read(&vm->userptr.notifier_lock);
1582 vma->gpuva.flags |= XE_VMA_DESTROYED;
1583 up_read(&vm->userptr.notifier_lock);
1586 xe_vm_remove_vma(vm, vma);
1588 /* easy case, remove from VMA? */
1589 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1590 list_del_init(&vma->combined_links.rebind);
1591 xe_vma_destroy(vma, NULL);
1595 list_move_tail(&vma->combined_links.destroy, &contested);
1596 vma->gpuva.flags |= XE_VMA_DESTROYED;
1600 * All vm operations will add shared fences to resv.
1601 * The only exception is eviction for a shared object,
1602 * but even so, the unbind when evicted would still
1603 * install a fence to resv. Hence it's safe to
1604 * destroy the pagetables immediately.
1606 for_each_tile(tile, xe, id) {
1607 if (vm->scratch_bo[id]) {
1610 xe_bo_unpin(vm->scratch_bo[id]);
1611 xe_bo_put(vm->scratch_bo[id]);
1612 for (i = 0; i < vm->pt_root[id]->level; i++)
1613 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags,
1616 if (vm->pt_root[id]) {
1617 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1618 vm->pt_root[id] = NULL;
1624 * VM is now dead, so we cannot re-add nodes to vm->vmas if it's NULL.
1625 * Since we hold a refcount to the bo, we can remove and free
1626 * the members safely without locking.
1628 list_for_each_entry_safe(vma, next_vma, &contested,
1629 combined_links.destroy) {
1630 list_del_init(&vma->combined_links.destroy);
1631 xe_vma_destroy_unlocked(vma);
1634 if (vm->async_ops.error_capture.addr)
1635 wake_up_all(&vm->async_ops.error_capture.wq);
1637 xe_assert(xe, list_empty(&vm->extobj.list));
1638 up_write(&vm->lock);
1640 mutex_lock(&xe->usm.lock);
1641 if (vm->flags & XE_VM_FLAG_FAULT_MODE)
1642 xe->usm.num_vm_in_fault_mode--;
1643 else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
1644 xe->usm.num_vm_in_non_fault_mode--;
1645 mutex_unlock(&xe->usm.lock);
1647 for_each_tile(tile, xe, id)
1648 xe_range_fence_tree_fini(&vm->rftree[id]);
1653 static void vm_destroy_work_func(struct work_struct *w)
1656 container_of(w, struct xe_vm, destroy_work);
1657 struct xe_device *xe = vm->xe;
1658 struct xe_tile *tile;
1662 /* xe_vm_close_and_put was not called? */
1663 xe_assert(xe, !vm->size);
1665 if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
1666 xe_device_mem_access_put(xe);
1668 if (xe->info.has_asid) {
1669 mutex_lock(&xe->usm.lock);
1670 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1671 xe_assert(xe, lookup == vm);
1672 mutex_unlock(&xe->usm.lock);
1676 for_each_tile(tile, xe, id)
1677 XE_WARN_ON(vm->pt_root[id]);
1679 trace_xe_vm_free(vm);
1680 dma_fence_put(vm->rebind_fence);
1684 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1686 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1688 /* To destroy the VM we need to be able to sleep */
1689 queue_work(system_unbound_wq, &vm->destroy_work);
1692 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1696 mutex_lock(&xef->vm.lock);
1697 vm = xa_load(&xef->vm.xa, id);
1700 mutex_unlock(&xef->vm.lock);
1705 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1707 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
1708 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
1711 static struct dma_fence *
1712 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
1713 struct xe_sync_entry *syncs, u32 num_syncs,
1714 bool first_op, bool last_op)
1716 struct xe_tile *tile;
1717 struct dma_fence *fence = NULL;
1718 struct dma_fence **fences = NULL;
1719 struct dma_fence_array *cf = NULL;
1720 struct xe_vm *vm = xe_vma_vm(vma);
1721 int cur_fence = 0, i;
1722 int number_tiles = hweight8(vma->tile_present);
1726 trace_xe_vma_unbind(vma);
1728 if (number_tiles > 1) {
1729 fences = kmalloc_array(number_tiles, sizeof(*fences),
1732 return ERR_PTR(-ENOMEM);
1735 for_each_tile(tile, vm->xe, id) {
1736 if (!(vma->tile_present & BIT(id)))
1739 fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],
1740 first_op ? syncs : NULL,
1741 first_op ? num_syncs : 0);
1742 if (IS_ERR(fence)) {
1743 err = PTR_ERR(fence);
1748 fences[cur_fence++] = fence;
1751 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1752 q = list_next_entry(q, multi_gt_list);
1756 cf = dma_fence_array_create(number_tiles, fences,
1757 vm->composite_fence_ctx,
1758 vm->composite_fence_seqno++,
1761 --vm->composite_fence_seqno;
1768 for (i = 0; i < num_syncs; i++)
1769 xe_sync_entry_signal(&syncs[i], NULL,
1770 cf ? &cf->base : fence);
1773 return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;
1778 /* FIXME: Rewind the previous binds? */
1779 dma_fence_put(fences[--cur_fence]);
1784 return ERR_PTR(err);
1787 static struct dma_fence *
1788 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
1789 struct xe_sync_entry *syncs, u32 num_syncs,
1790 bool first_op, bool last_op)
1792 struct xe_tile *tile;
1793 struct dma_fence *fence;
1794 struct dma_fence **fences = NULL;
1795 struct dma_fence_array *cf = NULL;
1796 struct xe_vm *vm = xe_vma_vm(vma);
1797 int cur_fence = 0, i;
1798 int number_tiles = hweight8(vma->tile_mask);
1802 trace_xe_vma_bind(vma);
1804 if (number_tiles > 1) {
1805 fences = kmalloc_array(number_tiles, sizeof(*fences),
1808 return ERR_PTR(-ENOMEM);
1811 for_each_tile(tile, vm->xe, id) {
1812 if (!(vma->tile_mask & BIT(id)))
1815 fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
1816 first_op ? syncs : NULL,
1817 first_op ? num_syncs : 0,
1818 vma->tile_present & BIT(id));
1819 if (IS_ERR(fence)) {
1820 err = PTR_ERR(fence);
1825 fences[cur_fence++] = fence;
1828 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1829 q = list_next_entry(q, multi_gt_list);
1833 cf = dma_fence_array_create(number_tiles, fences,
1834 vm->composite_fence_ctx,
1835 vm->composite_fence_seqno++,
1838 --vm->composite_fence_seqno;
1845 for (i = 0; i < num_syncs; i++)
1846 xe_sync_entry_signal(&syncs[i], NULL,
1847 cf ? &cf->base : fence);
1850 return cf ? &cf->base : fence;
1855 /* FIXME: Rewind the previous binds? */
1856 dma_fence_put(fences[--cur_fence]);
1861 return ERR_PTR(err);
1864 struct async_op_fence {
1865 struct dma_fence fence;
1866 struct dma_fence *wait_fence;
1867 struct dma_fence_cb cb;
1869 wait_queue_head_t wq;
1873 static const char *async_op_fence_get_driver_name(struct dma_fence *dma_fence)
1879 async_op_fence_get_timeline_name(struct dma_fence *dma_fence)
1881 return "async_op_fence";
1884 static const struct dma_fence_ops async_op_fence_ops = {
1885 .get_driver_name = async_op_fence_get_driver_name,
1886 .get_timeline_name = async_op_fence_get_timeline_name,
1889 static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
1891 struct async_op_fence *afence =
1892 container_of(cb, struct async_op_fence, cb);
1894 afence->fence.error = afence->wait_fence->error;
1895 dma_fence_signal(&afence->fence);
1896 xe_vm_put(afence->vm);
1897 dma_fence_put(afence->wait_fence);
1898 dma_fence_put(&afence->fence);
1901 static void add_async_op_fence_cb(struct xe_vm *vm,
1902 struct dma_fence *fence,
1903 struct async_op_fence *afence)
1907 if (!xe_vm_no_dma_fences(vm)) {
1908 afence->started = true;
1910 wake_up_all(&afence->wq);
1913 afence->wait_fence = dma_fence_get(fence);
1914 afence->vm = xe_vm_get(vm);
1915 dma_fence_get(&afence->fence);
1916 ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb);
1917 if (ret == -ENOENT) {
1918 afence->fence.error = afence->wait_fence->error;
1919 dma_fence_signal(&afence->fence);
1923 dma_fence_put(afence->wait_fence);
1924 dma_fence_put(&afence->fence);
1926 XE_WARN_ON(ret && ret != -ENOENT);
1929 int xe_vm_async_fence_wait_start(struct dma_fence *fence)
1931 if (fence->ops == &async_op_fence_ops) {
1932 struct async_op_fence *afence =
1933 container_of(fence, struct async_op_fence, fence);
1935 xe_assert(afence->vm->xe, !xe_vm_no_dma_fences(afence->vm));
1938 return wait_event_interruptible(afence->wq, afence->started);
1944 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
1945 struct xe_exec_queue *q, struct xe_sync_entry *syncs,
1946 u32 num_syncs, struct async_op_fence *afence,
1947 bool immediate, bool first_op, bool last_op)
1949 struct dma_fence *fence;
1951 xe_vm_assert_held(vm);
1954 fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
1957 return PTR_ERR(fence);
1961 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1963 fence = dma_fence_get_stub();
1965 for (i = 0; i < num_syncs; i++)
1966 xe_sync_entry_signal(&syncs[i], NULL, fence);
1970 add_async_op_fence_cb(vm, fence, afence);
1972 dma_fence_put(fence);
1976 static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
1977 struct xe_bo *bo, struct xe_sync_entry *syncs,
1978 u32 num_syncs, struct async_op_fence *afence,
1979 bool immediate, bool first_op, bool last_op)
1983 xe_vm_assert_held(vm);
1984 xe_bo_assert_held(bo);
1986 if (bo && immediate) {
1987 err = xe_bo_validate(bo, vm, true);
1992 return __xe_vm_bind(vm, vma, q, syncs, num_syncs, afence, immediate,
1996 static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
1997 struct xe_exec_queue *q, struct xe_sync_entry *syncs,
1998 u32 num_syncs, struct async_op_fence *afence,
1999 bool first_op, bool last_op)
2001 struct dma_fence *fence;
2003 xe_vm_assert_held(vm);
2004 xe_bo_assert_held(xe_vma_bo(vma));
2006 fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
2008 return PTR_ERR(fence);
2010 add_async_op_fence_cb(vm, fence, afence);
2012 xe_vma_destroy(vma, fence);
2013 dma_fence_put(fence);
2018 static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm,
2021 if (XE_IOCTL_DBG(xe, !value))
2024 if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
2027 if (XE_IOCTL_DBG(xe, vm->async_ops.error_capture.addr))
2030 vm->async_ops.error_capture.mm = current->mm;
2031 vm->async_ops.error_capture.addr = value;
2032 init_waitqueue_head(&vm->async_ops.error_capture.wq);
2037 typedef int (*xe_vm_set_property_fn)(struct xe_device *xe, struct xe_vm *vm,
2040 static const xe_vm_set_property_fn vm_set_property_funcs[] = {
2041 [XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS] =
2042 vm_set_error_capture_address,
2045 static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm,
2048 u64 __user *address = u64_to_user_ptr(extension);
2049 struct drm_xe_ext_vm_set_property ext;
2052 err = __copy_from_user(&ext, address, sizeof(ext));
2053 if (XE_IOCTL_DBG(xe, err))
2056 if (XE_IOCTL_DBG(xe, ext.property >=
2057 ARRAY_SIZE(vm_set_property_funcs)) ||
2058 XE_IOCTL_DBG(xe, ext.pad) ||
2059 XE_IOCTL_DBG(xe, ext.reserved[0] || ext.reserved[1]))
2062 return vm_set_property_funcs[ext.property](xe, vm, ext.value);
2065 typedef int (*xe_vm_user_extension_fn)(struct xe_device *xe, struct xe_vm *vm,
2068 static const xe_vm_set_property_fn vm_user_extension_funcs[] = {
2069 [XE_VM_EXTENSION_SET_PROPERTY] = vm_user_ext_set_property,
2072 #define MAX_USER_EXTENSIONS 16
2073 static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm,
2074 u64 extensions, int ext_number)
2076 u64 __user *address = u64_to_user_ptr(extensions);
2077 struct xe_user_extension ext;
2080 if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
2083 err = __copy_from_user(&ext, address, sizeof(ext));
2084 if (XE_IOCTL_DBG(xe, err))
2087 if (XE_IOCTL_DBG(xe, ext.pad) ||
2088 XE_IOCTL_DBG(xe, ext.name >=
2089 ARRAY_SIZE(vm_user_extension_funcs)))
2092 err = vm_user_extension_funcs[ext.name](xe, vm, extensions);
2093 if (XE_IOCTL_DBG(xe, err))
2096 if (ext.next_extension)
2097 return vm_user_extensions(xe, vm, ext.next_extension,
2103 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \
2104 DRM_XE_VM_CREATE_COMPUTE_MODE | \
2105 DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \
2106 DRM_XE_VM_CREATE_FAULT_MODE)
2108 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
2109 struct drm_file *file)
2111 struct xe_device *xe = to_xe_device(dev);
2112 struct xe_file *xef = to_xe_file(file);
2113 struct drm_xe_vm_create *args = data;
2114 struct xe_tile *tile;
2120 if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
2121 args->flags |= DRM_XE_VM_CREATE_SCRATCH_PAGE;
2123 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
2124 !xe->info.supports_usm))
2127 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2130 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
2133 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE &&
2134 args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
2137 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE &&
2138 args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
2141 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
2142 xe_device_in_non_fault_mode(xe)))
2145 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) &&
2146 xe_device_in_fault_mode(xe)))
2149 if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE)
2150 flags |= XE_VM_FLAG_SCRATCH_PAGE;
2151 if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE)
2152 flags |= XE_VM_FLAG_COMPUTE_MODE;
2153 if (args->flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS)
2154 flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
2155 if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE)
2156 flags |= XE_VM_FLAG_FAULT_MODE;
2158 vm = xe_vm_create(xe, flags);
2162 if (args->extensions) {
2163 err = vm_user_extensions(xe, vm, args->extensions, 0);
2164 if (XE_IOCTL_DBG(xe, err)) {
2165 xe_vm_close_and_put(vm);
2170 mutex_lock(&xef->vm.lock);
2171 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
2172 mutex_unlock(&xef->vm.lock);
2174 xe_vm_close_and_put(vm);
2178 if (xe->info.has_asid) {
2179 mutex_lock(&xe->usm.lock);
2180 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
2181 XA_LIMIT(0, XE_MAX_ASID - 1),
2182 &xe->usm.next_asid, GFP_KERNEL);
2183 mutex_unlock(&xe->usm.lock);
2185 xe_vm_close_and_put(vm);
2188 vm->usm.asid = asid;
2194 /* Record BO memory for VM pagetable created against client */
2195 for_each_tile(tile, xe, id)
2196 if (vm->pt_root[id])
2197 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
2199 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
2200 /* Warning: Security issue - never enable by default */
2201 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
2207 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
2208 struct drm_file *file)
2210 struct xe_device *xe = to_xe_device(dev);
2211 struct xe_file *xef = to_xe_file(file);
2212 struct drm_xe_vm_destroy *args = data;
2216 if (XE_IOCTL_DBG(xe, args->pad) ||
2217 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2220 mutex_lock(&xef->vm.lock);
2221 vm = xa_load(&xef->vm.xa, args->vm_id);
2222 if (XE_IOCTL_DBG(xe, !vm))
2224 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
2227 xa_erase(&xef->vm.xa, args->vm_id);
2228 mutex_unlock(&xef->vm.lock);
2231 xe_vm_close_and_put(vm);
2236 static const u32 region_to_mem_type[] = {
2242 static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
2243 struct xe_exec_queue *q, u32 region,
2244 struct xe_sync_entry *syncs, u32 num_syncs,
2245 struct async_op_fence *afence, bool first_op,
2250 xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
2252 if (!xe_vma_has_no_bo(vma)) {
2253 err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
2258 if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
2259 return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
2260 afence, true, first_op, last_op);
2264 /* Nothing to do, signal fences now */
2266 for (i = 0; i < num_syncs; i++)
2267 xe_sync_entry_signal(&syncs[i], NULL,
2268 dma_fence_get_stub());
2271 dma_fence_signal(&afence->fence);
2276 static void vm_set_async_error(struct xe_vm *vm, int err)
2278 lockdep_assert_held(&vm->lock);
2279 vm->async_ops.error = err;
2282 static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
2283 u64 addr, u64 range, u32 op, u32 flags)
2285 struct xe_device *xe = vm->xe;
2287 bool async = !!(flags & XE_VM_BIND_FLAG_ASYNC);
2289 lockdep_assert_held(&vm->lock);
2292 case XE_VM_BIND_OP_MAP:
2293 case XE_VM_BIND_OP_MAP_USERPTR:
2294 vma = xe_vm_find_overlapping_vma(vm, addr, range);
2295 if (XE_IOCTL_DBG(xe, vma && !async))
2298 case XE_VM_BIND_OP_UNMAP:
2299 case XE_VM_BIND_OP_PREFETCH:
2300 vma = xe_vm_find_overlapping_vma(vm, addr, range);
2301 if (XE_IOCTL_DBG(xe, !vma))
2302 /* Not an actual error, IOCTL cleans up and returns 0 */
2304 if (XE_IOCTL_DBG(xe, (xe_vma_start(vma) != addr ||
2305 xe_vma_end(vma) != addr + range) && !async))
2308 case XE_VM_BIND_OP_UNMAP_ALL:
2309 if (XE_IOCTL_DBG(xe, list_empty(&bo->ttm.base.gpuva.list)))
2310 /* Not an actual error, IOCTL cleans up and returns 0 */
2314 drm_warn(&xe->drm, "NOT POSSIBLE");
2321 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2324 down_read(&vm->userptr.notifier_lock);
2325 vma->gpuva.flags |= XE_VMA_DESTROYED;
2326 up_read(&vm->userptr.notifier_lock);
2328 xe_vm_remove_vma(vm, vma);
2332 #define ULL unsigned long long
2334 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
2335 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2340 case DRM_GPUVA_OP_MAP:
2341 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2342 (ULL)op->map.va.addr, (ULL)op->map.va.range);
2344 case DRM_GPUVA_OP_REMAP:
2345 vma = gpuva_to_vma(op->remap.unmap->va);
2346 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2347 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2348 op->remap.unmap->keep ? 1 : 0);
2351 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2352 (ULL)op->remap.prev->va.addr,
2353 (ULL)op->remap.prev->va.range);
2356 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2357 (ULL)op->remap.next->va.addr,
2358 (ULL)op->remap.next->va.range);
2360 case DRM_GPUVA_OP_UNMAP:
2361 vma = gpuva_to_vma(op->unmap.va);
2362 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2363 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2364 op->unmap.keep ? 1 : 0);
2366 case DRM_GPUVA_OP_PREFETCH:
2367 vma = gpuva_to_vma(op->prefetch.va);
2368 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2369 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2372 drm_warn(&xe->drm, "NOT POSSIBLE");
2376 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2382 * Create operations list from IOCTL arguments, set up operations fields so parse
2383 * and commit steps are decoupled from IOCTL arguments. This step can fail.
2385 static struct drm_gpuva_ops *
2386 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
2387 u64 bo_offset_or_userptr, u64 addr, u64 range,
2388 u32 operation, u32 flags, u8 tile_mask, u32 region)
2390 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2391 struct drm_gpuva_ops *ops;
2392 struct drm_gpuva_op *__op;
2393 struct xe_vma_op *op;
2394 struct drm_gpuvm_bo *vm_bo;
2397 lockdep_assert_held_write(&vm->lock);
2399 vm_dbg(&vm->xe->drm,
2400 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2401 operation, (ULL)addr, (ULL)range,
2402 (ULL)bo_offset_or_userptr);
2404 switch (operation) {
2405 case XE_VM_BIND_OP_MAP:
2406 case XE_VM_BIND_OP_MAP_USERPTR:
2407 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
2408 obj, bo_offset_or_userptr);
2412 drm_gpuva_for_each_op(__op, ops) {
2413 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2415 op->tile_mask = tile_mask;
2417 flags & XE_VM_BIND_FLAG_IMMEDIATE;
2419 flags & XE_VM_BIND_FLAG_READONLY;
2420 op->map.is_null = flags & XE_VM_BIND_FLAG_NULL;
2423 case XE_VM_BIND_OP_UNMAP:
2424 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2428 drm_gpuva_for_each_op(__op, ops) {
2429 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2431 op->tile_mask = tile_mask;
2434 case XE_VM_BIND_OP_PREFETCH:
2435 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2439 drm_gpuva_for_each_op(__op, ops) {
2440 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2442 op->tile_mask = tile_mask;
2443 op->prefetch.region = region;
2446 case XE_VM_BIND_OP_UNMAP_ALL:
2447 xe_assert(vm->xe, bo);
2449 err = xe_bo_lock(bo, true);
2451 return ERR_PTR(err);
2453 vm_bo = drm_gpuvm_bo_find(&vm->gpuvm, obj);
2457 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2458 drm_gpuvm_bo_put(vm_bo);
2463 drm_gpuva_for_each_op(__op, ops) {
2464 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2466 op->tile_mask = tile_mask;
2470 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2471 ops = ERR_PTR(-EINVAL);
2474 #ifdef TEST_VM_ASYNC_OPS_ERROR
2475 if (operation & FORCE_ASYNC_OP_ERROR) {
2476 op = list_first_entry_or_null(&ops->list, struct xe_vma_op,
2479 op->inject_error = true;
2484 drm_gpuva_for_each_op(__op, ops)
2485 print_op(vm->xe, __op);
2490 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2491 u8 tile_mask, bool read_only, bool is_null)
2493 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2497 lockdep_assert_held_write(&vm->lock);
2500 err = xe_bo_lock(bo, true);
2502 return ERR_PTR(err);
2504 vma = xe_vma_create(vm, bo, op->gem.offset,
2505 op->va.addr, op->va.addr +
2506 op->va.range - 1, read_only, is_null,
2511 if (xe_vma_is_userptr(vma)) {
2512 err = xe_vma_userptr_pin_pages(vma);
2514 prep_vma_destroy(vm, vma, false);
2515 xe_vma_destroy_unlocked(vma);
2516 return ERR_PTR(err);
2518 } else if (!xe_vma_has_no_bo(vma) && !bo->vm) {
2519 vm_insert_extobj(vm, vma);
2520 err = add_preempt_fences(vm, bo);
2522 prep_vma_destroy(vm, vma, false);
2523 xe_vma_destroy_unlocked(vma);
2524 return ERR_PTR(err);
2531 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2533 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2535 else if (vma->gpuva.flags & XE_VMA_PTE_2M)
2541 static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2545 vma->gpuva.flags |= XE_VMA_PTE_1G;
2548 vma->gpuva.flags |= XE_VMA_PTE_2M;
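/*
 * Worked example, for illustration only: these helpers feed the
 * skip_prev/skip_next checks in vm_bind_ioctl_ops_parse() below. If the vma
 * being split was bound with 2M PTEs (XE_VMA_PTE_2M), a non-userptr remap
 * "prev" piece whose end is 2M-aligned can be left bound as-is (skip_prev),
 * whereas a prev piece ending mid-2M-page must be rebound with smaller PTEs.
 */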
2555 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2559 lockdep_assert_held_write(&vm->lock);
2561 switch (op->base.op) {
2562 case DRM_GPUVA_OP_MAP:
2563 err |= xe_vm_insert_vma(vm, op->map.vma);
2565 op->flags |= XE_VMA_OP_COMMITTED;
2567 case DRM_GPUVA_OP_REMAP:
2568 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2570 op->flags |= XE_VMA_OP_COMMITTED;
2572 if (op->remap.prev) {
2573 err |= xe_vm_insert_vma(vm, op->remap.prev);
2575 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2576 if (!err && op->remap.skip_prev)
2577 op->remap.prev = NULL;
2579 if (op->remap.next) {
2580 err |= xe_vm_insert_vma(vm, op->remap.next);
2582 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2583 if (!err && op->remap.skip_next)
2584 op->remap.next = NULL;
2587 /* Adjust for partial unbind after removing VMA from VM */
2589 op->base.remap.unmap->va->va.addr = op->remap.start;
2590 op->base.remap.unmap->va->va.range = op->remap.range;
2593 case DRM_GPUVA_OP_UNMAP:
2594 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2595 op->flags |= XE_VMA_OP_COMMITTED;
2597 case DRM_GPUVA_OP_PREFETCH:
2598 op->flags |= XE_VMA_OP_COMMITTED;
2601 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2608 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
2609 struct drm_gpuva_ops *ops,
2610 struct xe_sync_entry *syncs, u32 num_syncs,
2611 struct list_head *ops_list, bool last,
2614 struct xe_vma_op *last_op = NULL;
2615 struct async_op_fence *fence = NULL;
2616 struct drm_gpuva_op *__op;
2619 lockdep_assert_held_write(&vm->lock);
2621 if (last && num_syncs && async) {
2624 fence = kmalloc(sizeof(*fence), GFP_KERNEL);
2628 seqno = q ? ++q->bind.fence_seqno : ++vm->async_ops.fence.seqno;
2629 dma_fence_init(&fence->fence, &async_op_fence_ops,
2630 &vm->async_ops.lock, q ? q->bind.fence_ctx :
2631 vm->async_ops.fence.context, seqno);
2633 if (!xe_vm_no_dma_fences(vm)) {
2635 fence->started = false;
2636 init_waitqueue_head(&fence->wq);
2640 drm_gpuva_for_each_op(__op, ops) {
2641 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2642 bool first = list_empty(ops_list);
2644 xe_assert(vm->xe, first || async);
2646 INIT_LIST_HEAD(&op->link);
2647 list_add_tail(&op->link, ops_list);
2650 op->flags |= XE_VMA_OP_FIRST;
2651 op->num_syncs = num_syncs;
2657 switch (op->base.op) {
2658 case DRM_GPUVA_OP_MAP:
2662 vma = new_vma(vm, &op->base.map,
2663 op->tile_mask, op->map.read_only,
2673 case DRM_GPUVA_OP_REMAP:
2675 struct xe_vma *old =
2676 gpuva_to_vma(op->base.remap.unmap->va);
2678 op->remap.start = xe_vma_start(old);
2679 op->remap.range = xe_vma_size(old);
2681 if (op->base.remap.prev) {
2684 op->base.remap.unmap->va->flags &
2687 op->base.remap.unmap->va->flags &
2690 vma = new_vma(vm, op->base.remap.prev,
2691 op->tile_mask, read_only,
2698 op->remap.prev = vma;
2701 * Userptr creates a new SG mapping so
2702 * we must also rebind.
2704 op->remap.skip_prev = !xe_vma_is_userptr(old) &&
2705 IS_ALIGNED(xe_vma_end(vma),
2706 xe_vma_max_pte_size(old));
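				/*
				 * Example: if the old VMA was bound with 2M
				 * PTEs and the surviving prev chunk also ends
				 * on a 2M boundary, its page-table entries are
				 * untouched by the split, so the prev rebind
				 * can be skipped and the old PTE size kept.
				 */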
2707 if (op->remap.skip_prev) {
2708 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2712 op->remap.start = xe_vma_end(vma);
2716 if (op->base.remap.next) {
2719 op->base.remap.unmap->va->flags &
2723 op->base.remap.unmap->va->flags &
2726 vma = new_vma(vm, op->base.remap.next,
2727 op->tile_mask, read_only,
2734 op->remap.next = vma;
2737 * Userptr creates a new SG mapping so
2738 * we must also rebind.
2740 op->remap.skip_next = !xe_vma_is_userptr(old) &&
2741 IS_ALIGNED(xe_vma_start(vma),
2742 xe_vma_max_pte_size(old));
2743 if (op->remap.skip_next) {
2744 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2752 case DRM_GPUVA_OP_UNMAP:
2753 case DRM_GPUVA_OP_PREFETCH:
2757 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2762 err = xe_vma_op_commit(vm, op);
2767 /* FIXME: Unhandled corner case */
2768 XE_WARN_ON(!last_op && last && !list_empty(ops_list));
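	/*
	 * Only the final op of the final ops list carries the user syncs and
	 * the optional out-fence; earlier ops in the chain complete without
	 * signalling anything back to userspace.
	 */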
2774 last_op->flags |= XE_VMA_OP_LAST;
2775 last_op->num_syncs = num_syncs;
2776 last_op->syncs = syncs;
2777 last_op->fence = fence;
2787 static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
2788 struct xe_vma *vma, struct xe_vma_op *op)
2792 lockdep_assert_held_write(&vm->lock);
2794 err = xe_vm_prepare_vma(exec, vma, 1);
2798 xe_vm_assert_held(vm);
2799 xe_bo_assert_held(xe_vma_bo(vma));
2801 switch (op->base.op) {
2802 case DRM_GPUVA_OP_MAP:
2803 err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
2804 op->syncs, op->num_syncs, op->fence,
2805 op->map.immediate || !xe_vm_in_fault_mode(vm),
2806 op->flags & XE_VMA_OP_FIRST,
2807 op->flags & XE_VMA_OP_LAST);
2809 case DRM_GPUVA_OP_REMAP:
2811 bool prev = !!op->remap.prev;
2812 bool next = !!op->remap.next;
2814 if (!op->remap.unmap_done) {
2816 vm->async_ops.munmap_rebind_inflight = true;
2817 vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
2819 err = xe_vm_unbind(vm, vma, op->q, op->syncs,
2821 !prev && !next ? op->fence : NULL,
2822 op->flags & XE_VMA_OP_FIRST,
2823 op->flags & XE_VMA_OP_LAST && !prev &&
2827 op->remap.unmap_done = true;
2831 op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
2832 err = xe_vm_bind(vm, op->remap.prev, op->q,
2833 xe_vma_bo(op->remap.prev), op->syncs,
2835 !next ? op->fence : NULL, true, false,
2836 op->flags & XE_VMA_OP_LAST && !next);
2837 op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2840 op->remap.prev = NULL;
2844 op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
2845 err = xe_vm_bind(vm, op->remap.next, op->q,
2846 xe_vma_bo(op->remap.next),
2847 op->syncs, op->num_syncs,
2848 op->fence, true, false,
2849 op->flags & XE_VMA_OP_LAST);
2850 op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2853 op->remap.next = NULL;
2855 vm->async_ops.munmap_rebind_inflight = false;
2859 case DRM_GPUVA_OP_UNMAP:
2860 err = xe_vm_unbind(vm, vma, op->q, op->syncs,
2861 op->num_syncs, op->fence,
2862 op->flags & XE_VMA_OP_FIRST,
2863 op->flags & XE_VMA_OP_LAST);
2865 case DRM_GPUVA_OP_PREFETCH:
2866 err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
2867 op->syncs, op->num_syncs, op->fence,
2868 op->flags & XE_VMA_OP_FIRST,
2869 op->flags & XE_VMA_OP_LAST);
2872 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2876 trace_xe_vma_fail(vma);
2881 static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
2882 struct xe_vma_op *op)
2884 struct drm_exec exec;
2888 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
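	/*
	 * drm_exec handles ww-mutex backoff: if op_execute() hits contention
	 * while locking the VM's reservation object and the VMA's BO, all
	 * locks are dropped and the loop below retries from the top.
	 */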
2889 drm_exec_until_all_locked(&exec) {
2890 err = op_execute(&exec, vm, vma, op);
2891 drm_exec_retry_on_contention(&exec);
2895 drm_exec_fini(&exec);
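	/*
	 * -EAGAIN on a userptr VMA means the MMU notifier invalidated the
	 * pages since they were last pinned; repin them so the operation can
	 * be retried rather than failed.
	 */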
2897 if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
2898 lockdep_assert_held_write(&vm->lock);
2899 err = xe_vma_userptr_pin_pages(vma);
2903 trace_xe_vma_fail(vma);
2909 static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
2913 lockdep_assert_held_write(&vm->lock);
2915 #ifdef TEST_VM_ASYNC_OPS_ERROR
2916 if (op->inject_error) {
2917 op->inject_error = false;
2922 switch (op->base.op) {
2923 case DRM_GPUVA_OP_MAP:
2924 ret = __xe_vma_op_execute(vm, op->map.vma, op);
2926 case DRM_GPUVA_OP_REMAP:
2930 if (!op->remap.unmap_done)
2931 vma = gpuva_to_vma(op->base.remap.unmap->va);
2932 else if (op->remap.prev)
2933 vma = op->remap.prev;
2935 vma = op->remap.next;
2937 ret = __xe_vma_op_execute(vm, vma, op);
2940 case DRM_GPUVA_OP_UNMAP:
2941 ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
2944 case DRM_GPUVA_OP_PREFETCH:
2945 ret = __xe_vma_op_execute(vm,
2946 gpuva_to_vma(op->base.prefetch.va),
2950 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2956 static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
2958 bool last = op->flags & XE_VMA_OP_LAST;
2961 while (op->num_syncs--)
2962 xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
2965 xe_exec_queue_put(op->q);
2967 dma_fence_put(&op->fence->fence);
2969 if (!list_empty(&op->link)) {
2970 spin_lock_irq(&vm->async_ops.lock);
2971 list_del(&op->link);
2972 spin_unlock_irq(&vm->async_ops.lock);
2975 drm_gpuva_ops_free(&vm->gpuvm, op->ops);
2980 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2981 bool post_commit, bool prev_post_commit,
2982 bool next_post_commit)
2984 lockdep_assert_held_write(&vm->lock);
2986 switch (op->base.op) {
2987 case DRM_GPUVA_OP_MAP:
2989 prep_vma_destroy(vm, op->map.vma, post_commit);
2990 xe_vma_destroy_unlocked(op->map.vma);
2993 case DRM_GPUVA_OP_UNMAP:
2995 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2998 down_read(&vm->userptr.notifier_lock);
2999 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
3000 up_read(&vm->userptr.notifier_lock);
3002 xe_vm_insert_vma(vm, vma);
3006 case DRM_GPUVA_OP_REMAP:
3008 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
3010 if (op->remap.prev) {
3011 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
3012 xe_vma_destroy_unlocked(op->remap.prev);
3014 if (op->remap.next) {
3015 prep_vma_destroy(vm, op->remap.next, next_post_commit);
3016 xe_vma_destroy_unlocked(op->remap.next);
3019 down_read(&vm->userptr.notifier_lock);
3020 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
3021 up_read(&vm->userptr.notifier_lock);
3023 xe_vm_insert_vma(vm, vma);
3027 case DRM_GPUVA_OP_PREFETCH:
3031 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
3035 static struct xe_vma_op *next_vma_op(struct xe_vm *vm)
3037 return list_first_entry_or_null(&vm->async_ops.pending,
3038 struct xe_vma_op, link);
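/*
 * Async bind worker: drains vm->async_ops.pending in submission order,
 * executing each op under the VM lock. On failure the error is latched on
 * the VM so userspace can observe it and resume with XE_VM_BIND_OP_RESTART;
 * on completion the op's out-fence is signalled and the op is cleaned up.
 */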
3041 static void xe_vma_op_work_func(struct work_struct *w)
3043 struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work);
3046 struct xe_vma_op *op;
3049 if (vm->async_ops.error && !xe_vm_is_closed(vm))
3052 spin_lock_irq(&vm->async_ops.lock);
3053 op = next_vma_op(vm);
3054 spin_unlock_irq(&vm->async_ops.lock);
3059 if (!xe_vm_is_closed(vm)) {
3060 down_write(&vm->lock);
3061 err = xe_vma_op_execute(vm, op);
3063 drm_warn(&vm->xe->drm,
3064 "Async VM op(%d) failed with %d",
3066 vm_set_async_error(vm, err);
3067 up_write(&vm->lock);
3069 if (vm->async_ops.error_capture.addr)
3070 vm_error_capture(vm, err, 0, 0, 0);
3073 up_write(&vm->lock);
3077 switch (op->base.op) {
3078 case DRM_GPUVA_OP_REMAP:
3079 vma = gpuva_to_vma(op->base.remap.unmap->va);
3080 trace_xe_vma_flush(vma);
3082 down_write(&vm->lock);
3083 xe_vma_destroy_unlocked(vma);
3084 up_write(&vm->lock);
3086 case DRM_GPUVA_OP_UNMAP:
3087 vma = gpuva_to_vma(op->base.unmap.va);
3088 trace_xe_vma_flush(vma);
3090 down_write(&vm->lock);
3091 xe_vma_destroy_unlocked(vma);
3092 up_write(&vm->lock);
3099 if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
3100 &op->fence->fence.flags)) {
3101 if (!xe_vm_no_dma_fences(vm)) {
3102 op->fence->started = true;
3103 wake_up_all(&op->fence->wq);
3105 dma_fence_signal(&op->fence->fence);
3109 xe_vma_op_cleanup(vm, op);
3113 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
3114 struct list_head *ops_list, bool async)
3116 struct xe_vma_op *op, *last_op, *next;
3119 lockdep_assert_held_write(&vm->lock);
3121 last_op = list_last_entry(ops_list, struct xe_vma_op, link);
3124 err = xe_vma_op_execute(vm, last_op);
3127 xe_vma_op_cleanup(vm, last_op);
3130 bool installed = false;
3132 for (i = 0; i < last_op->num_syncs; i++)
3133 installed |= xe_sync_entry_signal(&last_op->syncs[i],
3135 &last_op->fence->fence);
3136 if (!installed && last_op->fence)
3137 dma_fence_signal(&last_op->fence->fence);
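		/*
		 * Async path: the out-fence has been handed to the user syncs
		 * (or signalled if nothing consumed it); queue the ops on the
		 * pending list and kick the worker unless a previous error is
		 * still latched.
		 */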
3139 spin_lock_irq(&vm->async_ops.lock);
3140 list_splice_tail(ops_list, &vm->async_ops.pending);
3141 spin_unlock_irq(&vm->async_ops.lock);
3143 if (!vm->async_ops.error)
3144 queue_work(system_unbound_wq, &vm->async_ops.work);
3150 list_for_each_entry_reverse(op, ops_list, link)
3151 xe_vma_op_unwind(vm, op, op->flags & XE_VMA_OP_COMMITTED,
3152 op->flags & XE_VMA_OP_PREV_COMMITTED,
3153 op->flags & XE_VMA_OP_NEXT_COMMITTED);
3154 list_for_each_entry_safe(op, next, ops_list, link)
3155 xe_vma_op_cleanup(vm, op);
3160 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
3161 struct drm_gpuva_ops **ops,
3166 for (i = num_ops_list - 1; i >= 0; --i) {
3167 struct drm_gpuva_ops *__ops = ops[i];
3168 struct drm_gpuva_op *__op;
3173 drm_gpuva_for_each_op_reverse(__op, __ops) {
3174 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
3176 xe_vma_op_unwind(vm, op,
3177 op->flags & XE_VMA_OP_COMMITTED,
3178 op->flags & XE_VMA_OP_PREV_COMMITTED,
3179 op->flags & XE_VMA_OP_NEXT_COMMITTED);
3182 drm_gpuva_ops_free(&vm->gpuvm, __ops);
3186 #ifdef TEST_VM_ASYNC_OPS_ERROR
3187 #define SUPPORTED_FLAGS \
3188 (FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \
3189 XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | \
3190 XE_VM_BIND_FLAG_NULL | 0xffff)
3192 #define SUPPORTED_FLAGS \
3193 (XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \
3194 XE_VM_BIND_FLAG_IMMEDIATE | XE_VM_BIND_FLAG_NULL | 0xffff)
3196 #define XE_64K_PAGE_MASK 0xffffull
3198 #define MAX_BINDS 512 /* FIXME: Picking an arbitrary upper limit */
3200 static int vm_bind_ioctl_check_args(struct xe_device *xe,
3201 struct drm_xe_vm_bind *args,
3202 struct drm_xe_vm_bind_op **bind_ops,
3208 if (XE_IOCTL_DBG(xe, args->extensions) ||
3209 XE_IOCTL_DBG(xe, !args->num_binds) ||
3210 XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS))
3213 if (args->num_binds > 1) {
3214 u64 __user *bind_user =
3215 u64_to_user_ptr(args->vector_of_binds);
3217 *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) *
3218 args->num_binds, GFP_KERNEL);
3222 err = __copy_from_user(*bind_ops, bind_user,
3223 sizeof(struct drm_xe_vm_bind_op) *
3225 if (XE_IOCTL_DBG(xe, err)) {
3230 *bind_ops = &args->bind;
3233 for (i = 0; i < args->num_binds; ++i) {
3234 u64 range = (*bind_ops)[i].range;
3235 u64 addr = (*bind_ops)[i].addr;
3236 u32 op = (*bind_ops)[i].op;
3237 u32 flags = (*bind_ops)[i].flags;
3238 u32 obj = (*bind_ops)[i].obj;
3239 u64 obj_offset = (*bind_ops)[i].obj_offset;
3240 u32 region = (*bind_ops)[i].region;
3241 bool is_null = flags & XE_VM_BIND_FLAG_NULL;
3244 *async = !!(flags & XE_VM_BIND_FLAG_ASYNC);
3245 } else if (XE_IOCTL_DBG(xe, !*async) ||
3246 XE_IOCTL_DBG(xe, !(flags & XE_VM_BIND_FLAG_ASYNC)) ||
3247 XE_IOCTL_DBG(xe, op == XE_VM_BIND_OP_RESTART)) {
3252 if (XE_IOCTL_DBG(xe, !*async &&
3253 op == XE_VM_BIND_OP_UNMAP_ALL)) {
3258 if (XE_IOCTL_DBG(xe, !*async &&
3259 op == XE_VM_BIND_OP_PREFETCH)) {
3264 if (XE_IOCTL_DBG(xe, op > XE_VM_BIND_OP_PREFETCH) ||
3265 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
3266 XE_IOCTL_DBG(xe, obj && is_null) ||
3267 XE_IOCTL_DBG(xe, obj_offset && is_null) ||
3268 XE_IOCTL_DBG(xe, op != XE_VM_BIND_OP_MAP &&
3270 XE_IOCTL_DBG(xe, !obj &&
3271 op == XE_VM_BIND_OP_MAP &&
3273 XE_IOCTL_DBG(xe, !obj &&
3274 op == XE_VM_BIND_OP_UNMAP_ALL) ||
3275 XE_IOCTL_DBG(xe, addr &&
3276 op == XE_VM_BIND_OP_UNMAP_ALL) ||
3277 XE_IOCTL_DBG(xe, range &&
3278 op == XE_VM_BIND_OP_UNMAP_ALL) ||
3279 XE_IOCTL_DBG(xe, obj &&
3280 op == XE_VM_BIND_OP_MAP_USERPTR) ||
3281 XE_IOCTL_DBG(xe, obj &&
3282 op == XE_VM_BIND_OP_PREFETCH) ||
3283 XE_IOCTL_DBG(xe, region &&
3284 op != XE_VM_BIND_OP_PREFETCH) ||
3285 XE_IOCTL_DBG(xe, !(BIT(region) &
3286 xe->info.mem_region_mask)) ||
3287 XE_IOCTL_DBG(xe, obj &&
3288 op == XE_VM_BIND_OP_UNMAP)) {
3293 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3294 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3295 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3296 XE_IOCTL_DBG(xe, !range && op !=
3297 XE_VM_BIND_OP_RESTART &&
3298 op != XE_VM_BIND_OP_UNMAP_ALL)) {
3307 if (args->num_binds > 1)
3312 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3314 struct xe_device *xe = to_xe_device(dev);
3315 struct xe_file *xef = to_xe_file(file);
3316 struct drm_xe_vm_bind *args = data;
3317 struct drm_xe_sync __user *syncs_user;
3318 struct xe_bo **bos = NULL;
3319 struct drm_gpuva_ops **ops = NULL;
3321 struct xe_exec_queue *q = NULL;
3323 struct xe_sync_entry *syncs = NULL;
3324 struct drm_xe_vm_bind_op *bind_ops;
3325 LIST_HEAD(ops_list);
3330 err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async);
3334 if (args->exec_queue_id) {
3335 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3336 if (XE_IOCTL_DBG(xe, !q)) {
3341 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3343 goto put_exec_queue;
3347 vm = xe_vm_lookup(xef, args->vm_id);
3348 if (XE_IOCTL_DBG(xe, !vm)) {
3350 goto put_exec_queue;
3353 err = down_write_killable(&vm->lock);
3357 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3359 goto release_vm_lock;
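	/*
	 * XE_VM_BIND_OP_RESTART resumes async binds after an error: it clears
	 * the latched error and kicks the worker so the pending ops queue can
	 * drain again.
	 */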
3362 if (bind_ops[0].op == XE_VM_BIND_OP_RESTART) {
3363 if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
3365 if (XE_IOCTL_DBG(xe, !err && args->num_syncs))
3367 if (XE_IOCTL_DBG(xe, !err && !vm->async_ops.error))
3371 trace_xe_vm_restart(vm);
3372 vm_set_async_error(vm, 0);
3374 queue_work(system_unbound_wq, &vm->async_ops.work);
3376 /* Rebinds may have been blocked, give worker a kick */
3377 if (xe_vm_in_compute_mode(vm))
3378 xe_vm_queue_rebind_worker(vm);
3381 goto release_vm_lock;
3384 if (XE_IOCTL_DBG(xe, !vm->async_ops.error &&
3385 async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) {
3387 goto release_vm_lock;
3390 for (i = 0; i < args->num_binds; ++i) {
3391 u64 range = bind_ops[i].range;
3392 u64 addr = bind_ops[i].addr;
3394 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3395 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3397 goto release_vm_lock;
3400 if (bind_ops[i].tile_mask) {
3401 u64 valid_tiles = BIT(xe->info.tile_count) - 1;
3403 if (XE_IOCTL_DBG(xe, bind_ops[i].tile_mask &
3406 goto release_vm_lock;
3411 bos = kzalloc(sizeof(*bos) * args->num_binds, GFP_KERNEL);
3414 goto release_vm_lock;
3417 ops = kzalloc(sizeof(*ops) * args->num_binds, GFP_KERNEL);
3420 goto release_vm_lock;
3423 for (i = 0; i < args->num_binds; ++i) {
3424 struct drm_gem_object *gem_obj;
3425 u64 range = bind_ops[i].range;
3426 u64 addr = bind_ops[i].addr;
3427 u32 obj = bind_ops[i].obj;
3428 u64 obj_offset = bind_ops[i].obj_offset;
3433 gem_obj = drm_gem_object_lookup(file, obj);
3434 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3438 bos[i] = gem_to_xe_bo(gem_obj);
3440 if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
3441 XE_IOCTL_DBG(xe, obj_offset >
3442 bos[i]->size - range)) {
3447 if (bos[i]->flags & XE_BO_INTERNAL_64K) {
3448 if (XE_IOCTL_DBG(xe, obj_offset &
3449 XE_64K_PAGE_MASK) ||
3450 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3451 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3458 if (args->num_syncs) {
3459 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3466 syncs_user = u64_to_user_ptr(args->syncs);
3467 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3468 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3469 &syncs_user[num_syncs], false,
3470 xe_vm_no_dma_fences(vm));
3475 /* Do some error checking first to make the unwind easier */
3476 for (i = 0; i < args->num_binds; ++i) {
3477 u64 range = bind_ops[i].range;
3478 u64 addr = bind_ops[i].addr;
3479 u32 op = bind_ops[i].op;
3480 u32 flags = bind_ops[i].flags;
3482 err = vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op, flags);
3487 for (i = 0; i < args->num_binds; ++i) {
3488 u64 range = bind_ops[i].range;
3489 u64 addr = bind_ops[i].addr;
3490 u32 op = bind_ops[i].op;
3491 u32 flags = bind_ops[i].flags;
3492 u64 obj_offset = bind_ops[i].obj_offset;
3493 u8 tile_mask = bind_ops[i].tile_mask;
3494 u32 region = bind_ops[i].region;
3496 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
3497 addr, range, op, flags,
3499 if (IS_ERR(ops[i])) {
3500 err = PTR_ERR(ops[i]);
3505 err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
3507 i == args->num_binds - 1,
3514 if (list_empty(&ops_list)) {
3519 err = vm_bind_ioctl_ops_execute(vm, &ops_list, async);
3520 up_write(&vm->lock);
3522 for (i = 0; i < args->num_binds; ++i)
3527 if (args->num_binds > 1)
3533 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3535 for (i = 0; err == -ENODATA && i < num_syncs; i++)
3536 xe_sync_entry_signal(&syncs[i], NULL, dma_fence_get_stub());
3538 xe_sync_entry_cleanup(&syncs[num_syncs]);
3542 for (i = 0; i < args->num_binds; ++i)
3545 up_write(&vm->lock);
3550 xe_exec_queue_put(q);
3554 if (args->num_binds > 1)
3556 return err == -ENODATA ? 0 : err;
3560 * xe_vm_lock() - Lock the vm's dma_resv object
3561 * @vm: The struct xe_vm whose lock is to be locked
3562 * @intr: Whether the wait for a contended lock should be interruptible
3564 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3565 * contended lock was interrupted. If @intr is false, the function always succeeds.
3568 int xe_vm_lock(struct xe_vm *vm, bool intr)
3571 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3573 return dma_resv_lock(xe_vm_resv(vm), NULL);
3577 * xe_vm_unlock() - Unlock the vm's dma_resv object
3578 * @vm: The struct xe_vm whose lock is to be released.
3580 * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
3582 void xe_vm_unlock(struct xe_vm *vm)
3584 dma_resv_unlock(xe_vm_resv(vm));
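/*
 * A minimal usage sketch for the lock/unlock pair above (illustrative only;
 * example_walk_vm is a hypothetical caller, not part of this driver):
 *
 *	static int example_walk_vm(struct xe_vm *vm)
 *	{
 *		int err;
 *
 *		err = xe_vm_lock(vm, true);
 *		if (err)
 *			return err;
 *
 *		... inspect state protected by the VM's dma_resv here ...
 *
 *		xe_vm_unlock(vm);
 *		return 0;
 *	}
 */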
3588 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3589 * @vma: VMA to invalidate
3591 * Walks a list of page-table leaves, zeroing the entries owned by this
3592 * VMA, invalidates the TLBs, and blocks until the TLB invalidation is complete.
3595 * Returns 0 for success, negative error code otherwise.
3597 int xe_vm_invalidate_vma(struct xe_vma *vma)
3599 struct xe_device *xe = xe_vma_vm(vma)->xe;
3600 struct xe_tile *tile;
3601 u32 tile_needs_invalidate = 0;
3602 int seqno[XE_MAX_TILES_PER_DEVICE];
3606 xe_assert(xe, xe_vm_in_fault_mode(xe_vma_vm(vma)));
3607 xe_assert(xe, !xe_vma_is_null(vma));
3608 trace_xe_vma_usm_invalidate(vma);
3610 /* Check that we don't race with page-table updates */
3611 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3612 if (xe_vma_is_userptr(vma)) {
3613 WARN_ON_ONCE(!mmu_interval_check_retry
3614 (&vma->userptr.notifier,
3615 vma->userptr.notifier_seq));
3616 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3617 DMA_RESV_USAGE_BOOKKEEP));
3620 xe_bo_assert_held(xe_vma_bo(vma));
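	/*
	 * Zap the PTEs on every tile first, then wait for all issued TLB
	 * invalidations, so that no stale GPU mappings remain once this
	 * function returns.
	 */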
3624 for_each_tile(tile, xe, id) {
3625 if (xe_pt_zap_ptes(tile, vma)) {
3626 tile_needs_invalidate |= BIT(id);
3629 * FIXME: We potentially need to invalidate multiple
3630 * GTs within the tile
3632 seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
3638 for_each_tile(tile, xe, id) {
3639 if (tile_needs_invalidate & BIT(id)) {
3640 ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
3646 vma->usm.tile_invalidated = vma->tile_mask;
3651 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
3653 struct drm_gpuva *gpuva;
3657 if (!down_read_trylock(&vm->lock)) {
3658 drm_printf(p, " Failed to acquire VM lock to dump capture\n");
3661 if (vm->pt_root[gt_id]) {
3662 addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE);
3663 is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo);
3664 drm_printf(p, " VM root: A:0x%llx %s\n", addr,
3665 is_vram ? "VRAM" : "SYS");
3668 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3669 struct xe_vma *vma = gpuva_to_vma(gpuva);
3670 bool is_userptr = xe_vma_is_userptr(vma);
3671 bool is_null = xe_vma_is_null(vma);
3675 } else if (is_userptr) {
3676 struct xe_res_cursor cur;
3678 if (vma->userptr.sg) {
3679 xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE,
3681 addr = xe_res_dma(&cur);
3686 addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE);
3687 is_vram = xe_bo_is_vram(xe_vma_bo(vma));
3689 drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
3690 xe_vma_start(vma), xe_vma_end(vma) - 1,
3692 addr, is_null ? "NULL" : is_userptr ? "USR" :
3693 is_vram ? "VRAM" : "SYS");