drm/xe: Fix splat during error dump
[linux-2.6-microblaze.git] drivers/gpu/drm/xe/xe_vm.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9
10 #include <drm/ttm/ttm_execbuf_util.h>
11 #include <drm/ttm/ttm_tt.h>
12 #include <drm/xe_drm.h>
13 #include <linux/kthread.h>
14 #include <linux/mm.h>
15 #include <linux/swap.h>
16
17 #include "xe_bo.h"
18 #include "xe_device.h"
19 #include "xe_engine.h"
20 #include "xe_gt.h"
21 #include "xe_gt_pagefault.h"
22 #include "xe_gt_tlb_invalidation.h"
23 #include "xe_migrate.h"
24 #include "xe_pm.h"
25 #include "xe_preempt_fence.h"
26 #include "xe_pt.h"
27 #include "xe_res_cursor.h"
28 #include "xe_sync.h"
29 #include "xe_trace.h"
30
31 #define TEST_VM_ASYNC_OPS_ERROR
32
33 /**
34  * xe_vma_userptr_check_repin() - Advisory check for repin needed
35  * @vma: The userptr vma
36  *
37  * Check if the userptr vma has been invalidated since last successful
38  * repin. The check is advisory only and the function can be called
39  * without the vm->userptr.notifier_lock held. There is no guarantee that the
40  * vma userptr will remain valid after a lockless check, so typically
41  * the call needs to be followed by a proper check under the notifier_lock.
42  *
43  * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
44  */
45 int xe_vma_userptr_check_repin(struct xe_vma *vma)
46 {
47         return mmu_interval_check_retry(&vma->userptr.notifier,
48                                         vma->userptr.notifier_seq) ?
49                 -EAGAIN : 0;
50 }
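/*
 * A minimal sketch of the advisory-then-locked pattern described above,
 * assuming the caller holds vm->lock; example_vma_revalidate() and its
 * control flow are illustrative only, not part of the driver:
 *
 *	static int example_vma_revalidate(struct xe_vma *vma)
 *	{
 *		struct xe_vm *vm = vma->vm;
 *		int err = 0;
 *
 *		// Cheap lockless hint: repin if the userptr looks stale.
 *		if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
 *			err = xe_vma_userptr_pin_pages(vma);
 *		if (err)
 *			return err;
 *
 *		// The authoritative check must be done under the notifier lock.
 *		down_read(&vm->userptr.notifier_lock);
 *		err = __xe_vm_userptr_needs_repin(vm);
 *		up_read(&vm->userptr.notifier_lock);
 *
 *		return err;
 *	}
 */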
51
52 int xe_vma_userptr_pin_pages(struct xe_vma *vma)
53 {
54         struct xe_vm *vm = vma->vm;
55         struct xe_device *xe = vm->xe;
56         const unsigned long num_pages =
57                 (vma->end - vma->start + 1) >> PAGE_SHIFT;
58         struct page **pages;
59         bool in_kthread = !current->mm;
60         unsigned long notifier_seq;
61         int pinned, ret, i;
62         bool read_only = vma->pte_flags & XE_PTE_READ_ONLY;
63
64         lockdep_assert_held(&vm->lock);
65         XE_BUG_ON(!xe_vma_is_userptr(vma));
66 retry:
67         if (vma->destroyed)
68                 return 0;
69
70         notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
71         if (notifier_seq == vma->userptr.notifier_seq)
72                 return 0;
73
74         pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
75         if (!pages)
76                 return -ENOMEM;
77
78         if (vma->userptr.sg) {
79                 dma_unmap_sgtable(xe->drm.dev,
80                                   vma->userptr.sg,
81                                   read_only ? DMA_TO_DEVICE :
82                                   DMA_BIDIRECTIONAL, 0);
83                 sg_free_table(vma->userptr.sg);
84                 vma->userptr.sg = NULL;
85         }
86
87         pinned = ret = 0;
88         if (in_kthread) {
89                 if (!mmget_not_zero(vma->userptr.notifier.mm)) {
90                         ret = -EFAULT;
91                         goto mm_closed;
92                 }
93                 kthread_use_mm(vma->userptr.notifier.mm);
94         }
95
96         while (pinned < num_pages) {
97                 ret = get_user_pages_fast(vma->userptr.ptr + pinned * PAGE_SIZE,
98                                           num_pages - pinned,
99                                           read_only ? 0 : FOLL_WRITE,
100                                           &pages[pinned]);
101                 if (ret < 0) {
102                         if (in_kthread)
103                                 ret = 0;
104                         break;
105                 }
106
107                 pinned += ret;
108                 ret = 0;
109         }
110
111         if (in_kthread) {
112                 kthread_unuse_mm(vma->userptr.notifier.mm);
113                 mmput(vma->userptr.notifier.mm);
114         }
115 mm_closed:
116         if (ret)
117                 goto out;
118
119         ret = sg_alloc_table_from_pages(&vma->userptr.sgt, pages, pinned,
120                                         0, (u64)pinned << PAGE_SHIFT,
121                                         GFP_KERNEL);
122         if (ret) {
123                 vma->userptr.sg = NULL;
124                 goto out;
125         }
126         vma->userptr.sg = &vma->userptr.sgt;
127
128         ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg,
129                               read_only ? DMA_TO_DEVICE :
130                               DMA_BIDIRECTIONAL,
131                               DMA_ATTR_SKIP_CPU_SYNC |
132                               DMA_ATTR_NO_KERNEL_MAPPING);
133         if (ret) {
134                 sg_free_table(vma->userptr.sg);
135                 vma->userptr.sg = NULL;
136                 goto out;
137         }
138
139         for (i = 0; i < pinned; ++i) {
140                 if (!read_only) {
141                         lock_page(pages[i]);
142                         set_page_dirty(pages[i]);
143                         unlock_page(pages[i]);
144                 }
145
146                 mark_page_accessed(pages[i]);
147         }
148
149 out:
150         release_pages(pages, pinned);
151         kvfree(pages);
152
153         if (!(ret < 0)) {
154                 vma->userptr.notifier_seq = notifier_seq;
155                 if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
156                         goto retry;
157         }
158
159         return ret < 0 ? ret : 0;
160 }
161
162 static bool preempt_fences_waiting(struct xe_vm *vm)
163 {
164         struct xe_engine *e;
165
166         lockdep_assert_held(&vm->lock);
167         xe_vm_assert_held(vm);
168
169         list_for_each_entry(e, &vm->preempt.engines, compute.link) {
170                 if (!e->compute.pfence || (e->compute.pfence &&
171                     test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
172                              &e->compute.pfence->flags))) {
173                         return true;
174                 }
175         }
176
177         return false;
178 }
179
180 static void free_preempt_fences(struct list_head *list)
181 {
182         struct list_head *link, *next;
183
184         list_for_each_safe(link, next, list)
185                 xe_preempt_fence_free(to_preempt_fence_from_link(link));
186 }
187
188 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
189                                 unsigned int *count)
190 {
191         lockdep_assert_held(&vm->lock);
192         xe_vm_assert_held(vm);
193
194         if (*count >= vm->preempt.num_engines)
195                 return 0;
196
197         for (; *count < vm->preempt.num_engines; ++(*count)) {
198                 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
199
200                 if (IS_ERR(pfence))
201                         return PTR_ERR(pfence);
202
203                 list_move_tail(xe_preempt_fence_link(pfence), list);
204         }
205
206         return 0;
207 }
208
209 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
210 {
211         struct xe_engine *e;
212
213         xe_vm_assert_held(vm);
214
215         list_for_each_entry(e, &vm->preempt.engines, compute.link) {
216                 if (e->compute.pfence) {
217                         long timeout = dma_fence_wait(e->compute.pfence, false);
218
219                         if (timeout < 0)
220                                 return -ETIME;
221                         dma_fence_put(e->compute.pfence);
222                         e->compute.pfence = NULL;
223                 }
224         }
225
226         return 0;
227 }
228
229 static bool xe_vm_is_idle(struct xe_vm *vm)
230 {
231         struct xe_engine *e;
232
233         xe_vm_assert_held(vm);
234         list_for_each_entry(e, &vm->preempt.engines, compute.link) {
235                 if (!xe_engine_is_idle(e))
236                         return false;
237         }
238
239         return true;
240 }
241
242 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
243 {
244         struct list_head *link;
245         struct xe_engine *e;
246
247         list_for_each_entry(e, &vm->preempt.engines, compute.link) {
248                 struct dma_fence *fence;
249
250                 link = list->next;
251                 XE_BUG_ON(link == list);
252
253                 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
254                                              e, e->compute.context,
255                                              ++e->compute.seqno);
256                 dma_fence_put(e->compute.pfence);
257                 e->compute.pfence = fence;
258         }
259 }
260
261 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
262 {
263         struct xe_engine *e;
264         struct ww_acquire_ctx ww;
265         int err;
266
267         err = xe_bo_lock(bo, &ww, vm->preempt.num_engines, true);
268         if (err)
269                 return err;
270
271         list_for_each_entry(e, &vm->preempt.engines, compute.link)
272                 if (e->compute.pfence) {
273                         dma_resv_add_fence(bo->ttm.base.resv,
274                                            e->compute.pfence,
275                                            DMA_RESV_USAGE_BOOKKEEP);
276                 }
277
278         xe_bo_unlock(bo, &ww);
279         return 0;
280 }
281
282 /**
283  * xe_vm_fence_all_extobjs() - Add a fence to vm's external objects' resv
284  * @vm: The vm.
285  * @fence: The fence to add.
286  * @usage: The resv usage for the fence.
287  *
288  * Loops over all of the vm's external object bindings and adds a @fence
289  * with the given @usage to all of the external objects' reservation
290  * objects.
291  */
292 void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
293                              enum dma_resv_usage usage)
294 {
295         struct xe_vma *vma;
296
297         list_for_each_entry(vma, &vm->extobj.list, extobj.link)
298                 dma_resv_add_fence(vma->bo->ttm.base.resv, fence, usage);
299 }
300
301 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
302 {
303         struct xe_engine *e;
304
305         lockdep_assert_held(&vm->lock);
306         xe_vm_assert_held(vm);
307
308         list_for_each_entry(e, &vm->preempt.engines, compute.link) {
309                 e->ops->resume(e);
310
311                 dma_resv_add_fence(&vm->resv, e->compute.pfence,
312                                    DMA_RESV_USAGE_BOOKKEEP);
313                 xe_vm_fence_all_extobjs(vm, e->compute.pfence,
314                                         DMA_RESV_USAGE_BOOKKEEP);
315         }
316 }
317
318 int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e)
319 {
320         struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
321         struct ttm_validate_buffer *tv;
322         struct ww_acquire_ctx ww;
323         struct list_head objs;
324         struct dma_fence *pfence;
325         int err;
326         bool wait;
327
328         XE_BUG_ON(!xe_vm_in_compute_mode(vm));
329
330         down_write(&vm->lock);
331
332         err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1);
333         if (err)
334                 goto out_unlock_outer;
335
336         pfence = xe_preempt_fence_create(e, e->compute.context,
337                                          ++e->compute.seqno);
338         if (!pfence) {
339                 err = -ENOMEM;
340                 goto out_unlock;
341         }
342
343         list_add(&e->compute.link, &vm->preempt.engines);
344         ++vm->preempt.num_engines;
345         e->compute.pfence = pfence;
346
347         down_read(&vm->userptr.notifier_lock);
348
349         dma_resv_add_fence(&vm->resv, pfence,
350                            DMA_RESV_USAGE_BOOKKEEP);
351
352         xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP);
353
354         /*
355          * Check to see if a preemption on the VM or a userptr
356          * invalidation is in flight; if so, trigger this preempt fence
357          * to sync state with the other preempt fences on the VM.
358          */
359         wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
360         if (wait)
361                 dma_fence_enable_sw_signaling(pfence);
362
363         up_read(&vm->userptr.notifier_lock);
364
365 out_unlock:
366         xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
367 out_unlock_outer:
368         up_write(&vm->lock);
369
370         return err;
371 }
372
373 /**
374  * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
375  * that need repinning.
376  * @vm: The VM.
377  *
378  * This function checks whether the VM has userptrs that need repinning,
379  * and provides a release-type barrier on the userptr.notifier_lock after
380  * checking.
381  *
382  * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
383  */
384 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
385 {
386         lockdep_assert_held_read(&vm->userptr.notifier_lock);
387
388         return (list_empty(&vm->userptr.repin_list) &&
389                 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
390 }
391
392 /**
393  * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv
394  * objects of the vm's external buffer objects.
395  * @vm: The vm.
396  * @ww: Pointer to a struct ww_acquire_ctx locking context.
397  * @tv_onstack: Array size XE_ONSTACK_TV of storage for the struct
398  * ttm_validate_buffers used for locking.
399  * @tv: Pointer to a pointer that on output contains the actual storage used.
400  * @objs: List head for the buffer objects locked.
401  * @intr: Whether to lock interruptible.
402  * @num_shared: Number of dma-fence slots to reserve in the locked objects.
403  *
404  * Locks the vm dma-resv object and all the dma-resv objects of the
405  * buffer objects on the vm external object list. The TTM utilities require
406  * a list of struct ttm_validate_buffers pointing to the actual buffer
407  * objects to lock. Storage for those struct ttm_validate_buffers should
408  * be provided in @tv_onstack, and is typically reserved on the stack
409  * of the caller. If the size of @tv_onstack isn't sufficient, then
410  * storage will be allocated internally using kvmalloc().
411  *
412  * The function performs deadlock handling internally, and after a
413  * successful return the ww locking transaction should be considered
414  * sealed.
415  *
416  * Return: 0 on success, negative error code on error. In particular, if
417  * @intr is set to true, -EINTR or -ERESTARTSYS may be returned. In case
418  * of error, any locking performed has been reverted.
419  */
420 int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
421                         struct ttm_validate_buffer *tv_onstack,
422                         struct ttm_validate_buffer **tv,
423                         struct list_head *objs,
424                         bool intr,
425                         unsigned int num_shared)
426 {
427         struct ttm_validate_buffer *tv_vm, *tv_bo;
428         struct xe_vma *vma, *next;
429         LIST_HEAD(dups);
430         int err;
431
432         lockdep_assert_held(&vm->lock);
433
434         if (vm->extobj.entries < XE_ONSTACK_TV) {
435                 tv_vm = tv_onstack;
436         } else {
437                 tv_vm = kvmalloc_array(vm->extobj.entries + 1, sizeof(*tv_vm),
438                                        GFP_KERNEL);
439                 if (!tv_vm)
440                         return -ENOMEM;
441         }
442         tv_bo = tv_vm + 1;
443
444         INIT_LIST_HEAD(objs);
445         list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
446                 tv_bo->num_shared = num_shared;
447                 tv_bo->bo = &vma->bo->ttm;
448
449                 list_add_tail(&tv_bo->head, objs);
450                 tv_bo++;
451         }
452         tv_vm->num_shared = num_shared;
453         tv_vm->bo = xe_vm_ttm_bo(vm);
454         list_add_tail(&tv_vm->head, objs);
455         err = ttm_eu_reserve_buffers(ww, objs, intr, &dups);
456         if (err)
457                 goto out_err;
458
459         spin_lock(&vm->notifier.list_lock);
460         list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list,
461                                  notifier.rebind_link) {
462                 xe_bo_assert_held(vma->bo);
463
464                 list_del_init(&vma->notifier.rebind_link);
465                 if (vma->gt_present && !vma->destroyed)
466                         list_move_tail(&vma->rebind_link, &vm->rebind_list);
467         }
468         spin_unlock(&vm->notifier.list_lock);
469
470         *tv = tv_vm;
471         return 0;
472
473 out_err:
474         if (tv_vm != tv_onstack)
475                 kvfree(tv_vm);
476
477         return err;
478 }
479
480 /**
481  * xe_vm_unlock_dma_resv() - Unlock reservation objects locked by
482  * xe_vm_lock_dma_resv()
483  * @vm: The vm.
484  * @tv_onstack: The @tv_onstack array given to xe_vm_lock_dma_resv().
485  * @tv: The value of *@tv given by xe_vm_lock_dma_resv().
486  * @ww: The ww_acquire_context used for locking.
487  * @objs: The list returned from xe_vm_lock_dma_resv().
488  *
489  * Unlocks the reservation objects and frees any memory allocated by
490  * xe_vm_lock_dma_resv().
491  */
492 void xe_vm_unlock_dma_resv(struct xe_vm *vm,
493                            struct ttm_validate_buffer *tv_onstack,
494                            struct ttm_validate_buffer *tv,
495                            struct ww_acquire_ctx *ww,
496                            struct list_head *objs)
497 {
498         /*
499          * Nothing should've been able to enter the list while we were locked,
500          * since we've held the dma-resvs of all the vm's external objects,
501          * and holding the dma_resv of an object is required for list
502          * addition, and we shouldn't add ourselves.
503          */
504         XE_WARN_ON(!list_empty(&vm->notifier.rebind_list));
505
506         ttm_eu_backoff_reservation(ww, objs);
507         if (tv && tv != tv_onstack)
508                 kvfree(tv);
509 }
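/*
 * A minimal sketch of the lock/unlock pairing documented above, assuming
 * the caller already holds vm->lock; example_with_resvs_locked() is
 * illustrative only (compare xe_vm_add_compute_engine() above):
 *
 *	static int example_with_resvs_locked(struct xe_vm *vm)
 *	{
 *		struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
 *		struct ttm_validate_buffer *tv;
 *		struct ww_acquire_ctx ww;
 *		struct list_head objs;
 *		int err;
 *
 *		err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs,
 *					  true, 1);
 *		if (err)
 *			return err;
 *
 *		// The vm resv and every external-object resv are now held.
 *
 *		xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
 *		return 0;
 *	}
 */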
510
511 static void preempt_rebind_work_func(struct work_struct *w)
512 {
513         struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
514         struct xe_vma *vma;
515         struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
516         struct ttm_validate_buffer *tv;
517         struct ww_acquire_ctx ww;
518         struct list_head objs;
519         struct dma_fence *rebind_fence;
520         unsigned int fence_count = 0;
521         LIST_HEAD(preempt_fences);
522         int err;
523         long wait;
524         int __maybe_unused tries = 0;
525
526         XE_BUG_ON(!xe_vm_in_compute_mode(vm));
527         trace_xe_vm_rebind_worker_enter(vm);
528
529         if (xe_vm_is_closed(vm)) {
530                 trace_xe_vm_rebind_worker_exit(vm);
531                 return;
532         }
533
534         down_write(&vm->lock);
535
536 retry:
537         if (vm->async_ops.error)
538                 goto out_unlock_outer;
539
540         /*
541          * Extreme corner case where we exit a VM error state with a
542          * munmap-style VM unbind in flight which requires a rebind. In this
543          * case the rebind needs to install some fences into the dma-resv
544          * slots. The worker to do this is already queued, so let that worker
545          * make progress by dropping vm->lock and trying this again.
546          */
547         if (vm->async_ops.munmap_rebind_inflight) {
548                 up_write(&vm->lock);
549                 flush_work(&vm->async_ops.work);
550                 goto retry;
551         }
552
553         if (xe_vm_userptr_check_repin(vm)) {
554                 err = xe_vm_userptr_pin(vm);
555                 if (err)
556                         goto out_unlock_outer;
557         }
558
559         err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs,
560                                   false, vm->preempt.num_engines);
561         if (err)
562                 goto out_unlock_outer;
563
564         if (xe_vm_is_idle(vm)) {
565                 vm->preempt.rebind_deactivated = true;
566                 goto out_unlock;
567         }
568
569         /* Fresh preempt fences already installed. Everything is running. */
570         if (!preempt_fences_waiting(vm))
571                 goto out_unlock;
572
573         /*
574          * This makes sure vm is completely suspended and also balances
575          * xe_engine suspend- and resume; we resume *all* vm engines below.
576          */
577         err = wait_for_existing_preempt_fences(vm);
578         if (err)
579                 goto out_unlock;
580
581         err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
582         if (err)
583                 goto out_unlock;
584
585         list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
586                 if (xe_vma_is_userptr(vma) || vma->destroyed)
587                         continue;
588
589                 err = xe_bo_validate(vma->bo, vm, false);
590                 if (err)
591                         goto out_unlock;
592         }
593
594         rebind_fence = xe_vm_rebind(vm, true);
595         if (IS_ERR(rebind_fence)) {
596                 err = PTR_ERR(rebind_fence);
597                 goto out_unlock;
598         }
599
600         if (rebind_fence) {
601                 dma_fence_wait(rebind_fence, false);
602                 dma_fence_put(rebind_fence);
603         }
604
605         /* Wait on munmap-style VM unbinds */
606         wait = dma_resv_wait_timeout(&vm->resv,
607                                      DMA_RESV_USAGE_KERNEL,
608                                      false, MAX_SCHEDULE_TIMEOUT);
609         if (wait <= 0) {
610                 err = -ETIME;
611                 goto out_unlock;
612         }
613
614 #define retry_required(__tries, __vm) \
615         (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
616         (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
617         __xe_vm_userptr_needs_repin(__vm))
618
619         down_read(&vm->userptr.notifier_lock);
620         if (retry_required(tries, vm)) {
621                 up_read(&vm->userptr.notifier_lock);
622                 err = -EAGAIN;
623                 goto out_unlock;
624         }
625
626 #undef retry_required
627
628         /* Point of no return. */
629         arm_preempt_fences(vm, &preempt_fences);
630         resume_and_reinstall_preempt_fences(vm);
631         up_read(&vm->userptr.notifier_lock);
632
633 out_unlock:
634         xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
635 out_unlock_outer:
636         if (err == -EAGAIN) {
637                 trace_xe_vm_rebind_worker_retry(vm);
638                 goto retry;
639         }
640         up_write(&vm->lock);
641
642         free_preempt_fences(&preempt_fences);
643
644         XE_WARN_ON(err < 0);    /* TODO: Kill VM or put in error state */
645         trace_xe_vm_rebind_worker_exit(vm);
646 }
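/*
 * The rebind worker above, condensed to its happy path (a sketch of the
 * ordering only; error handling and retries omitted):
 *
 *	repin invalidated userptrs (xe_vm_userptr_pin)
 *	lock the vm resv and all external-object resvs (xe_vm_lock_dma_resv)
 *	wait for the existing preempt fences, suspending all engines
 *	allocate fresh preempt fences
 *	validate evicted BOs and rebind them (xe_vm_rebind)
 *	wait for rebinds and munmap-style unbinds (KERNEL resv slot)
 *	re-check userptrs under notifier_lock, retry on -EAGAIN
 *	arm the new preempt fences and resume the engines
 */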
647
648 struct async_op_fence;
649 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
650                         struct xe_engine *e, struct xe_sync_entry *syncs,
651                         u32 num_syncs, struct async_op_fence *afence);
652
653 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
654                                    const struct mmu_notifier_range *range,
655                                    unsigned long cur_seq)
656 {
657         struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
658         struct xe_vm *vm = vma->vm;
659         struct dma_resv_iter cursor;
660         struct dma_fence *fence;
661         long err;
662
663         XE_BUG_ON(!xe_vma_is_userptr(vma));
664         trace_xe_vma_userptr_invalidate(vma);
665
666         if (!mmu_notifier_range_blockable(range))
667                 return false;
668
669         down_write(&vm->userptr.notifier_lock);
670         mmu_interval_set_seq(mni, cur_seq);
671
672         /* No need to stop gpu access if the userptr is not yet bound. */
673         if (!vma->userptr.initial_bind) {
674                 up_write(&vm->userptr.notifier_lock);
675                 return true;
676         }
677
678         /*
679          * Tell exec and rebind worker they need to repin and rebind this
680          * userptr.
681          */
682         if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->gt_present) {
683                 spin_lock(&vm->userptr.invalidated_lock);
684                 list_move_tail(&vma->userptr.invalidate_link,
685                                &vm->userptr.invalidated);
686                 spin_unlock(&vm->userptr.invalidated_lock);
687         }
688
689         up_write(&vm->userptr.notifier_lock);
690
691         /*
692          * Preempt fences turn into schedule disables, pipeline these.
693          * Note that even in fault mode, we need to wait for binds and
694          * unbinds to complete, and those are attached as BOOKKEEP fences
695          * to the vm.
696          */
697         dma_resv_iter_begin(&cursor, &vm->resv,
698                             DMA_RESV_USAGE_BOOKKEEP);
699         dma_resv_for_each_fence_unlocked(&cursor, fence)
700                 dma_fence_enable_sw_signaling(fence);
701         dma_resv_iter_end(&cursor);
702
703         err = dma_resv_wait_timeout(&vm->resv,
704                                     DMA_RESV_USAGE_BOOKKEEP,
705                                     false, MAX_SCHEDULE_TIMEOUT);
706         XE_WARN_ON(err <= 0);
707
708         if (xe_vm_in_fault_mode(vm)) {
709                 err = xe_vm_invalidate_vma(vma);
710                 XE_WARN_ON(err);
711         }
712
713         trace_xe_vma_userptr_invalidate_complete(vma);
714
715         return true;
716 }
717
718 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
719         .invalidate = vma_userptr_invalidate,
720 };
721
722 int xe_vm_userptr_pin(struct xe_vm *vm)
723 {
724         struct xe_vma *vma, *next;
725         int err = 0;
726         LIST_HEAD(tmp_evict);
727
728         lockdep_assert_held_write(&vm->lock);
729
730         /* Collect invalidated userptrs */
731         spin_lock(&vm->userptr.invalidated_lock);
732         list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
733                                  userptr.invalidate_link) {
734                 list_del_init(&vma->userptr.invalidate_link);
735                 list_move_tail(&vma->userptr_link, &vm->userptr.repin_list);
736         }
737         spin_unlock(&vm->userptr.invalidated_lock);
738
739         /* Pin and move to temporary list */
740         list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, userptr_link) {
741                 err = xe_vma_userptr_pin_pages(vma);
742                 if (err < 0)
743                         goto out_err;
744
745                 list_move_tail(&vma->userptr_link, &tmp_evict);
746         }
747
748         /* Take lock and move to rebind_list for rebinding. */
749         err = dma_resv_lock_interruptible(&vm->resv, NULL);
750         if (err)
751                 goto out_err;
752
753         list_for_each_entry_safe(vma, next, &tmp_evict, userptr_link) {
754                 list_del_init(&vma->userptr_link);
755                 list_move_tail(&vma->rebind_link, &vm->rebind_list);
756         }
757
758         dma_resv_unlock(&vm->resv);
759
760         return 0;
761
762 out_err:
763         list_splice_tail(&tmp_evict, &vm->userptr.repin_list);
764
765         return err;
766 }
767
768 /**
769  * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
770  * that need repinning.
771  * @vm: The VM.
772  *
773  * This function does an advisory check for whether the VM has userptrs that
774  * need repinning.
775  *
776  * Return: 0 if there are no indications of userptrs needing repinning,
777  * -EAGAIN if there are.
778  */
779 int xe_vm_userptr_check_repin(struct xe_vm *vm)
780 {
781         return (list_empty_careful(&vm->userptr.repin_list) &&
782                 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
783 }
784
785 static struct dma_fence *
786 xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
787                struct xe_sync_entry *syncs, u32 num_syncs);
788
789 struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
790 {
791         struct dma_fence *fence = NULL;
792         struct xe_vma *vma, *next;
793
794         lockdep_assert_held(&vm->lock);
795         if (xe_vm_no_dma_fences(vm) && !rebind_worker)
796                 return NULL;
797
798         xe_vm_assert_held(vm);
799         list_for_each_entry_safe(vma, next, &vm->rebind_list, rebind_link) {
800                 XE_WARN_ON(!vma->gt_present);
801
802                 list_del_init(&vma->rebind_link);
803                 dma_fence_put(fence);
804                 if (rebind_worker)
805                         trace_xe_vma_rebind_worker(vma);
806                 else
807                         trace_xe_vma_rebind_exec(vma);
808                 fence = xe_vm_bind_vma(vma, NULL, NULL, 0);
809                 if (IS_ERR(fence))
810                         return fence;
811         }
812
813         return fence;
814 }
815
816 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
817                                     struct xe_bo *bo,
818                                     u64 bo_offset_or_userptr,
819                                     u64 start, u64 end,
820                                     bool read_only,
821                                     u64 gt_mask)
822 {
823         struct xe_vma *vma;
824         struct xe_gt *gt;
825         u8 id;
826
827         XE_BUG_ON(start >= end);
828         XE_BUG_ON(end >= vm->size);
829
830         vma = kzalloc(sizeof(*vma), GFP_KERNEL);
831         if (!vma) {
832                 vma = ERR_PTR(-ENOMEM);
833                 return vma;
834         }
835
836         INIT_LIST_HEAD(&vma->rebind_link);
837         INIT_LIST_HEAD(&vma->unbind_link);
838         INIT_LIST_HEAD(&vma->userptr_link);
839         INIT_LIST_HEAD(&vma->userptr.invalidate_link);
840         INIT_LIST_HEAD(&vma->notifier.rebind_link);
841         INIT_LIST_HEAD(&vma->extobj.link);
842
843         vma->vm = vm;
844         vma->start = start;
845         vma->end = end;
846         if (read_only)
847                 vma->pte_flags = XE_PTE_READ_ONLY;
848
849         if (gt_mask) {
850                 vma->gt_mask = gt_mask;
851         } else {
852                 for_each_gt(gt, vm->xe, id)
853                         if (!xe_gt_is_media_type(gt))
854                                 vma->gt_mask |= 0x1 << id;
855         }
856
857         if (vm->xe->info.platform == XE_PVC)
858                 vma->use_atomic_access_pte_bit = true;
859
860         if (bo) {
861                 xe_bo_assert_held(bo);
862                 vma->bo_offset = bo_offset_or_userptr;
863                 vma->bo = xe_bo_get(bo);
864                 list_add_tail(&vma->bo_link, &bo->vmas);
865         } else /* userptr */ {
866                 u64 size = end - start + 1;
867                 int err;
868
869                 vma->userptr.ptr = bo_offset_or_userptr;
870
871                 err = mmu_interval_notifier_insert(&vma->userptr.notifier,
872                                                    current->mm,
873                                                    vma->userptr.ptr, size,
874                                                    &vma_userptr_notifier_ops);
875                 if (err) {
876                         kfree(vma);
877                         vma = ERR_PTR(err);
878                         return vma;
879                 }
880
881                 vma->userptr.notifier_seq = LONG_MAX;
882                 xe_vm_get(vm);
883         }
884
885         return vma;
886 }
887
888 static bool vm_remove_extobj(struct xe_vma *vma)
889 {
890         if (!list_empty(&vma->extobj.link)) {
891                 vma->vm->extobj.entries--;
892                 list_del_init(&vma->extobj.link);
893                 return true;
894         }
895         return false;
896 }
897
898 static void xe_vma_destroy_late(struct xe_vma *vma)
899 {
900         struct xe_vm *vm = vma->vm;
901         struct xe_device *xe = vm->xe;
902         bool read_only = vma->pte_flags & XE_PTE_READ_ONLY;
903
904         if (xe_vma_is_userptr(vma)) {
905                 if (vma->userptr.sg) {
906                         dma_unmap_sgtable(xe->drm.dev,
907                                           vma->userptr.sg,
908                                           read_only ? DMA_TO_DEVICE :
909                                           DMA_BIDIRECTIONAL, 0);
910                         sg_free_table(vma->userptr.sg);
911                         vma->userptr.sg = NULL;
912                 }
913
914                 /*
915                  * Since userptr pages are not pinned, we can't remove
916                  * the notifier until we're sure the GPU is not accessing
917                  * them anymore.
918                  */
919                 mmu_interval_notifier_remove(&vma->userptr.notifier);
920                 xe_vm_put(vm);
921         } else {
922                 xe_bo_put(vma->bo);
923         }
924
925         kfree(vma);
926 }
927
928 static void vma_destroy_work_func(struct work_struct *w)
929 {
930         struct xe_vma *vma =
931                 container_of(w, struct xe_vma, destroy_work);
932
933         xe_vma_destroy_late(vma);
934 }
935
936 static struct xe_vma *
937 bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
938                             struct xe_vma *ignore)
939 {
940         struct xe_vma *vma;
941
942         list_for_each_entry(vma, &bo->vmas, bo_link) {
943                 if (vma != ignore && vma->vm == vm && !vma->destroyed)
944                         return vma;
945         }
946
947         return NULL;
948 }
949
950 static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm,
951                                  struct xe_vma *ignore)
952 {
953         struct ww_acquire_ctx ww;
954         bool ret;
955
956         xe_bo_lock(bo, &ww, 0, false);
957         ret = !!bo_has_vm_references_locked(bo, vm, ignore);
958         xe_bo_unlock(bo, &ww);
959
960         return ret;
961 }
962
963 static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
964 {
965         list_add(&vma->extobj.link, &vm->extobj.list);
966         vm->extobj.entries++;
967 }
968
969 static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
970 {
971         struct xe_bo *bo = vma->bo;
972
973         lockdep_assert_held_write(&vm->lock);
974
975         if (bo_has_vm_references(bo, vm, vma))
976                 return;
977
978         __vm_insert_extobj(vm, vma);
979 }
980
981 static void vma_destroy_cb(struct dma_fence *fence,
982                            struct dma_fence_cb *cb)
983 {
984         struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
985
986         INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
987         queue_work(system_unbound_wq, &vma->destroy_work);
988 }
989
990 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
991 {
992         struct xe_vm *vm = vma->vm;
993
994         lockdep_assert_held_write(&vm->lock);
995         XE_BUG_ON(!list_empty(&vma->unbind_link));
996
997         if (xe_vma_is_userptr(vma)) {
998                 XE_WARN_ON(!vma->destroyed);
999                 spin_lock(&vm->userptr.invalidated_lock);
1000                 list_del_init(&vma->userptr.invalidate_link);
1001                 spin_unlock(&vm->userptr.invalidated_lock);
1002                 list_del(&vma->userptr_link);
1003         } else {
1004                 xe_bo_assert_held(vma->bo);
1005                 list_del(&vma->bo_link);
1006
1007                 spin_lock(&vm->notifier.list_lock);
1008                 list_del(&vma->notifier.rebind_link);
1009                 spin_unlock(&vm->notifier.list_lock);
1010
1011                 if (!vma->bo->vm && vm_remove_extobj(vma)) {
1012                         struct xe_vma *other;
1013
1014                         other = bo_has_vm_references_locked(vma->bo, vm, NULL);
1015
1016                         if (other)
1017                                 __vm_insert_extobj(vm, other);
1018                 }
1019         }
1020
1021         xe_vm_assert_held(vm);
1022         if (!list_empty(&vma->rebind_link))
1023                 list_del(&vma->rebind_link);
1024
1025         if (fence) {
1026                 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1027                                                  vma_destroy_cb);
1028
1029                 if (ret) {
1030                         XE_WARN_ON(ret != -ENOENT);
1031                         xe_vma_destroy_late(vma);
1032                 }
1033         } else {
1034                 xe_vma_destroy_late(vma);
1035         }
1036 }
1037
1038 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1039 {
1040         struct ttm_validate_buffer tv[2];
1041         struct ww_acquire_ctx ww;
1042         struct xe_bo *bo = vma->bo;
1043         LIST_HEAD(objs);
1044         LIST_HEAD(dups);
1045         int err;
1046
1047         memset(tv, 0, sizeof(tv));
1048         tv[0].bo = xe_vm_ttm_bo(vma->vm);
1049         list_add(&tv[0].head, &objs);
1050
1051         if (bo) {
1052                 tv[1].bo = &xe_bo_get(bo)->ttm;
1053                 list_add(&tv[1].head, &objs);
1054         }
1055         err = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
1056         XE_WARN_ON(err);
1057
1058         xe_vma_destroy(vma, NULL);
1059
1060         ttm_eu_backoff_reservation(&ww, &objs);
1061         if (bo)
1062                 xe_bo_put(bo);
1063 }
1064
1065 static struct xe_vma *to_xe_vma(const struct rb_node *node)
1066 {
1067         BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);
1068         return (struct xe_vma *)node;
1069 }
1070
1071 static int xe_vma_cmp(const struct xe_vma *a, const struct xe_vma *b)
1072 {
1073         if (a->end < b->start) {
1074                 return -1;
1075         } else if (b->end < a->start) {
1076                 return 1;
1077         } else {
1078                 return 0;
1079         }
1080 }
1081
1082 static bool xe_vma_less_cb(struct rb_node *a, const struct rb_node *b)
1083 {
1084         return xe_vma_cmp(to_xe_vma(a), to_xe_vma(b)) < 0;
1085 }
1086
1087 int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node)
1088 {
1089         struct xe_vma *cmp = to_xe_vma(node);
1090         const struct xe_vma *own = key;
1091
1092         if (own->start > cmp->end)
1093                 return 1;
1094
1095         if (own->end < cmp->start)
1096                 return -1;
1097
1098         return 0;
1099 }
1100
1101 struct xe_vma *
1102 xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma)
1103 {
1104         struct rb_node *node;
1105
1106         if (xe_vm_is_closed(vm))
1107                 return NULL;
1108
1109         XE_BUG_ON(vma->end >= vm->size);
1110         lockdep_assert_held(&vm->lock);
1111
1112         node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb);
1113
1114         return node ? to_xe_vma(node) : NULL;
1115 }
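/*
 * A minimal sketch of an overlap query, assuming vm->lock is held; only
 * start/end of the on-stack key need to be filled in since
 * xe_vma_cmp_vma_cb() looks at nothing else (example_range_is_free() is
 * illustrative only):
 *
 *	static bool example_range_is_free(struct xe_vm *vm, u64 addr, u64 range)
 *	{
 *		struct xe_vma lookup = {
 *			.start = addr,
 *			.end = addr + range - 1,
 *		};
 *
 *		return !xe_vm_find_overlapping_vma(vm, &lookup);
 *	}
 */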
1116
1117 static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1118 {
1119         XE_BUG_ON(vma->vm != vm);
1120         lockdep_assert_held(&vm->lock);
1121
1122         rb_add(&vma->vm_node, &vm->vmas, xe_vma_less_cb);
1123 }
1124
1125 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1126 {
1127         XE_BUG_ON(vma->vm != vm);
1128         lockdep_assert_held(&vm->lock);
1129
1130         rb_erase(&vma->vm_node, &vm->vmas);
1131         if (vm->usm.last_fault_vma == vma)
1132                 vm->usm.last_fault_vma = NULL;
1133 }
1134
1135 static void async_op_work_func(struct work_struct *w);
1136 static void vm_destroy_work_func(struct work_struct *w);
1137
1138 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1139 {
1140         struct xe_vm *vm;
1141         int err, i = 0, number_gts = 0;
1142         struct xe_gt *gt;
1143         u8 id;
1144
1145         vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1146         if (!vm)
1147                 return ERR_PTR(-ENOMEM);
1148
1149         vm->xe = xe;
1150         kref_init(&vm->refcount);
1151         dma_resv_init(&vm->resv);
1152
1153         vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1);
1154
1155         vm->vmas = RB_ROOT;
1156         vm->flags = flags;
1157
1158         init_rwsem(&vm->lock);
1159
1160         INIT_LIST_HEAD(&vm->rebind_list);
1161
1162         INIT_LIST_HEAD(&vm->userptr.repin_list);
1163         INIT_LIST_HEAD(&vm->userptr.invalidated);
1164         init_rwsem(&vm->userptr.notifier_lock);
1165         spin_lock_init(&vm->userptr.invalidated_lock);
1166
1167         INIT_LIST_HEAD(&vm->notifier.rebind_list);
1168         spin_lock_init(&vm->notifier.list_lock);
1169
1170         INIT_LIST_HEAD(&vm->async_ops.pending);
1171         INIT_WORK(&vm->async_ops.work, async_op_work_func);
1172         spin_lock_init(&vm->async_ops.lock);
1173
1174         INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1175
1176         INIT_LIST_HEAD(&vm->preempt.engines);
1177         vm->preempt.min_run_period_ms = 10;     /* FIXME: Wire up to uAPI */
1178
1179         INIT_LIST_HEAD(&vm->extobj.list);
1180
1181         if (!(flags & XE_VM_FLAG_MIGRATION)) {
1182                 /* We need to immediately exit from any D3 state */
1183                 xe_pm_runtime_get(xe);
1184                 xe_device_mem_access_get(xe);
1185         }
1186
1187         err = dma_resv_lock_interruptible(&vm->resv, NULL);
1188         if (err)
1189                 goto err_put;
1190
1191         if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1192                 vm->flags |= XE_VM_FLAGS_64K;
1193
1194         for_each_gt(gt, xe, id) {
1195                 if (xe_gt_is_media_type(gt))
1196                         continue;
1197
1198                 if (flags & XE_VM_FLAG_MIGRATION &&
1199                     gt->info.id != XE_VM_FLAG_GT_ID(flags))
1200                         continue;
1201
1202                 vm->pt_root[id] = xe_pt_create(vm, gt, xe->info.vm_max_level);
1203                 if (IS_ERR(vm->pt_root[id])) {
1204                         err = PTR_ERR(vm->pt_root[id]);
1205                         vm->pt_root[id] = NULL;
1206                         goto err_destroy_root;
1207                 }
1208         }
1209
1210         if (flags & XE_VM_FLAG_SCRATCH_PAGE) {
1211                 for_each_gt(gt, xe, id) {
1212                         if (!vm->pt_root[id])
1213                                 continue;
1214
1215                         err = xe_pt_create_scratch(xe, gt, vm);
1216                         if (err)
1217                                 goto err_scratch_pt;
1218                 }
1219         }
1220
1221         if (flags & DRM_XE_VM_CREATE_COMPUTE_MODE) {
1222                 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1223                 vm->flags |= XE_VM_FLAG_COMPUTE_MODE;
1224         }
1225
1226         if (flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) {
1227                 vm->async_ops.fence.context = dma_fence_context_alloc(1);
1228                 vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
1229         }
1230
1231         /* Fill pt_root after allocating scratch tables */
1232         for_each_gt(gt, xe, id) {
1233                 if (!vm->pt_root[id])
1234                         continue;
1235
1236                 xe_pt_populate_empty(gt, vm, vm->pt_root[id]);
1237         }
1238         dma_resv_unlock(&vm->resv);
1239
1240         /* Kernel migration VM shouldn't have a circular reference to itself. */
1241         if (!(flags & XE_VM_FLAG_MIGRATION)) {
1242                 for_each_gt(gt, xe, id) {
1243                         struct xe_vm *migrate_vm;
1244                         struct xe_engine *eng;
1245
1246                         if (!vm->pt_root[id])
1247                                 continue;
1248
1249                         migrate_vm = xe_migrate_get_vm(gt->migrate);
1250                         eng = xe_engine_create_class(xe, gt, migrate_vm,
1251                                                      XE_ENGINE_CLASS_COPY,
1252                                                      ENGINE_FLAG_VM);
1253                         xe_vm_put(migrate_vm);
1254                         if (IS_ERR(eng)) {
1255                                 xe_vm_close_and_put(vm);
1256                                 return ERR_CAST(eng);
1257                         }
1258                         vm->eng[id] = eng;
1259                         number_gts++;
1260                 }
1261         }
1262
1263         if (number_gts > 1)
1264                 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1265
1266         mutex_lock(&xe->usm.lock);
1267         if (flags & XE_VM_FLAG_FAULT_MODE)
1268                 xe->usm.num_vm_in_fault_mode++;
1269         else if (!(flags & XE_VM_FLAG_MIGRATION))
1270                 xe->usm.num_vm_in_non_fault_mode++;
1271         mutex_unlock(&xe->usm.lock);
1272
1273         trace_xe_vm_create(vm);
1274
1275         return vm;
1276
1277 err_scratch_pt:
1278         for_each_gt(gt, xe, id) {
1279                 if (!vm->pt_root[id])
1280                         continue;
1281
1282                 i = vm->pt_root[id]->level;
1283                 while (i)
1284                         if (vm->scratch_pt[id][--i])
1285                                 xe_pt_destroy(vm->scratch_pt[id][i],
1286                                               vm->flags, NULL);
1287                 xe_bo_unpin(vm->scratch_bo[id]);
1288                 xe_bo_put(vm->scratch_bo[id]);
1289         }
1290 err_destroy_root:
1291         for_each_gt(gt, xe, id) {
1292                 if (vm->pt_root[id])
1293                         xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1294         }
1295         dma_resv_unlock(&vm->resv);
1296 err_put:
1297         dma_resv_fini(&vm->resv);
1298         kfree(vm);
1299         if (!(flags & XE_VM_FLAG_MIGRATION)) {
1300                 xe_device_mem_access_put(xe);
1301                 xe_pm_runtime_put(xe);
1302         }
1303         return ERR_PTR(err);
1304 }
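/*
 * A minimal sketch of VM lifetime from the kernel side, assuming no
 * compute engines remain attached at teardown; the flag shown is one of
 * those handled above and the surrounding code is illustrative only:
 *
 *	struct xe_vm *vm;
 *
 *	vm = xe_vm_create(xe, XE_VM_FLAG_SCRATCH_PAGE);
 *	if (IS_ERR(vm))
 *		return PTR_ERR(vm);
 *	...
 *	xe_vm_close_and_put(vm);
 */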
1305
1306 static void flush_async_ops(struct xe_vm *vm)
1307 {
1308         queue_work(system_unbound_wq, &vm->async_ops.work);
1309         flush_work(&vm->async_ops.work);
1310 }
1311
1312 static void vm_error_capture(struct xe_vm *vm, int err,
1313                              u32 op, u64 addr, u64 size)
1314 {
1315         struct drm_xe_vm_bind_op_error_capture capture;
1316         u64 __user *address =
1317                 u64_to_user_ptr(vm->async_ops.error_capture.addr);
1318         bool in_kthread = !current->mm;
1319
1320         capture.error = err;
1321         capture.op = op;
1322         capture.addr = addr;
1323         capture.size = size;
1324
1325         if (in_kthread) {
1326                 if (!mmget_not_zero(vm->async_ops.error_capture.mm))
1327                         goto mm_closed;
1328                 kthread_use_mm(vm->async_ops.error_capture.mm);
1329         }
1330
1331         if (copy_to_user(address, &capture, sizeof(capture)))
1332                 XE_WARN_ON("Copy to user failed");
1333
1334         if (in_kthread) {
1335                 kthread_unuse_mm(vm->async_ops.error_capture.mm);
1336                 mmput(vm->async_ops.error_capture.mm);
1337         }
1338
1339 mm_closed:
1340         wake_up_all(&vm->async_ops.error_capture.wq);
1341 }
1342
1343 void xe_vm_close_and_put(struct xe_vm *vm)
1344 {
1345         struct rb_root contested = RB_ROOT;
1346         struct ww_acquire_ctx ww;
1347         struct xe_device *xe = vm->xe;
1348         struct xe_gt *gt;
1349         u8 id;
1350
1351         XE_BUG_ON(vm->preempt.num_engines);
1352
1353         vm->size = 0;
1354         smp_mb();
1355         flush_async_ops(vm);
1356         if (xe_vm_in_compute_mode(vm))
1357                 flush_work(&vm->preempt.rebind_work);
1358
1359         for_each_gt(gt, xe, id) {
1360                 if (vm->eng[id]) {
1361                         xe_engine_kill(vm->eng[id]);
1362                         xe_engine_put(vm->eng[id]);
1363                         vm->eng[id] = NULL;
1364                 }
1365         }
1366
1367         down_write(&vm->lock);
1368         xe_vm_lock(vm, &ww, 0, false);
1369         while (vm->vmas.rb_node) {
1370                 struct xe_vma *vma = to_xe_vma(vm->vmas.rb_node);
1371
1372                 if (xe_vma_is_userptr(vma)) {
1373                         down_read(&vm->userptr.notifier_lock);
1374                         vma->destroyed = true;
1375                         up_read(&vm->userptr.notifier_lock);
1376                 }
1377
1378                 rb_erase(&vma->vm_node, &vm->vmas);
1379
1380                 /* easy case, remove from VMA? */
1381                 if (xe_vma_is_userptr(vma) || vma->bo->vm) {
1382                         xe_vma_destroy(vma, NULL);
1383                         continue;
1384                 }
1385
1386                 rb_add(&vma->vm_node, &contested, xe_vma_less_cb);
1387         }
1388
1389         /*
1390          * All vm operations will add shared fences to resv.
1391          * The only exception is eviction for a shared object,
1392          * but even so, the unbind when evicted would still
1393          * install a fence to resv. Hence it's safe to
1394          * destroy the pagetables immediately.
1395          */
1396         for_each_gt(gt, xe, id) {
1397                 if (vm->scratch_bo[id]) {
1398                         u32 i;
1399
1400                         xe_bo_unpin(vm->scratch_bo[id]);
1401                         xe_bo_put(vm->scratch_bo[id]);
1402                         for (i = 0; i < vm->pt_root[id]->level; i++)
1403                                 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags,
1404                                               NULL);
1405                 }
1406         }
1407         xe_vm_unlock(vm, &ww);
1408
1409         if (contested.rb_node) {
1410
1411                 /*
1412                  * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1413                  * Since we hold a refcount to the bo, we can remove and free
1414                  * the members safely without locking.
1415                  */
1416                 while (contested.rb_node) {
1417                         struct xe_vma *vma = to_xe_vma(contested.rb_node);
1418
1419                         rb_erase(&vma->vm_node, &contested);
1420                         xe_vma_destroy_unlocked(vma);
1421                 }
1422         }
1423
1424         if (vm->async_ops.error_capture.addr)
1425                 wake_up_all(&vm->async_ops.error_capture.wq);
1426
1427         XE_WARN_ON(!list_empty(&vm->extobj.list));
1428         up_write(&vm->lock);
1429
1430         mutex_lock(&xe->usm.lock);
1431         if (vm->flags & XE_VM_FLAG_FAULT_MODE)
1432                 xe->usm.num_vm_in_fault_mode--;
1433         else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
1434                 xe->usm.num_vm_in_non_fault_mode--;
1435         mutex_unlock(&xe->usm.lock);
1436
1437         xe_vm_put(vm);
1438 }
1439
1440 static void vm_destroy_work_func(struct work_struct *w)
1441 {
1442         struct xe_vm *vm =
1443                 container_of(w, struct xe_vm, destroy_work);
1444         struct ww_acquire_ctx ww;
1445         struct xe_device *xe = vm->xe;
1446         struct xe_gt *gt;
1447         u8 id;
1448         void *lookup;
1449
1450         /* xe_vm_close_and_put was not called? */
1451         XE_WARN_ON(vm->size);
1452
1453         if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
1454                 xe_device_mem_access_put(xe);
1455                 xe_pm_runtime_put(xe);
1456
1457                 if (xe->info.has_asid) {
1458                         mutex_lock(&xe->usm.lock);
1459                         lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1460                         XE_WARN_ON(lookup != vm);
1461                         mutex_unlock(&xe->usm.lock);
1462                 }
1463         }
1464
1465         /*
1466          * XXX: We delay destroying the PT root until the VM is freed, as the PT root
1467          * is needed for xe_vm_lock to work. If we remove that dependency this
1468          * can be moved to xe_vm_close_and_put.
1469          */
1470         xe_vm_lock(vm, &ww, 0, false);
1471         for_each_gt(gt, xe, id) {
1472                 if (vm->pt_root[id]) {
1473                         xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1474                         vm->pt_root[id] = NULL;
1475                 }
1476         }
1477         xe_vm_unlock(vm, &ww);
1478
1479         trace_xe_vm_free(vm);
1480         dma_fence_put(vm->rebind_fence);
1481         dma_resv_fini(&vm->resv);
1482         kfree(vm);
1483 }
1484
1485 void xe_vm_free(struct kref *ref)
1486 {
1487         struct xe_vm *vm = container_of(ref, struct xe_vm, refcount);
1488
1489         /* To destroy the VM we need to be able to sleep */
1490         queue_work(system_unbound_wq, &vm->destroy_work);
1491 }
1492
1493 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1494 {
1495         struct xe_vm *vm;
1496
1497         mutex_lock(&xef->vm.lock);
1498         vm = xa_load(&xef->vm.xa, id);
1499         mutex_unlock(&xef->vm.lock);
1500
1501         if (vm)
1502                 xe_vm_get(vm);
1503
1504         return vm;
1505 }
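/*
 * A minimal sketch of ioctl-side usage of the lookup above; the reference
 * taken here must be dropped with xe_vm_put() when the caller is done
 * (vm_id and the error code chosen are illustrative only):
 *
 *	struct xe_vm *vm = xe_vm_lookup(xef, vm_id);
 *
 *	if (!vm)
 *		return -ENOENT;
 *	...
 *	xe_vm_put(vm);
 */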
1506
1507 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt)
1508 {
1509         XE_BUG_ON(xe_gt_is_media_type(full_gt));
1510
1511         return gen8_pde_encode(vm->pt_root[full_gt->info.id]->bo, 0,
1512                                XE_CACHE_WB);
1513 }
1514
1515 static struct dma_fence *
1516 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
1517                  struct xe_sync_entry *syncs, u32 num_syncs)
1518 {
1519         struct xe_gt *gt;
1520         struct dma_fence *fence = NULL;
1521         struct dma_fence **fences = NULL;
1522         struct dma_fence_array *cf = NULL;
1523         struct xe_vm *vm = vma->vm;
1524         int cur_fence = 0, i;
1525         int number_gts = hweight_long(vma->gt_present);
1526         int err;
1527         u8 id;
1528
1529         trace_xe_vma_unbind(vma);
1530
1531         if (number_gts > 1) {
1532                 fences = kmalloc_array(number_gts, sizeof(*fences),
1533                                        GFP_KERNEL);
1534                 if (!fences)
1535                         return ERR_PTR(-ENOMEM);
1536         }
1537
1538         for_each_gt(gt, vm->xe, id) {
1539                 if (!(vma->gt_present & BIT(id)))
1540                         goto next;
1541
1542                 XE_BUG_ON(xe_gt_is_media_type(gt));
1543
1544                 fence = __xe_pt_unbind_vma(gt, vma, e, syncs, num_syncs);
1545                 if (IS_ERR(fence)) {
1546                         err = PTR_ERR(fence);
1547                         goto err_fences;
1548                 }
1549
1550                 if (fences)
1551                         fences[cur_fence++] = fence;
1552
1553 next:
1554                 if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
1555                         e = list_next_entry(e, multi_gt_list);
1556         }
1557
1558         if (fences) {
1559                 cf = dma_fence_array_create(number_gts, fences,
1560                                             vm->composite_fence_ctx,
1561                                             vm->composite_fence_seqno++,
1562                                             false);
1563                 if (!cf) {
1564                         --vm->composite_fence_seqno;
1565                         err = -ENOMEM;
1566                         goto err_fences;
1567                 }
1568         }
1569
1570         for (i = 0; i < num_syncs; i++)
1571                 xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);
1572
1573         return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;
1574
1575 err_fences:
1576         if (fences) {
1577                 while (cur_fence) {
1578                         /* FIXME: Rewind the previous binds? */
1579                         dma_fence_put(fences[--cur_fence]);
1580                 }
1581                 kfree(fences);
1582         }
1583
1584         return ERR_PTR(err);
1585 }
1586
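/*
 * Bind @vma on every GT set in its gt_mask, creating a composite fence when
 * more than one GT is involved, mirroring xe_vm_unbind_vma() above.
 */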
1587 static struct dma_fence *
1588 xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
1589                struct xe_sync_entry *syncs, u32 num_syncs)
1590 {
1591         struct xe_gt *gt;
1592         struct dma_fence *fence;
1593         struct dma_fence **fences = NULL;
1594         struct dma_fence_array *cf = NULL;
1595         struct xe_vm *vm = vma->vm;
1596         int cur_fence = 0, i;
1597         int number_gts = hweight_long(vma->gt_mask);
1598         int err;
1599         u8 id;
1600
1601         trace_xe_vma_bind(vma);
1602
1603         if (number_gts > 1) {
1604                 fences = kmalloc_array(number_gts, sizeof(*fences),
1605                                        GFP_KERNEL);
1606                 if (!fences)
1607                         return ERR_PTR(-ENOMEM);
1608         }
1609
1610         for_each_gt(gt, vm->xe, id) {
1611                 if (!(vma->gt_mask & BIT(id)))
1612                         goto next;
1613
1614                 XE_BUG_ON(xe_gt_is_media_type(gt));
1615                 fence = __xe_pt_bind_vma(gt, vma, e, syncs, num_syncs,
1616                                          vma->gt_present & BIT(id));
1617                 if (IS_ERR(fence)) {
1618                         err = PTR_ERR(fence);
1619                         goto err_fences;
1620                 }
1621
1622                 if (fences)
1623                         fences[cur_fence++] = fence;
1624
1625 next:
1626                 if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
1627                         e = list_next_entry(e, multi_gt_list);
1628         }
1629
1630         if (fences) {
1631                 cf = dma_fence_array_create(number_gts, fences,
1632                                             vm->composite_fence_ctx,
1633                                             vm->composite_fence_seqno++,
1634                                             false);
1635                 if (!cf) {
1636                         --vm->composite_fence_seqno;
1637                         err = -ENOMEM;
1638                         goto err_fences;
1639                 }
1640         }
1641
1642         for (i = 0; i < num_syncs; i++)
1643                 xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);
1644
1645         return cf ? &cf->base : fence;
1646
1647 err_fences:
1648         if (fences) {
1649                 while (cur_fence) {
1650                         /* FIXME: Rewind the previous binds? */
1651                         dma_fence_put(fences[--cur_fence]);
1652                 }
1653                 kfree(fences);
1654         }
1655
1656         return ERR_PTR(err);
1657 }
1658
1659 struct async_op_fence {
1660         struct dma_fence fence;
1661         struct dma_fence *wait_fence;
1662         struct dma_fence_cb cb;
1663         struct xe_vm *vm;
1664         wait_queue_head_t wq;
1665         bool started;
1666 };
1667
1668 static const char *async_op_fence_get_driver_name(struct dma_fence *dma_fence)
1669 {
1670         return "xe";
1671 }
1672
1673 static const char *
1674 async_op_fence_get_timeline_name(struct dma_fence *dma_fence)
1675 {
1676         return "async_op_fence";
1677 }
1678
1679 static const struct dma_fence_ops async_op_fence_ops = {
1680         .get_driver_name = async_op_fence_get_driver_name,
1681         .get_timeline_name = async_op_fence_get_timeline_name,
1682 };
1683
1684 static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
1685 {
1686         struct async_op_fence *afence =
1687                 container_of(cb, struct async_op_fence, cb);
1688
1689         afence->fence.error = afence->wait_fence->error;
1690         dma_fence_signal(&afence->fence);
1691         xe_vm_put(afence->vm);
1692         dma_fence_put(afence->wait_fence);
1693         dma_fence_put(&afence->fence);
1694 }
1695
1696 static void add_async_op_fence_cb(struct xe_vm *vm,
1697                                   struct dma_fence *fence,
1698                                   struct async_op_fence *afence)
1699 {
1700         int ret;
1701
1702         if (!xe_vm_no_dma_fences(vm)) {
1703                 afence->started = true;
1704                 smp_wmb();
1705                 wake_up_all(&afence->wq);
1706         }
1707
1708         afence->wait_fence = dma_fence_get(fence);
1709         afence->vm = xe_vm_get(vm);
1710         dma_fence_get(&afence->fence);
1711         ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb);
1712         if (ret == -ENOENT) {
1713                 afence->fence.error = afence->wait_fence->error;
1714                 dma_fence_signal(&afence->fence);
1715         }
1716         if (ret) {
1717                 xe_vm_put(vm);
1718                 dma_fence_put(afence->wait_fence);
1719                 dma_fence_put(&afence->fence);
1720         }
1721         XE_WARN_ON(ret && ret != -ENOENT);
1722 }
1723
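/**
 * xe_vm_async_fence_wait_start() - Wait for an async bind fence to be started
 * @fence: The fence to wait on
 *
 * If @fence is an async bind operation fence, wait until the async worker
 * has actually started the operation (i.e. the fence has been hooked up to
 * the underlying bind fence). Other fence types return immediately.
 *
 * Return: 0 on success, -ERESTARTSYS if the wait was interrupted.
 */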
1724 int xe_vm_async_fence_wait_start(struct dma_fence *fence)
1725 {
1726         if (fence->ops == &async_op_fence_ops) {
1727                 struct async_op_fence *afence =
1728                         container_of(fence, struct async_op_fence, fence);
1729
1730                 XE_BUG_ON(xe_vm_no_dma_fences(afence->vm));
1731
1732                 smp_rmb();
1733                 return wait_event_interruptible(afence->wq, afence->started);
1734         }
1735
1736         return 0;
1737 }
1738
1739 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
1740                         struct xe_engine *e, struct xe_sync_entry *syncs,
1741                         u32 num_syncs, struct async_op_fence *afence)
1742 {
1743         struct dma_fence *fence;
1744
1745         xe_vm_assert_held(vm);
1746
1747         fence = xe_vm_bind_vma(vma, e, syncs, num_syncs);
1748         if (IS_ERR(fence))
1749                 return PTR_ERR(fence);
1750         if (afence)
1751                 add_async_op_fence_cb(vm, fence, afence);
1752
1753         dma_fence_put(fence);
1754         return 0;
1755 }
1756
1757 static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e,
1758                       struct xe_bo *bo, struct xe_sync_entry *syncs,
1759                       u32 num_syncs, struct async_op_fence *afence)
1760 {
1761         int err;
1762
1763         xe_vm_assert_held(vm);
1764         xe_bo_assert_held(bo);
1765
1766         if (bo) {
1767                 err = xe_bo_validate(bo, vm, true);
1768                 if (err)
1769                         return err;
1770         }
1771
1772         return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence);
1773 }
1774
1775 static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
1776                         struct xe_engine *e, struct xe_sync_entry *syncs,
1777                         u32 num_syncs, struct async_op_fence *afence)
1778 {
1779         struct dma_fence *fence;
1780
1781         xe_vm_assert_held(vm);
1782         xe_bo_assert_held(vma->bo);
1783
1784         fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs);
1785         if (IS_ERR(fence))
1786                 return PTR_ERR(fence);
1787         if (afence)
1788                 add_async_op_fence_cb(vm, fence, afence);
1789
1790         xe_vma_destroy(vma, fence);
1791         dma_fence_put(fence);
1792
1793         return 0;
1794 }
1795
1796 static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm,
1797                                         u64 value)
1798 {
1799         if (XE_IOCTL_ERR(xe, !value))
1800                 return -EINVAL;
1801
1802         if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
1803                 return -ENOTSUPP;
1804
1805         if (XE_IOCTL_ERR(xe, vm->async_ops.error_capture.addr))
1806                 return -ENOTSUPP;
1807
1808         vm->async_ops.error_capture.mm = current->mm;
1809         vm->async_ops.error_capture.addr = value;
1810         init_waitqueue_head(&vm->async_ops.error_capture.wq);
1811
1812         return 0;
1813 }
1814
1815 typedef int (*xe_vm_set_property_fn)(struct xe_device *xe, struct xe_vm *vm,
1816                                      u64 value);
1817
1818 static const xe_vm_set_property_fn vm_set_property_funcs[] = {
1819         [XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS] =
1820                 vm_set_error_capture_address,
1821 };
1822
1823 static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm,
1824                                     u64 extension)
1825 {
1826         u64 __user *address = u64_to_user_ptr(extension);
1827         struct drm_xe_ext_vm_set_property ext;
1828         int err;
1829
1830         err = __copy_from_user(&ext, address, sizeof(ext));
1831         if (XE_IOCTL_ERR(xe, err))
1832                 return -EFAULT;
1833
1834         if (XE_IOCTL_ERR(xe, ext.property >=
1835                          ARRAY_SIZE(vm_set_property_funcs)))
1836                 return -EINVAL;
1837
1838         return vm_set_property_funcs[ext.property](xe, vm, ext.value);
1839 }
1840
1841 typedef int (*xe_vm_user_extension_fn)(struct xe_device *xe, struct xe_vm *vm,
1842                                        u64 extension);
1843
1844 static const xe_vm_user_extension_fn vm_user_extension_funcs[] = {
1845         [XE_VM_EXTENSION_SET_PROPERTY] = vm_user_ext_set_property,
1846 };
1847
1848 #define MAX_USER_EXTENSIONS     16
1849 static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm,
1850                               u64 extensions, int ext_number)
1851 {
1852         u64 __user *address = u64_to_user_ptr(extensions);
1853         struct xe_user_extension ext;
1854         int err;
1855
1856         if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS))
1857                 return -E2BIG;
1858
1859         err = __copy_from_user(&ext, address, sizeof(ext));
1860         if (XE_IOCTL_ERR(xe, err))
1861                 return -EFAULT;
1862
1863         if (XE_IOCTL_ERR(xe, ext.name >=
1864                          ARRAY_SIZE(vm_user_extension_funcs)))
1865                 return -EINVAL;
1866
1867         err = vm_user_extension_funcs[ext.name](xe, vm, extensions);
1868         if (XE_IOCTL_ERR(xe, err))
1869                 return err;
1870
1871         if (ext.next_extension)
1872                 return vm_user_extensions(xe, vm, ext.next_extension,
1873                                           ++ext_number);
1874
1875         return 0;
1876 }
1877
1878 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \
1879                                     DRM_XE_VM_CREATE_COMPUTE_MODE | \
1880                                     DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \
1881                                     DRM_XE_VM_CREATE_FAULT_MODE)
1882
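/**
 * xe_vm_create_ioctl() - Create a new VM
 * @dev: DRM device
 * @data: Pointer to struct drm_xe_vm_create
 * @file: DRM file
 *
 * Validates the requested flag combination (compute mode and fault mode are
 * mutually exclusive, fault mode requires USM support, etc.), creates the VM,
 * processes any user extensions and returns the new VM id in @data.
 *
 * A minimal userspace sketch (assuming the DRM_IOCTL_XE_VM_CREATE definition
 * from the xe uAPI header):
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_ASYNC_BIND_OPS,
 *	};
 *
 *	if (!ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create))
 *		vm_id = create.vm_id;
 *
 * Return: 0 on success, negative error code on failure.
 */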
1883 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1884                        struct drm_file *file)
1885 {
1886         struct xe_device *xe = to_xe_device(dev);
1887         struct xe_file *xef = to_xe_file(file);
1888         struct drm_xe_vm_create *args = data;
1889         struct xe_vm *vm;
1890         u32 id, asid;
1891         int err;
1892         u32 flags = 0;
1893
1894         if (XE_IOCTL_ERR(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1895                 return -EINVAL;
1896
1897         if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE &&
1898                          args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
1899                 return -EINVAL;
1900
1901         if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE &&
1902                          args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
1903                 return -EINVAL;
1904
1905         if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
1906                          xe_device_in_non_fault_mode(xe)))
1907                 return -EINVAL;
1908
1909         if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) &&
1910                          xe_device_in_fault_mode(xe)))
1911                 return -EINVAL;
1912
1913         if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
1914                          !xe->info.supports_usm))
1915                 return -EINVAL;
1916
1917         if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE)
1918                 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1919         if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE)
1920                 flags |= XE_VM_FLAG_COMPUTE_MODE;
1921         if (args->flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS)
1922                 flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
1923         if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE)
1924                 flags |= XE_VM_FLAG_FAULT_MODE;
1925
1926         vm = xe_vm_create(xe, flags);
1927         if (IS_ERR(vm))
1928                 return PTR_ERR(vm);
1929
1930         if (args->extensions) {
1931                 err = vm_user_extensions(xe, vm, args->extensions, 0);
1932                 if (XE_IOCTL_ERR(xe, err)) {
1933                         xe_vm_close_and_put(vm);
1934                         return err;
1935                 }
1936         }
1937
1938         mutex_lock(&xef->vm.lock);
1939         err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1940         mutex_unlock(&xef->vm.lock);
1941         if (err) {
1942                 xe_vm_close_and_put(vm);
1943                 return err;
1944         }
1945
1946         if (xe->info.has_asid) {
1947                 mutex_lock(&xe->usm.lock);
1948                 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1949                                       XA_LIMIT(0, XE_MAX_ASID - 1),
1950                                       &xe->usm.next_asid, GFP_KERNEL);
1951                 mutex_unlock(&xe->usm.lock);
1952                 if (err) {
1953                         xe_vm_close_and_put(vm);
1954                         return err;
1955                 }
1956                 vm->usm.asid = asid;
1957         }
1958
1959         args->vm_id = id;
1960
1961 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1962         /* Warning: Security issue - never enable by default */
1963         args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1964 #endif
1965
1966         return 0;
1967 }
1968
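/**
 * xe_vm_destroy_ioctl() - Destroy a VM
 * @dev: DRM device
 * @data: Pointer to struct drm_xe_vm_destroy
 * @file: DRM file
 *
 * Removes the VM from the file's xarray and drops the ioctl reference.
 * Destruction is refused with -EBUSY while compute engines are still using
 * the VM.
 *
 * Return: 0 on success, negative error code on failure.
 */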
1969 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1970                         struct drm_file *file)
1971 {
1972         struct xe_device *xe = to_xe_device(dev);
1973         struct xe_file *xef = to_xe_file(file);
1974         struct drm_xe_vm_destroy *args = data;
1975         struct xe_vm *vm;
1976
1977         if (XE_IOCTL_ERR(xe, args->pad))
1978                 return -EINVAL;
1979
1980         vm = xe_vm_lookup(xef, args->vm_id);
1981         if (XE_IOCTL_ERR(xe, !vm))
1982                 return -ENOENT;
1983         xe_vm_put(vm);
1984
1985         /* FIXME: Extend this check to non-compute mode VMs */
1986         if (XE_IOCTL_ERR(xe, vm->preempt.num_engines))
1987                 return -EBUSY;
1988
1989         mutex_lock(&xef->vm.lock);
1990         xa_erase(&xef->vm.xa, args->vm_id);
1991         mutex_unlock(&xef->vm.lock);
1992
1993         xe_vm_close_and_put(vm);
1994
1995         return 0;
1996 }
1997
1998 static const u32 region_to_mem_type[] = {
1999         XE_PL_TT,
2000         XE_PL_VRAM0,
2001         XE_PL_VRAM1,
2002 };
2003
2004 static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
2005                           struct xe_engine *e, u32 region,
2006                           struct xe_sync_entry *syncs, u32 num_syncs,
2007                           struct async_op_fence *afence)
2008 {
2009         int err;
2010
2011         XE_BUG_ON(region >= ARRAY_SIZE(region_to_mem_type));
2012
2013         if (!xe_vma_is_userptr(vma)) {
2014                 err = xe_bo_migrate(vma->bo, region_to_mem_type[region]);
2015                 if (err)
2016                         return err;
2017         }
2018
2019         if (vma->gt_mask != (vma->gt_present & ~vma->usm.gt_invalidated)) {
2020                 return xe_vm_bind(vm, vma, e, vma->bo, syncs, num_syncs,
2021                                   afence);
2022         } else {
2023                 int i;
2024
2025                 /* Nothing to do, signal fences now */
2026                 for (i = 0; i < num_syncs; i++)
2027                         xe_sync_entry_signal(&syncs[i], NULL,
2028                                              dma_fence_get_stub());
2029                 if (afence)
2030                         dma_fence_signal(&afence->fence);
2031                 return 0;
2032         }
2033 }
2034
2035 #define VM_BIND_OP(op)  (op & 0xffff)
2036
2037 static int __vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
2038                            struct xe_engine *e, struct xe_bo *bo, u32 op,
2039                            u32 region, struct xe_sync_entry *syncs,
2040                            u32 num_syncs, struct async_op_fence *afence)
2041 {
2042         switch (VM_BIND_OP(op)) {
2043         case XE_VM_BIND_OP_MAP:
2044                 return xe_vm_bind(vm, vma, e, bo, syncs, num_syncs, afence);
2045         case XE_VM_BIND_OP_UNMAP:
2046         case XE_VM_BIND_OP_UNMAP_ALL:
2047                 return xe_vm_unbind(vm, vma, e, syncs, num_syncs, afence);
2048         case XE_VM_BIND_OP_MAP_USERPTR:
2049                 return xe_vm_bind(vm, vma, e, NULL, syncs, num_syncs, afence);
2050         case XE_VM_BIND_OP_PREFETCH:
2051                 return xe_vm_prefetch(vm, vma, e, region, syncs, num_syncs,
2052                                       afence);
2054         default:
2055                 XE_BUG_ON("NOT POSSIBLE");
2056                 return -EINVAL;
2057         }
2058 }
2059
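/**
 * xe_vm_ttm_bo() - TTM buffer object used to lock the VM's dma-resv
 * @vm: The VM
 *
 * Return: The TTM BO of one of the VM's page-table root BOs. Since all BOs
 * belonging to the VM share a single dma-resv lock, reserving this BO is
 * sufficient to lock the VM.
 */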
2060 struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm)
2061 {
2062         int idx = vm->flags & XE_VM_FLAG_MIGRATION ?
2063                 XE_VM_FLAG_GT_ID(vm->flags) : 0;
2064
2065         /* Safe to use index 0 as all BOs in the VM share a single dma-resv lock */
2066         return &vm->pt_root[idx]->bo->ttm;
2067 }
2068
2069 static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv)
2070 {
2071         tv->num_shared = 1;
2072         tv->bo = xe_vm_ttm_bo(vm);
2073 }
2074
2075 static bool is_map_op(u32 op)
2076 {
2077         return VM_BIND_OP(op) == XE_VM_BIND_OP_MAP ||
2078                 VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR;
2079 }
2080
2081 static bool is_unmap_op(u32 op)
2082 {
2083         return VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP ||
2084                 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL;
2085 }
2086
2087 static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
2088                          struct xe_engine *e, struct xe_bo *bo,
2089                          struct drm_xe_vm_bind_op *bind_op,
2090                          struct xe_sync_entry *syncs, u32 num_syncs,
2091                          struct async_op_fence *afence)
2092 {
2093         LIST_HEAD(objs);
2094         LIST_HEAD(dups);
2095         struct ttm_validate_buffer tv_bo, tv_vm;
2096         struct ww_acquire_ctx ww;
2097         struct xe_bo *vbo;
2098         int err, i;
2099
2100         lockdep_assert_held(&vm->lock);
2101         XE_BUG_ON(!list_empty(&vma->unbind_link));
2102
2103         /* Binds deferred to faults, signal fences now */
2104         if (xe_vm_in_fault_mode(vm) && is_map_op(bind_op->op) &&
2105             !(bind_op->op & XE_VM_BIND_FLAG_IMMEDIATE)) {
2106                 for (i = 0; i < num_syncs; i++)
2107                         xe_sync_entry_signal(&syncs[i], NULL,
2108                                              dma_fence_get_stub());
2109                 if (afence)
2110                         dma_fence_signal(&afence->fence);
2111                 return 0;
2112         }
2113
2114         xe_vm_tv_populate(vm, &tv_vm);
2115         list_add_tail(&tv_vm.head, &objs);
2116         vbo = vma->bo;
2117         if (vbo) {
2118                 /*
2119                  * An unbind can drop the last reference to the BO, and
2120                  * the BO is needed for ttm_eu_backoff_reservation, so
2121                  * take a reference here.
2122                  */
2123                 xe_bo_get(vbo);
2124
2125                 tv_bo.bo = &vbo->ttm;
2126                 tv_bo.num_shared = 1;
2127                 list_add(&tv_bo.head, &objs);
2128         }
2129
2130 again:
2131         err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups);
2132         if (!err) {
2133                 err = __vm_bind_ioctl(vm, vma, e, bo,
2134                                       bind_op->op, bind_op->region, syncs,
2135                                       num_syncs, afence);
2136                 ttm_eu_backoff_reservation(&ww, &objs);
2137                 if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
2138                         lockdep_assert_held_write(&vm->lock);
2139                         err = xe_vma_userptr_pin_pages(vma);
2140                         if (!err)
2141                                 goto again;
2142                 }
2143         }
2144         xe_bo_put(vbo);
2145
2146         return err;
2147 }
2148
2149 struct async_op {
2150         struct xe_vma *vma;
2151         struct xe_engine *engine;
2152         struct xe_bo *bo;
2153         struct drm_xe_vm_bind_op bind_op;
2154         struct xe_sync_entry *syncs;
2155         u32 num_syncs;
2156         struct list_head link;
2157         struct async_op_fence *fence;
2158 };
2159
2160 static void async_op_cleanup(struct xe_vm *vm, struct async_op *op)
2161 {
2162         while (op->num_syncs--)
2163                 xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
2164         kfree(op->syncs);
2165         xe_bo_put(op->bo);
2166         if (op->engine)
2167                 xe_engine_put(op->engine);
2168         xe_vm_put(vm);
2169         if (op->fence)
2170                 dma_fence_put(&op->fence->fence);
2171         kfree(op);
2172 }
2173
2174 static struct async_op *next_async_op(struct xe_vm *vm)
2175 {
2176         return list_first_entry_or_null(&vm->async_ops.pending,
2177                                         struct async_op, link);
2178 }
2179
2180 static void vm_set_async_error(struct xe_vm *vm, int err)
2181 {
2182         lockdep_assert_held(&vm->lock);
2183         vm->async_ops.error = err;
2184 }
2185
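/*
 * Worker that drains vm->async_ops.pending, executing each queued bind /
 * unbind under the VM lock. On failure the op is put back at the head of the
 * list, the VM is flagged with the error and processing stops until userspace
 * restarts the bind queue (see the XE_VM_BIND_OP_RESTART handling in
 * xe_vm_bind_ioctl()).
 */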
2186 static void async_op_work_func(struct work_struct *w)
2187 {
2188         struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work);
2189
2190         for (;;) {
2191                 struct async_op *op;
2192                 int err;
2193
2194                 if (vm->async_ops.error && !xe_vm_is_closed(vm))
2195                         break;
2196
2197                 spin_lock_irq(&vm->async_ops.lock);
2198                 op = next_async_op(vm);
2199                 if (op)
2200                         list_del_init(&op->link);
2201                 spin_unlock_irq(&vm->async_ops.lock);
2202
2203                 if (!op)
2204                         break;
2205
2206                 if (!xe_vm_is_closed(vm)) {
2207                         bool first, last;
2208
2209                         down_write(&vm->lock);
2210 again:
2211                         first = op->vma->first_munmap_rebind;
2212                         last = op->vma->last_munmap_rebind;
2213 #ifdef TEST_VM_ASYNC_OPS_ERROR
2214 #define FORCE_ASYNC_OP_ERROR    BIT(31)
2215                         if (!(op->bind_op.op & FORCE_ASYNC_OP_ERROR)) {
2216                                 err = vm_bind_ioctl(vm, op->vma, op->engine,
2217                                                     op->bo, &op->bind_op,
2218                                                     op->syncs, op->num_syncs,
2219                                                     op->fence);
2220                         } else {
2221                                 err = -ENOMEM;
2222                                 op->bind_op.op &= ~FORCE_ASYNC_OP_ERROR;
2223                         }
2224 #else
2225                         err = vm_bind_ioctl(vm, op->vma, op->engine, op->bo,
2226                                             &op->bind_op, op->syncs,
2227                                             op->num_syncs, op->fence);
2228 #endif
2229                         /*
2230                          * In order for the fencing to work (stall behind
2231                          * existing jobs / prevent new jobs from running) all
2232                          * the dma-resv slots need to be programmed in a batch
2233                          * relative to execs / the rebind worker. The vm->lock
2234                          * ensure this.
2235                          * ensures this.
2236                         if (!err && ((first && VM_BIND_OP(op->bind_op.op) ==
2237                                       XE_VM_BIND_OP_UNMAP) ||
2238                                      vm->async_ops.munmap_rebind_inflight)) {
2239                                 if (last) {
2240                                         op->vma->last_munmap_rebind = false;
2241                                         vm->async_ops.munmap_rebind_inflight =
2242                                                 false;
2243                                 } else {
2244                                         vm->async_ops.munmap_rebind_inflight =
2245                                                 true;
2246
2247                                         async_op_cleanup(vm, op);
2248
2249                                         spin_lock_irq(&vm->async_ops.lock);
2250                                         op = next_async_op(vm);
2251                                         XE_BUG_ON(!op);
2252                                         list_del_init(&op->link);
2253                                         spin_unlock_irq(&vm->async_ops.lock);
2254
2255                                         goto again;
2256                                 }
2257                         }
2258                         if (err) {
2259                                 trace_xe_vma_fail(op->vma);
2260                                 drm_warn(&vm->xe->drm, "Async VM op(%d) failed with %d",
2261                                          VM_BIND_OP(op->bind_op.op),
2262                                          err);
2263
2264                                 spin_lock_irq(&vm->async_ops.lock);
2265                                 list_add(&op->link, &vm->async_ops.pending);
2266                                 spin_unlock_irq(&vm->async_ops.lock);
2267
2268                                 vm_set_async_error(vm, err);
2269                                 up_write(&vm->lock);
2270
2271                                 if (vm->async_ops.error_capture.addr)
2272                                         vm_error_capture(vm, err,
2273                                                          op->bind_op.op,
2274                                                          op->bind_op.addr,
2275                                                          op->bind_op.range);
2276                                 break;
2277                         }
2278                         up_write(&vm->lock);
2279                 } else {
2280                         trace_xe_vma_flush(op->vma);
2281
2282                         if (is_unmap_op(op->bind_op.op)) {
2283                                 down_write(&vm->lock);
2284                                 xe_vma_destroy_unlocked(op->vma);
2285                                 up_write(&vm->lock);
2286                         }
2287
2288                         if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
2289                                                    &op->fence->fence.flags)) {
2290                                 if (!xe_vm_no_dma_fences(vm)) {
2291                                         op->fence->started = true;
2292                                         smp_wmb();
2293                                         wake_up_all(&op->fence->wq);
2294                                 }
2295                                 dma_fence_signal(&op->fence->fence);
2296                         }
2297                 }
2298
2299                 async_op_cleanup(vm, op);
2300         }
2301 }
2302
2303 static int __vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
2304                                  struct xe_engine *e, struct xe_bo *bo,
2305                                  struct drm_xe_vm_bind_op *bind_op,
2306                                  struct xe_sync_entry *syncs, u32 num_syncs)
2307 {
2308         struct async_op *op;
2309         bool installed = false;
2310         u64 seqno;
2311         int i;
2312
2313         lockdep_assert_held(&vm->lock);
2314
2315         op = kmalloc(sizeof(*op), GFP_KERNEL);
2316         if (!op)
2317                 return -ENOMEM;
2319
2320         if (num_syncs) {
2321                 op->fence = kmalloc(sizeof(*op->fence), GFP_KERNEL);
2322                 if (!op->fence) {
2323                         kfree(op);
2324                         return -ENOMEM;
2325                 }
2326
2327                 seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno;
2328                 dma_fence_init(&op->fence->fence, &async_op_fence_ops,
2329                                &vm->async_ops.lock, e ? e->bind.fence_ctx :
2330                                vm->async_ops.fence.context, seqno);
2331
2332                 if (!xe_vm_no_dma_fences(vm)) {
2333                         op->fence->vm = vm;
2334                         op->fence->started = false;
2335                         init_waitqueue_head(&op->fence->wq);
2336                 }
2337         } else {
2338                 op->fence = NULL;
2339         }
2340         op->vma = vma;
2341         op->engine = e;
2342         op->bo = bo;
2343         op->bind_op = *bind_op;
2344         op->syncs = syncs;
2345         op->num_syncs = num_syncs;
2346         INIT_LIST_HEAD(&op->link);
2347
2348         for (i = 0; i < num_syncs; i++)
2349                 installed |= xe_sync_entry_signal(&syncs[i], NULL,
2350                                                   &op->fence->fence);
2351
2352         if (!installed && op->fence)
2353                 dma_fence_signal(&op->fence->fence);
2354
2355         spin_lock_irq(&vm->async_ops.lock);
2356         list_add_tail(&op->link, &vm->async_ops.pending);
2357         spin_unlock_irq(&vm->async_ops.lock);
2358
2359         if (!vm->async_ops.error)
2360                 queue_work(system_unbound_wq, &vm->async_ops.work);
2361
2362         return 0;
2363 }
2364
2365 static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
2366                                struct xe_engine *e, struct xe_bo *bo,
2367                                struct drm_xe_vm_bind_op *bind_op,
2368                                struct xe_sync_entry *syncs, u32 num_syncs)
2369 {
2370         struct xe_vma *__vma, *next;
2371         struct list_head rebind_list;
2372         struct xe_sync_entry *in_syncs = NULL, *out_syncs = NULL;
2373         u32 num_in_syncs = 0, num_out_syncs = 0;
2374         bool first = true, last;
2375         int err;
2376         int i;
2377
2378         lockdep_assert_held(&vm->lock);
2379
2380         /* Not a linked list of unbinds + rebinds, easy */
2381         if (list_empty(&vma->unbind_link))
2382                 return __vm_bind_ioctl_async(vm, vma, e, bo, bind_op,
2383                                              syncs, num_syncs);
2384
2385         /*
2386          * Linked list of unbinds + rebinds: decompose the syncs into 'in / out',
2387          * passing the 'in' syncs to the first operation and the 'out' syncs to
2388          * the last. The reference counting is also a little tricky: increment the
2389          * VM / bind engine ref count on all but the last operation and increment
2390          * the BO's ref count on each rebind.
2391          */
2392
2393         XE_BUG_ON(VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP &&
2394                   VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP_ALL &&
2395                   VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_PREFETCH);
2396
2397         /* Decompose syncs */
2398         if (num_syncs) {
2399                 in_syncs = kmalloc(sizeof(*in_syncs) * num_syncs, GFP_KERNEL);
2400                 out_syncs = kmalloc(sizeof(*out_syncs) * num_syncs, GFP_KERNEL);
2401                 if (!in_syncs || !out_syncs) {
2402                         err = -ENOMEM;
2403                         goto out_error;
2404                 }
2405
2406                 for (i = 0; i < num_syncs; ++i) {
2407                         bool signal = syncs[i].flags & DRM_XE_SYNC_SIGNAL;
2408
2409                         if (signal)
2410                                 out_syncs[num_out_syncs++] = syncs[i];
2411                         else
2412                                 in_syncs[num_in_syncs++] = syncs[i];
2413                 }
2414         }
2415
2416         /* Do unbinds + move rebinds to new list */
2417         INIT_LIST_HEAD(&rebind_list);
2418         list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link) {
2419                 if (__vma->destroyed ||
2420                     VM_BIND_OP(bind_op->op) == XE_VM_BIND_OP_PREFETCH) {
2421                         list_del_init(&__vma->unbind_link);
2422                         xe_bo_get(bo);
2423                         err = __vm_bind_ioctl_async(xe_vm_get(vm), __vma,
2424                                                     e ? xe_engine_get(e) : NULL,
2425                                                     bo, bind_op, first ?
2426                                                     in_syncs : NULL,
2427                                                     first ? num_in_syncs : 0);
2428                         if (err) {
2429                                 xe_bo_put(bo);
2430                                 xe_vm_put(vm);
2431                                 if (e)
2432                                         xe_engine_put(e);
2433                                 goto out_error;
2434                         }
2435                         in_syncs = NULL;
2436                         first = false;
2437                 } else {
2438                         list_move_tail(&__vma->unbind_link, &rebind_list);
2439                 }
2440         }
2441         last = list_empty(&rebind_list);
2442         if (!last) {
2443                 xe_vm_get(vm);
2444                 if (e)
2445                         xe_engine_get(e);
2446         }
2447         err = __vm_bind_ioctl_async(vm, vma, e,
2448                                     bo, bind_op,
2449                                     first ? in_syncs :
2450                                     last ? out_syncs : NULL,
2451                                     first ? num_in_syncs :
2452                                     last ? num_out_syncs : 0);
2453         if (err) {
2454                 if (!last) {
2455                         xe_vm_put(vm);
2456                         if (e)
2457                                 xe_engine_put(e);
2458                 }
2459                 goto out_error;
2460         }
2461         in_syncs = NULL;
2462
2463         /* Do rebinds */
2464         list_for_each_entry_safe(__vma, next, &rebind_list, unbind_link) {
2465                 list_del_init(&__vma->unbind_link);
2466                 last = list_empty(&rebind_list);
2467
2468                 if (xe_vma_is_userptr(__vma)) {
2469                         bind_op->op = XE_VM_BIND_FLAG_ASYNC |
2470                                 XE_VM_BIND_OP_MAP_USERPTR;
2471                 } else {
2472                         bind_op->op = XE_VM_BIND_FLAG_ASYNC |
2473                                 XE_VM_BIND_OP_MAP;
2474                         xe_bo_get(__vma->bo);
2475                 }
2476
2477                 if (!last) {
2478                         xe_vm_get(vm);
2479                         if (e)
2480                                 xe_engine_get(e);
2481                 }
2482
2483                 err = __vm_bind_ioctl_async(vm, __vma, e,
2484                                             __vma->bo, bind_op, last ?
2485                                             out_syncs : NULL,
2486                                             last ? num_out_syncs : 0);
2487                 if (err) {
2488                         if (!last) {
2489                                 xe_vm_put(vm);
2490                                 if (e)
2491                                         xe_engine_put(e);
2492                         }
2493                         goto out_error;
2494                 }
2495         }
2496
2497         kfree(syncs);
2498         return 0;
2499
2500 out_error:
2501         kfree(in_syncs);
2502         kfree(out_syncs);
2503         kfree(syncs);
2504
2505         return err;
2506 }
2507
2508 static int __vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
2509                                       u64 addr, u64 range, u32 op)
2510 {
2511         struct xe_device *xe = vm->xe;
2512         struct xe_vma *vma, lookup;
2513         bool async = !!(op & XE_VM_BIND_FLAG_ASYNC);
2514
2515         lockdep_assert_held(&vm->lock);
2516
2517         lookup.start = addr;
2518         lookup.end = addr + range - 1;
2519
2520         switch (VM_BIND_OP(op)) {
2521         case XE_VM_BIND_OP_MAP:
2522         case XE_VM_BIND_OP_MAP_USERPTR:
2523                 vma = xe_vm_find_overlapping_vma(vm, &lookup);
2524                 if (XE_IOCTL_ERR(xe, vma))
2525                         return -EBUSY;
2526                 break;
2527         case XE_VM_BIND_OP_UNMAP:
2528         case XE_VM_BIND_OP_PREFETCH:
2529                 vma = xe_vm_find_overlapping_vma(vm, &lookup);
2530                 if (XE_IOCTL_ERR(xe, !vma) ||
2531                     XE_IOCTL_ERR(xe, (vma->start != addr ||
2532                                  vma->end != addr + range - 1) && !async))
2533                         return -EINVAL;
2534                 break;
2535         case XE_VM_BIND_OP_UNMAP_ALL:
2536                 break;
2537         default:
2538                 XE_BUG_ON("NOT POSSIBLE");
2539                 return -EINVAL;
2540         }
2541
2542         return 0;
2543 }
2544
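/*
 * Mark @vma as destroyed under the userptr notifier lock and remove it from
 * the VM's VMA tree. The actual teardown happens later via xe_vma_destroy().
 */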
2545 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma)
2546 {
2547         down_read(&vm->userptr.notifier_lock);
2548         vma->destroyed = true;
2549         up_read(&vm->userptr.notifier_lock);
2550         xe_vm_remove_vma(vm, vma);
2551 }
2552
2553 static int prep_replacement_vma(struct xe_vm *vm, struct xe_vma *vma)
2554 {
2555         int err;
2556
2557         if (vma->bo && !vma->bo->vm) {
2558                 vm_insert_extobj(vm, vma);
2559                 err = add_preempt_fences(vm, vma->bo);
2560                 if (err)
2561                         return err;
2562         }
2563
2564         return 0;
2565 }
2566
2567 /*
2568  * Find all overlapping VMAs in the lookup range and add them to a list in the
2569  * returned VMA; all of the VMAs found will be unbound. Also possibly add up to
2570  * two new VMAs that need to be bound if the first / last VMAs are not fully
2571  * unbound. This is akin to how munmap works.
2572  */
2573 static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
2574                                             struct xe_vma *lookup)
2575 {
2576         struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup);
2577         struct rb_node *node;
2578         struct xe_vma *first = vma, *last = vma, *new_first = NULL,
2579                       *new_last = NULL, *__vma, *next;
2580         int err = 0;
2581         bool first_munmap_rebind = false;
2582
2583         lockdep_assert_held(&vm->lock);
2584         XE_BUG_ON(!vma);
2585
2586         node = &vma->vm_node;
2587         while ((node = rb_next(node))) {
2588                 if (!xe_vma_cmp_vma_cb(lookup, node)) {
2589                         __vma = to_xe_vma(node);
2590                         list_add_tail(&__vma->unbind_link, &vma->unbind_link);
2591                         last = __vma;
2592                 } else {
2593                         break;
2594                 }
2595         }
2596
2597         node = &vma->vm_node;
2598         while ((node = rb_prev(node))) {
2599                 if (!xe_vma_cmp_vma_cb(lookup, node)) {
2600                         __vma = to_xe_vma(node);
2601                         list_add(&__vma->unbind_link, &vma->unbind_link);
2602                         first = __vma;
2603                 } else {
2604                         break;
2605                 }
2606         }
2607
2608         if (first->start != lookup->start) {
2609                 struct ww_acquire_ctx ww;
2610
2611                 if (first->bo)
2612                         err = xe_bo_lock(first->bo, &ww, 0, true);
2613                 if (err)
2614                         goto unwind;
2615                 new_first = xe_vma_create(first->vm, first->bo,
2616                                           first->bo ? first->bo_offset :
2617                                           first->userptr.ptr,
2618                                           first->start,
2619                                           lookup->start - 1,
2620                                           (first->pte_flags & XE_PTE_READ_ONLY),
2621                                           first->gt_mask);
2622                 if (first->bo)
2623                         xe_bo_unlock(first->bo, &ww);
2624                 if (!new_first) {
2625                         err = -ENOMEM;
2626                         goto unwind;
2627                 }
2628                 if (!first->bo) {
2629                         err = xe_vma_userptr_pin_pages(new_first);
2630                         if (err)
2631                                 goto unwind;
2632                 }
2633                 err = prep_replacement_vma(vm, new_first);
2634                 if (err)
2635                         goto unwind;
2636         }
2637
2638         if (last->end != lookup->end) {
2639                 struct ww_acquire_ctx ww;
2640                 u64 chunk = lookup->end + 1 - last->start;
2641
2642                 if (last->bo)
2643                         err = xe_bo_lock(last->bo, &ww, 0, true);
2644                 if (err)
2645                         goto unwind;
2646                 new_last = xe_vma_create(last->vm, last->bo,
2647                                          last->bo ? last->bo_offset + chunk :
2648                                          last->userptr.ptr + chunk,
2649                                          last->start + chunk,
2650                                          last->end,
2651                                          (last->pte_flags & XE_PTE_READ_ONLY),
2652                                          last->gt_mask);
2653                 if (last->bo)
2654                         xe_bo_unlock(last->bo, &ww);
2655                 if (!new_last) {
2656                         err = -ENOMEM;
2657                         goto unwind;
2658                 }
2659                 if (!last->bo) {
2660                         err = xe_vma_userptr_pin_pages(new_last);
2661                         if (err)
2662                                 goto unwind;
2663                 }
2664                 err = prep_replacement_vma(vm, new_last);
2665                 if (err)
2666                         goto unwind;
2667         }
2668
2669         prep_vma_destroy(vm, vma);
2670         if (list_empty(&vma->unbind_link) && (new_first || new_last))
2671                 vma->first_munmap_rebind = true;
2672         list_for_each_entry(__vma, &vma->unbind_link, unbind_link) {
2673                 if ((new_first || new_last) && !first_munmap_rebind) {
2674                         __vma->first_munmap_rebind = true;
2675                         first_munmap_rebind = true;
2676                 }
2677                 prep_vma_destroy(vm, __vma);
2678         }
2679         if (new_first) {
2680                 xe_vm_insert_vma(vm, new_first);
2681                 list_add_tail(&new_first->unbind_link, &vma->unbind_link);
2682                 if (!new_last)
2683                         new_first->last_munmap_rebind = true;
2684         }
2685         if (new_last) {
2686                 xe_vm_insert_vma(vm, new_last);
2687                 list_add_tail(&new_last->unbind_link, &vma->unbind_link);
2688                 new_last->last_munmap_rebind = true;
2689         }
2690
2691         return vma;
2692
2693 unwind:
2694         list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link)
2695                 list_del_init(&__vma->unbind_link);
2696         if (new_last) {
2697                 prep_vma_destroy(vm, new_last);
2698                 xe_vma_destroy_unlocked(new_last);
2699         }
2700         if (new_first) {
2701                 prep_vma_destroy(vm, new_first);
2702                 xe_vma_destroy_unlocked(new_first);
2703         }
2704
2705         return ERR_PTR(err);
2706 }
2707
2708 /*
2709  * Similar to vm_unbind_lookup_vmas, find all VMAs in lookup range to prefetch
2710  */
2711 static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm,
2712                                               struct xe_vma *lookup,
2713                                               u32 region)
2714 {
2715         struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup), *__vma,
2716                       *next;
2717         struct rb_node *node;
2718
2719         if (!xe_vma_is_userptr(vma)) {
2720                 if (!xe_bo_can_migrate(vma->bo, region_to_mem_type[region]))
2721                         return ERR_PTR(-EINVAL);
2722         }
2723
2724         node = &vma->vm_node;
2725         while ((node = rb_next(node))) {
2726                 if (!xe_vma_cmp_vma_cb(lookup, node)) {
2727                         __vma = to_xe_vma(node);
2728                         if (!xe_vma_is_userptr(__vma)) {
2729                                 if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region]))
2730                                         goto flush_list;
2731                         }
2732                         list_add_tail(&__vma->unbind_link, &vma->unbind_link);
2733                 } else {
2734                         break;
2735                 }
2736         }
2737
2738         node = &vma->vm_node;
2739         while ((node = rb_prev(node))) {
2740                 if (!xe_vma_cmp_vma_cb(lookup, node)) {
2741                         __vma = to_xe_vma(node);
2742                         if (!xe_vma_is_userptr(__vma)) {
2743                                 if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region]))
2744                                         goto flush_list;
2745                         }
2746                         list_add(&__vma->unbind_link, &vma->unbind_link);
2747                 } else {
2748                         break;
2749                 }
2750         }
2751
2752         return vma;
2753
2754 flush_list:
2755         list_for_each_entry_safe(__vma, next, &vma->unbind_link,
2756                                  unbind_link)
2757                 list_del_init(&__vma->unbind_link);
2758
2759         return ERR_PTR(-EINVAL);
2760 }
2761
2762 static struct xe_vma *vm_unbind_all_lookup_vmas(struct xe_vm *vm,
2763                                                 struct xe_bo *bo)
2764 {
2765         struct xe_vma *first = NULL, *vma;
2766
2767         lockdep_assert_held(&vm->lock);
2768         xe_bo_assert_held(bo);
2769
2770         list_for_each_entry(vma, &bo->vmas, bo_link) {
2771                 if (vma->vm != vm)
2772                         continue;
2773
2774                 prep_vma_destroy(vm, vma);
2775                 if (!first)
2776                         first = vma;
2777                 else
2778                         list_add_tail(&vma->unbind_link, &first->unbind_link);
2779         }
2780
2781         return first;
2782 }
2783
2784 static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm,
2785                                                struct xe_bo *bo,
2786                                                u64 bo_offset_or_userptr,
2787                                                u64 addr, u64 range, u32 op,
2788                                                u64 gt_mask, u32 region)
2789 {
2790         struct ww_acquire_ctx ww;
2791         struct xe_vma *vma, lookup;
2792         int err;
2793
2794         lockdep_assert_held(&vm->lock);
2795
2796         lookup.start = addr;
2797         lookup.end = addr + range - 1;
2798
2799         switch (VM_BIND_OP(op)) {
2800         case XE_VM_BIND_OP_MAP:
2801                 XE_BUG_ON(!bo);
2802
2803                 err = xe_bo_lock(bo, &ww, 0, true);
2804                 if (err)
2805                         return ERR_PTR(err);
2806                 vma = xe_vma_create(vm, bo, bo_offset_or_userptr, addr,
2807                                     addr + range - 1,
2808                                     op & XE_VM_BIND_FLAG_READONLY,
2809                                     gt_mask);
2810                 xe_bo_unlock(bo, &ww);
2811                 if (!vma)
2812                         return ERR_PTR(-ENOMEM);
2813
2814                 xe_vm_insert_vma(vm, vma);
2815                 if (!bo->vm) {
2816                         vm_insert_extobj(vm, vma);
2817                         err = add_preempt_fences(vm, bo);
2818                         if (err) {
2819                                 prep_vma_destroy(vm, vma);
2820                                 xe_vma_destroy_unlocked(vma);
2821
2822                                 return ERR_PTR(err);
2823                         }
2824                 }
2825                 break;
2826         case XE_VM_BIND_OP_UNMAP:
2827                 vma = vm_unbind_lookup_vmas(vm, &lookup);
2828                 break;
2829         case XE_VM_BIND_OP_PREFETCH:
2830                 vma = vm_prefetch_lookup_vmas(vm, &lookup, region);
2831                 break;
2832         case XE_VM_BIND_OP_UNMAP_ALL:
2833                 XE_BUG_ON(!bo);
2834
2835                 err = xe_bo_lock(bo, &ww, 0, true);
2836                 if (err)
2837                         return ERR_PTR(err);
2838                 vma = vm_unbind_all_lookup_vmas(vm, bo);
2839                 if (!vma)
2840                         vma = ERR_PTR(-EINVAL);
2841                 xe_bo_unlock(bo, &ww);
2842                 break;
2843         case XE_VM_BIND_OP_MAP_USERPTR:
2844                 XE_BUG_ON(bo);
2845
2846                 vma = xe_vma_create(vm, NULL, bo_offset_or_userptr, addr,
2847                                     addr + range - 1,
2848                                     op & XE_VM_BIND_FLAG_READONLY,
2849                                     gt_mask);
2850                 if (!vma)
2851                         return ERR_PTR(-ENOMEM);
2852
2853                 err = xe_vma_userptr_pin_pages(vma);
2854                 if (err) {
2855                         prep_vma_destroy(vm, vma);
2856                         xe_vma_destroy_unlocked(vma);
2857
2858                         return ERR_PTR(err);
2859                 } else {
2860                         xe_vm_insert_vma(vm, vma);
2861                 }
2862                 break;
2863         default:
2864                 XE_BUG_ON("NOT POSSIBLE");
2865                 vma = ERR_PTR(-EINVAL);
2866         }
2867
2868         return vma;
2869 }
2870
2871 #ifdef TEST_VM_ASYNC_OPS_ERROR
2872 #define SUPPORTED_FLAGS \
2873         (FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \
2874          XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | 0xffff)
2875 #else
2876 #define SUPPORTED_FLAGS \
2877         (XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \
2878          XE_VM_BIND_FLAG_IMMEDIATE | 0xffff)
2879 #endif
2880 #define XE_64K_PAGE_MASK 0xffffull
2881
2882 #define MAX_BINDS       512     /* FIXME: Picking random upper limit */
2883
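/*
 * Copy in and sanity-check the array of bind ops before taking any locks.
 * For multi-op binds all ops must be async, and *bind_ops is allocated here
 * with ownership passing to the caller; for a single op it simply points at
 * args->bind.
 */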
2884 static int vm_bind_ioctl_check_args(struct xe_device *xe,
2885                                     struct drm_xe_vm_bind *args,
2886                                     struct drm_xe_vm_bind_op **bind_ops,
2887                                     bool *async)
2888 {
2889         int err;
2890         int i;
2891
2892         if (XE_IOCTL_ERR(xe, args->extensions) ||
2893             XE_IOCTL_ERR(xe, !args->num_binds) ||
2894             XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS))
2895                 return -EINVAL;
2896
2897         if (args->num_binds > 1) {
2898                 u64 __user *bind_user =
2899                         u64_to_user_ptr(args->vector_of_binds);
2900
2901                 *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) *
2902                                     args->num_binds, GFP_KERNEL);
2903                 if (!*bind_ops)
2904                         return -ENOMEM;
2905
2906                 err = __copy_from_user(*bind_ops, bind_user,
2907                                        sizeof(struct drm_xe_vm_bind_op) *
2908                                        args->num_binds);
2909                 if (XE_IOCTL_ERR(xe, err)) {
2910                         err = -EFAULT;
2911                         goto free_bind_ops;
2912                 }
2913         } else {
2914                 *bind_ops = &args->bind;
2915         }
2916
2917         for (i = 0; i < args->num_binds; ++i) {
2918                 u64 range = (*bind_ops)[i].range;
2919                 u64 addr = (*bind_ops)[i].addr;
2920                 u32 op = (*bind_ops)[i].op;
2921                 u32 obj = (*bind_ops)[i].obj;
2922                 u64 obj_offset = (*bind_ops)[i].obj_offset;
2923                 u32 region = (*bind_ops)[i].region;
2924
2925                 if (i == 0) {
2926                         *async = !!(op & XE_VM_BIND_FLAG_ASYNC);
2927                 } else if (XE_IOCTL_ERR(xe, !*async) ||
2928                            XE_IOCTL_ERR(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) ||
2929                            XE_IOCTL_ERR(xe, VM_BIND_OP(op) ==
2930                                         XE_VM_BIND_OP_RESTART)) {
2931                         err = -EINVAL;
2932                         goto free_bind_ops;
2933                 }
2934
2935                 if (XE_IOCTL_ERR(xe, !*async &&
2936                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) {
2937                         err = -EINVAL;
2938                         goto free_bind_ops;
2939                 }
2940
2941                 if (XE_IOCTL_ERR(xe, !*async &&
2942                                  VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) {
2943                         err = -EINVAL;
2944                         goto free_bind_ops;
2945                 }
2946
2947                 if (XE_IOCTL_ERR(xe, VM_BIND_OP(op) >
2948                                  XE_VM_BIND_OP_PREFETCH) ||
2949                     XE_IOCTL_ERR(xe, op & ~SUPPORTED_FLAGS) ||
2950                     XE_IOCTL_ERR(xe, !obj &&
2951                                  VM_BIND_OP(op) == XE_VM_BIND_OP_MAP) ||
2952                     XE_IOCTL_ERR(xe, !obj &&
2953                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
2954                     XE_IOCTL_ERR(xe, addr &&
2955                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
2956                     XE_IOCTL_ERR(xe, range &&
2957                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
2958                     XE_IOCTL_ERR(xe, obj &&
2959                                  VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR) ||
2960                     XE_IOCTL_ERR(xe, obj &&
2961                                  VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH) ||
2962                     XE_IOCTL_ERR(xe, region &&
2963                                  VM_BIND_OP(op) != XE_VM_BIND_OP_PREFETCH) ||
2964                     XE_IOCTL_ERR(xe, !(BIT(region) &
2965                                        xe->info.mem_region_mask)) ||
2966                     XE_IOCTL_ERR(xe, obj &&
2967                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP)) {
2968                         err = -EINVAL;
2969                         goto free_bind_ops;
2970                 }
2971
2972                 if (XE_IOCTL_ERR(xe, obj_offset & ~PAGE_MASK) ||
2973                     XE_IOCTL_ERR(xe, addr & ~PAGE_MASK) ||
2974                     XE_IOCTL_ERR(xe, range & ~PAGE_MASK) ||
2975                     XE_IOCTL_ERR(xe, !range && VM_BIND_OP(op) !=
2976                                  XE_VM_BIND_OP_RESTART &&
2977                                  VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL)) {
2978                         err = -EINVAL;
2979                         goto free_bind_ops;
2980                 }
2981         }
2982
2983         return 0;
2984
2985 free_bind_ops:
2986         if (args->num_binds > 1)
2987                 kfree(*bind_ops);
2988         return err;
2989 }
2990
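/**
 * xe_vm_bind_ioctl() - Entry point for the VM bind ioctl
 * @dev: DRM device
 * @data: The ioctl argument, a struct drm_xe_vm_bind
 * @file: DRM file private
 *
 * Validates the bind arguments, looks up the VM, the optional engine and the
 * GEM objects involved, parses the user sync entries, and then either queues
 * the bind operations on the async worker or performs a single synchronous
 * bind.
 *
 * Return: 0 on success, negative error code on failure.
 */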
2991 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2992 {
2993         struct xe_device *xe = to_xe_device(dev);
2994         struct xe_file *xef = to_xe_file(file);
2995         struct drm_xe_vm_bind *args = data;
2996         struct drm_xe_sync __user *syncs_user;
2997         struct xe_bo **bos = NULL;
2998         struct xe_vma **vmas = NULL;
2999         struct xe_vm *vm;
3000         struct xe_engine *e = NULL;
3001         u32 num_syncs;
3002         struct xe_sync_entry *syncs = NULL;
3003         struct drm_xe_vm_bind_op *bind_ops;
3004         bool async;
3005         int err;
3006         int i, j = 0;
3007
3008         err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async);
3009         if (err)
3010                 return err;
3011
3012         vm = xe_vm_lookup(xef, args->vm_id);
3013         if (XE_IOCTL_ERR(xe, !vm)) {
3014                 err = -EINVAL;
3015                 goto free_objs;
3016         }
3017
3018         if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) {
3019                 DRM_ERROR("VM closed while we were looking it up?\n");
3020                 err = -ENOENT;
3021                 goto put_vm;
3022         }
3023
3024         if (args->engine_id) {
3025                 e = xe_engine_lookup(xef, args->engine_id);
3026                 if (XE_IOCTL_ERR(xe, !e)) {
3027                         err = -ENOENT;
3028                         goto put_vm;
3029                 }
3030                 if (XE_IOCTL_ERR(xe, !(e->flags & ENGINE_FLAG_VM))) {
3031                         err = -EINVAL;
3032                         goto put_engine;
3033                 }
3034         }
3035
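        /*
         * A RESTART op resumes the async bind queue after a failed bind:
         * clear the stored error and re-kick the worker(s).
         */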
3036         if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) {
3037                 if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
3038                         err = -ENOTSUPP;
3039                 if (XE_IOCTL_ERR(xe, !err && args->num_syncs))
3040                         err = -EINVAL;
3041                 if (XE_IOCTL_ERR(xe, !err && !vm->async_ops.error))
3042                         err = -EPROTO;
3043
3044                 if (!err) {
3045                         down_write(&vm->lock);
3046                         trace_xe_vm_restart(vm);
3047                         vm_set_async_error(vm, 0);
3048                         up_write(&vm->lock);
3049
3050                         queue_work(system_unbound_wq, &vm->async_ops.work);
3051
3052                         /* Rebinds may have been blocked, give worker a kick */
3053                         if (xe_vm_in_compute_mode(vm))
3054                                 queue_work(vm->xe->ordered_wq,
3055                                            &vm->preempt.rebind_work);
3056                 }
3057
3058                 goto put_engine;
3059         }
3060
3061         if (XE_IOCTL_ERR(xe, !vm->async_ops.error &&
3062                          async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) {
3063                 err = -ENOTSUPP;
3064                 goto put_engine;
3065         }
3066
3067         for (i = 0; i < args->num_binds; ++i) {
3068                 u64 range = bind_ops[i].range;
3069                 u64 addr = bind_ops[i].addr;
3070
3071                 if (XE_IOCTL_ERR(xe, range > vm->size) ||
3072                     XE_IOCTL_ERR(xe, addr > vm->size - range)) {
3073                         err = -EINVAL;
3074                         goto put_engine;
3075                 }
3076
3077                 if (bind_ops[i].gt_mask) {
3078                         u64 valid_gts = BIT(xe->info.tile_count) - 1;
3079
3080                         if (XE_IOCTL_ERR(xe, bind_ops[i].gt_mask &
3081                                          ~valid_gts)) {
3082                                 err = -EINVAL;
3083                                 goto put_engine;
3084                         }
3085                 }
3086         }
3087
3088         bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL);
3089         if (!bos) {
3090                 err = -ENOMEM;
3091                 goto put_engine;
3092         }
3093
3094         vmas = kcalloc(args->num_binds, sizeof(*vmas), GFP_KERNEL);
3095         if (!vmas) {
3096                 err = -ENOMEM;
3097                 goto put_engine;
3098         }
3099
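        /* Resolve GEM handles to BOs and check each range against the BO size */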
3100         for (i = 0; i < args->num_binds; ++i) {
3101                 struct drm_gem_object *gem_obj;
3102                 u64 range = bind_ops[i].range;
3103                 u64 addr = bind_ops[i].addr;
3104                 u32 obj = bind_ops[i].obj;
3105                 u64 obj_offset = bind_ops[i].obj_offset;
3106
3107                 if (!obj)
3108                         continue;
3109
3110                 gem_obj = drm_gem_object_lookup(file, obj);
3111                 if (XE_IOCTL_ERR(xe, !gem_obj)) {
3112                         err = -ENOENT;
3113                         goto put_obj;
3114                 }
3115                 bos[i] = gem_to_xe_bo(gem_obj);
3116
3117                 if (XE_IOCTL_ERR(xe, range > bos[i]->size) ||
3118                     XE_IOCTL_ERR(xe, obj_offset >
3119                                  bos[i]->size - range)) {
3120                         err = -EINVAL;
3121                         goto put_obj;
3122                 }
3123
3124                 if (bos[i]->flags & XE_BO_INTERNAL_64K) {
3125                         if (XE_IOCTL_ERR(xe, obj_offset &
3126                                          XE_64K_PAGE_MASK) ||
3127                             XE_IOCTL_ERR(xe, addr & XE_64K_PAGE_MASK) ||
3128                             XE_IOCTL_ERR(xe, range & XE_64K_PAGE_MASK)) {
3129                                 err = -EINVAL;
3130                                 goto put_obj;
3131                         }
3132                 }
3133         }
3134
3135         if (args->num_syncs) {
3136                 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3137                 if (!syncs) {
3138                         err = -ENOMEM;
3139                         goto put_obj;
3140                 }
3141         }
3142
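        /* Parse the user-supplied sync entries; they are distributed across the binds below */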
3143         syncs_user = u64_to_user_ptr(args->syncs);
3144         for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3145                 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3146                                           &syncs_user[num_syncs], false,
3147                                           xe_vm_no_dma_fences(vm));
3148                 if (err)
3149                         goto free_syncs;
3150         }
3151
3152         err = down_write_killable(&vm->lock);
3153         if (err)
3154                 goto free_syncs;
3155
3156         /* Do some error checking first to make the unwind easier */
3157         for (i = 0; i < args->num_binds; ++i) {
3158                 u64 range = bind_ops[i].range;
3159                 u64 addr = bind_ops[i].addr;
3160                 u32 op = bind_ops[i].op;
3161
3162                 err = __vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op);
3163                 if (err)
3164                         goto release_vm_lock;
3165         }
3166
3167         for (i = 0; i < args->num_binds; ++i) {
3168                 u64 range = bind_ops[i].range;
3169                 u64 addr = bind_ops[i].addr;
3170                 u32 op = bind_ops[i].op;
3171                 u64 obj_offset = bind_ops[i].obj_offset;
3172                 u64 gt_mask = bind_ops[i].gt_mask;
3173                 u32 region = bind_ops[i].region;
3174
3175                 vmas[i] = vm_bind_ioctl_lookup_vma(vm, bos[i], obj_offset,
3176                                                    addr, range, op, gt_mask,
3177                                                    region);
3178                 if (IS_ERR(vmas[i])) {
3179                         err = PTR_ERR(vmas[i]);
3180                         vmas[i] = NULL;
3181                         goto destroy_vmas;
3182                 }
3183         }
3184
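        /*
         * Dispatch the binds. With multiple binds, wait syncs are attached to
         * the first bind only and signal syncs to the last one; the binds in
         * between carry no syncs.
         */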
3185         for (j = 0; j < args->num_binds; ++j) {
3186                 struct xe_sync_entry *__syncs;
3187                 u32 __num_syncs = 0;
3188                 bool first_or_last = j == 0 || j == args->num_binds - 1;
3189
3190                 if (args->num_binds == 1) {
3191                         __num_syncs = num_syncs;
3192                         __syncs = syncs;
3193                 } else if (first_or_last && num_syncs) {
3194                         bool first = j == 0;
3195
3196                         __syncs = kmalloc(sizeof(*__syncs) * num_syncs,
3197                                           GFP_KERNEL);
3198                         if (!__syncs) {
3199                                 err = -ENOMEM;
3200                                 break;
3201                         }
3202
3203                         /* in-syncs on first bind, out-syncs on last bind */
3204                         for (i = 0; i < num_syncs; ++i) {
3205                                 bool signal = syncs[i].flags &
3206                                         DRM_XE_SYNC_SIGNAL;
3207
3208                                 if ((first && !signal) || (!first && signal))
3209                                         __syncs[__num_syncs++] = syncs[i];
3210                         }
3211                 } else {
3212                         __num_syncs = 0;
3213                         __syncs = NULL;
3214                 }
3215
3216                 if (async) {
3217                         bool last = j == args->num_binds - 1;
3218
3219                         /*
3220                          * Each pass of the async worker drops a VM/engine ref, so take an
3221                          * extra ref for all but the last bind; the lookups above cover that one.
3222                          */
3223                         if (!last) {
3224                                 if (e)
3225                                         xe_engine_get(e);
3226                                 xe_vm_get(vm);
3227                         }
3228
3229                         err = vm_bind_ioctl_async(vm, vmas[j], e, bos[j],
3230                                                   bind_ops + j, __syncs,
3231                                                   __num_syncs);
3232                         if (err && !last) {
3233                                 if (e)
3234                                         xe_engine_put(e);
3235                                 xe_vm_put(vm);
3236                         }
3237                         if (err)
3238                                 break;
3239                 } else {
3240                         XE_BUG_ON(j != 0);      /* Not supported */
3241                         err = vm_bind_ioctl(vm, vmas[j], e, bos[j],
3242                                             bind_ops + j, __syncs,
3243                                             __num_syncs, NULL);
3244                         break;  /* Needed so cleanup loops work */
3245                 }
3246         }
3247
3248         /* Most of the cleanup is owned by the async bind worker */
3249         if (async && !err) {
3250                 up_write(&vm->lock);
3251                 if (args->num_binds > 1)
3252                         kfree(syncs);
3253                 goto free_objs;
3254         }
3255
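        /*
         * Unwind on error: tear down any VMAs created for binds that were
         * never handed off to the async worker (index j onwards).
         */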
3256 destroy_vmas:
3257         for (i = j; err && i < args->num_binds; ++i) {
3258                 u32 op = bind_ops[i].op;
3259                 struct xe_vma *vma, *next;
3260
3261                 if (!vmas[i])
3262                         break;
3263
3264                 list_for_each_entry_safe(vma, next, &vmas[i]->unbind_link,
3265                                          unbind_link) {
3266                         list_del_init(&vma->unbind_link);
3267                         if (!vma->destroyed) {
3268                                 prep_vma_destroy(vm, vma);
3269                                 xe_vma_destroy_unlocked(vma);
3270                         }
3271                 }
3272
3273                 switch (VM_BIND_OP(op)) {
3274                 case XE_VM_BIND_OP_MAP:
3275                         prep_vma_destroy(vm, vmas[i]);
3276                         xe_vma_destroy_unlocked(vmas[i]);
3277                         break;
3278                 case XE_VM_BIND_OP_MAP_USERPTR:
3279                         prep_vma_destroy(vm, vmas[i]);
3280                         xe_vma_destroy_unlocked(vmas[i]);
3281                         break;
3282                 }
3283         }
3284 release_vm_lock:
3285         up_write(&vm->lock);
3286 free_syncs:
3287         while (num_syncs--) {
3288                 if (async && j &&
3289                     !(syncs[num_syncs].flags & DRM_XE_SYNC_SIGNAL))
3290                         continue;       /* Still in async worker */
3291                 xe_sync_entry_cleanup(&syncs[num_syncs]);
3292         }
3293
3294         kfree(syncs);
3295 put_obj:
3296         for (i = j; i < args->num_binds; ++i)
3297                 xe_bo_put(bos[i]);
3298 put_engine:
3299         if (e)
3300                 xe_engine_put(e);
3301 put_vm:
3302         xe_vm_put(vm);
3303 free_objs:
3304         kfree(bos);
3305         kfree(vmas);
3306         if (args->num_binds > 1)
3307                 kfree(bind_ops);
3308         return err;
3309 }
3310
3311 /*
3312  * XXX: Using the TTM wrappers for now, likely can call into dma-resv code
3313  * directly to optimize. Also this likely should be an inline function.
3314  */
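/**
 * xe_vm_lock() - Lock the VM's dma-resv object
 * @vm: The VM to lock
 * @ww: Acquire context used for the reservation
 * @num_resv: Number of shared fence slots to reserve on the VM's dma-resv
 * @intr: Whether to wait interruptibly
 *
 * Return: 0 on success, negative error code on failure.
 */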
3315 int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
3316                int num_resv, bool intr)
3317 {
3318         struct ttm_validate_buffer tv_vm;
3319         LIST_HEAD(objs);
3320         LIST_HEAD(dups);
3321
3322         XE_BUG_ON(!ww);
3323
3324         tv_vm.num_shared = num_resv;
3325         tv_vm.bo = xe_vm_ttm_bo(vm);
3326         list_add_tail(&tv_vm.head, &objs);
3327
3328         return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
3329 }
3330
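/**
 * xe_vm_unlock() - Unlock the VM's dma-resv object
 * @vm: The VM to unlock
 * @ww: Acquire context used when locking the VM
 */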
3331 void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww)
3332 {
3333         dma_resv_unlock(&vm->resv);
3334         ww_acquire_fini(ww);
3335 }
3336
3337 /**
3338  * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3339  * @vma: VMA to invalidate
3340  *
3341  * Walks the page-table leaves, zeroing the entries owned by this VMA,
3342  * invalidates the TLBs, and blocks until the TLB invalidation is
3343  * complete.
3344  *
3345  * Returns 0 for success, negative error code otherwise.
3346  */
3347 int xe_vm_invalidate_vma(struct xe_vma *vma)
3348 {
3349         struct xe_device *xe = vma->vm->xe;
3350         struct xe_gt *gt;
3351         u32 gt_needs_invalidate = 0;
3352         int seqno[XE_MAX_GT];
3353         u8 id;
3354         int ret;
3355
3356         XE_BUG_ON(!xe_vm_in_fault_mode(vma->vm));
3357         trace_xe_vma_usm_invalidate(vma);
3358
3359         /* Check that we don't race with page-table updates */
3360         if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3361                 if (xe_vma_is_userptr(vma)) {
3362                         WARN_ON_ONCE(!mmu_interval_check_retry
3363                                      (&vma->userptr.notifier,
3364                                       vma->userptr.notifier_seq));
3365                         WARN_ON_ONCE(!dma_resv_test_signaled(&vma->vm->resv,
3366                                                              DMA_RESV_USAGE_BOOKKEEP));
3367
3368                 } else {
3369                         xe_bo_assert_held(vma->bo);
3370                 }
3371         }
3372
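        /*
         * Zap the leaf PTEs for this VMA on each GT and, where anything was
         * zapped, kick off a TLB invalidation.
         */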
3373         for_each_gt(gt, xe, id) {
3374                 if (xe_pt_zap_ptes(gt, vma)) {
3375                         gt_needs_invalidate |= BIT(id);
3376                         xe_device_wmb(xe);
3377                         seqno[id] = xe_gt_tlb_invalidation_vma(gt, NULL, vma);
3378                         if (seqno[id] < 0)
3379                                 return seqno[id];
3380                 }
3381         }
3382
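        /* Wait for all issued TLB invalidations to complete */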
3383         for_each_gt(gt, xe, id) {
3384                 if (gt_needs_invalidate & BIT(id)) {
3385                         ret = xe_gt_tlb_invalidation_wait(gt, seqno[id]);
3386                         if (ret < 0)
3387                                 return ret;
3388                 }
3389         }
3390
3391         vma->usm.gt_invalidated = vma->gt_mask;
3392
3393         return 0;
3394 }
3395
3396 #if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
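/*
 * Dump the VM's page-table root and VMA layout for simple error capture.
 * Best effort: returns 0 even if the VM lock cannot be taken.
 */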
3397 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
3398 {
3399         struct rb_node *node;
3400         bool is_vram;
3401         u64 addr;
3402
3403         if (!down_read_trylock(&vm->lock)) {
3404                 drm_printf(p, " Failed to acquire VM lock to dump capture\n");
3405                 return 0;
3406         }
3407         if (vm->pt_root[gt_id]) {
3408                 addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE,
3409                                   &is_vram);
3410                 drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS");
3411         }
3412
3413         for (node = rb_first(&vm->vmas); node; node = rb_next(node)) {
3414                 struct xe_vma *vma = to_xe_vma(node);
3415                 bool is_userptr = xe_vma_is_userptr(vma);
3416
3417                 if (is_userptr) {
3418                         struct xe_res_cursor cur;
3419
3420                         xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE,
3421                                         &cur);
3422                         addr = xe_res_dma(&cur);
3423                 } else {
3424                         addr = __xe_bo_addr(vma->bo, 0, XE_PAGE_SIZE, &is_vram);
3425                 }
3426                 drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
3427                            vma->start, vma->end, vma->end - vma->start + 1ull,
3428                            addr, is_userptr ? "USR" : is_vram ? "VRAM" : "SYS");
3429         }
3430         up_read(&vm->lock);
3431
3432         return 0;
3433 }
3434 #else
3435 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
3436 {
3437         return 0;
3438 }
3439 #endif