drm/xe: s/XE_PTE_READ_ONLY/XE_PTE_FLAG_READ_ONLY
drivers/gpu/drm/xe/xe_vm.c [linux-2.6-microblaze.git]
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9
10 #include <drm/drm_print.h>
11 #include <drm/ttm/ttm_execbuf_util.h>
12 #include <drm/ttm/ttm_tt.h>
13 #include <drm/xe_drm.h>
14 #include <linux/delay.h>
15 #include <linux/kthread.h>
16 #include <linux/mm.h>
17 #include <linux/swap.h>
18
19 #include "xe_bo.h"
20 #include "xe_device.h"
21 #include "xe_engine.h"
22 #include "xe_gt.h"
23 #include "xe_gt_pagefault.h"
24 #include "xe_gt_tlb_invalidation.h"
25 #include "xe_migrate.h"
26 #include "xe_pm.h"
27 #include "xe_preempt_fence.h"
28 #include "xe_pt.h"
29 #include "xe_res_cursor.h"
30 #include "xe_sync.h"
31 #include "xe_trace.h"
32
33 #define TEST_VM_ASYNC_OPS_ERROR
34
35 /**
36  * xe_vma_userptr_check_repin() - Advisory check for repin needed
37  * @vma: The userptr vma
38  *
39  * Check if the userptr vma has been invalidated since the last successful
40  * repin. The check is advisory only and the function can be called
41  * without the vm->userptr.notifier_lock held. There is no guarantee that the
42  * vma userptr will remain valid after a lockless check, so typically
43  * the call needs to be followed by a proper check under the notifier_lock.
44  *
45  * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
46  */
47 int xe_vma_userptr_check_repin(struct xe_vma *vma)
48 {
49         return mmu_interval_check_retry(&vma->userptr.notifier,
50                                         vma->userptr.notifier_seq) ?
51                 -EAGAIN : 0;
52 }
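/*
 * Illustrative sketch of the pattern described above, using the VM-wide
 * helpers defined further down in this file (preempt_rebind_work_func() is
 * a real caller):
 *
 *	if (xe_vm_userptr_check_repin(vm))	// advisory, lockless
 *		err = xe_vm_userptr_pin(vm);
 *
 *	down_read(&vm->userptr.notifier_lock);
 *	if (__xe_vm_userptr_needs_repin(vm)) {	// authoritative check
 *		up_read(&vm->userptr.notifier_lock);
 *		goto retry;
 *	}
 *	// ...commit work that relies on the userptr mappings...
 *	up_read(&vm->userptr.notifier_lock);
 */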
53
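/*
 * Pin the pages backing a userptr VMA and (re)build its DMA-mapped sg table.
 * Called with vm->lock held. If the MMU notifier sequence number still
 * matches the last successful pin this is a no-op. Otherwise any old sg
 * table is unmapped and freed, the user pages are grabbed with
 * get_user_pages_fast() (borrowing the notifier's mm when running from a
 * kthread), a new sg table is built and DMA-mapped, writable pages are
 * dirtied and the page references are dropped again. If the range was
 * invalidated while pinning, the whole sequence is retried.
 */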
54 int xe_vma_userptr_pin_pages(struct xe_vma *vma)
55 {
56         struct xe_vm *vm = vma->vm;
57         struct xe_device *xe = vm->xe;
58         const unsigned long num_pages =
59                 (vma->end - vma->start + 1) >> PAGE_SHIFT;
60         struct page **pages;
61         bool in_kthread = !current->mm;
62         unsigned long notifier_seq;
63         int pinned, ret, i;
64         bool read_only = vma->pte_flags & XE_PTE_FLAG_READ_ONLY;
65
66         lockdep_assert_held(&vm->lock);
67         XE_BUG_ON(!xe_vma_is_userptr(vma));
68 retry:
69         if (vma->destroyed)
70                 return 0;
71
72         notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
73         if (notifier_seq == vma->userptr.notifier_seq)
74                 return 0;
75
76         pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
77         if (!pages)
78                 return -ENOMEM;
79
80         if (vma->userptr.sg) {
81                 dma_unmap_sgtable(xe->drm.dev,
82                                   vma->userptr.sg,
83                                   read_only ? DMA_TO_DEVICE :
84                                   DMA_BIDIRECTIONAL, 0);
85                 sg_free_table(vma->userptr.sg);
86                 vma->userptr.sg = NULL;
87         }
88
89         pinned = ret = 0;
90         if (in_kthread) {
91                 if (!mmget_not_zero(vma->userptr.notifier.mm)) {
92                         ret = -EFAULT;
93                         goto mm_closed;
94                 }
95                 kthread_use_mm(vma->userptr.notifier.mm);
96         }
97
98         while (pinned < num_pages) {
99                 ret = get_user_pages_fast(vma->userptr.ptr + pinned * PAGE_SIZE,
100                                           num_pages - pinned,
101                                           read_only ? 0 : FOLL_WRITE,
102                                           &pages[pinned]);
103                 if (ret < 0) {
104                         if (in_kthread)
105                                 ret = 0;
106                         break;
107                 }
108
109                 pinned += ret;
110                 ret = 0;
111         }
112
113         if (in_kthread) {
114                 kthread_unuse_mm(vma->userptr.notifier.mm);
115                 mmput(vma->userptr.notifier.mm);
116         }
117 mm_closed:
118         if (ret)
119                 goto out;
120
121         ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages,
122                                                 pinned, 0,
123                                                 (u64)pinned << PAGE_SHIFT,
124                                                 xe_sg_segment_size(xe->drm.dev),
125                                                 GFP_KERNEL);
126         if (ret) {
127                 vma->userptr.sg = NULL;
128                 goto out;
129         }
130         vma->userptr.sg = &vma->userptr.sgt;
131
132         ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg,
133                               read_only ? DMA_TO_DEVICE :
134                               DMA_BIDIRECTIONAL,
135                               DMA_ATTR_SKIP_CPU_SYNC |
136                               DMA_ATTR_NO_KERNEL_MAPPING);
137         if (ret) {
138                 sg_free_table(vma->userptr.sg);
139                 vma->userptr.sg = NULL;
140                 goto out;
141         }
142
143         for (i = 0; i < pinned; ++i) {
144                 if (!read_only) {
145                         lock_page(pages[i]);
146                         set_page_dirty(pages[i]);
147                         unlock_page(pages[i]);
148                 }
149
150                 mark_page_accessed(pages[i]);
151         }
152
153 out:
154         release_pages(pages, pinned);
155         kvfree(pages);
156
157         if (!(ret < 0)) {
158                 vma->userptr.notifier_seq = notifier_seq;
159                 if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
160                         goto retry;
161         }
162
163         return ret < 0 ? ret : 0;
164 }
165
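/*
 * Return true if any compute engine on the VM either has no preempt fence
 * installed or has one whose signaling has already been enabled, i.e. a
 * preemption is in flight.
 */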
166 static bool preempt_fences_waiting(struct xe_vm *vm)
167 {
168         struct xe_engine *e;
169
170         lockdep_assert_held(&vm->lock);
171         xe_vm_assert_held(vm);
172
173         list_for_each_entry(e, &vm->preempt.engines, compute.link) {
174                 if (!e->compute.pfence || (e->compute.pfence &&
175                     test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
176                              &e->compute.pfence->flags))) {
177                         return true;
178                 }
179         }
180
181         return false;
182 }
183
184 static void free_preempt_fences(struct list_head *list)
185 {
186         struct list_head *link, *next;
187
188         list_for_each_safe(link, next, list)
189                 xe_preempt_fence_free(to_preempt_fence_from_link(link));
190 }
191
192 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
193                                 unsigned int *count)
194 {
195         lockdep_assert_held(&vm->lock);
196         xe_vm_assert_held(vm);
197
198         if (*count >= vm->preempt.num_engines)
199                 return 0;
200
201         for (; *count < vm->preempt.num_engines; ++(*count)) {
202                 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
203
204                 if (IS_ERR(pfence))
205                         return PTR_ERR(pfence);
206
207                 list_move_tail(xe_preempt_fence_link(pfence), list);
208         }
209
210         return 0;
211 }
212
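/*
 * Wait for every currently installed preempt fence on the VM's compute
 * engines to signal, i.e. for the engines to be suspended, then drop and
 * clear the fences. Returns -ETIME if a wait fails.
 */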
213 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
214 {
215         struct xe_engine *e;
216
217         xe_vm_assert_held(vm);
218
219         list_for_each_entry(e, &vm->preempt.engines, compute.link) {
220                 if (e->compute.pfence) {
221                         long timeout = dma_fence_wait(e->compute.pfence, false);
222
223                         if (timeout < 0)
224                                 return -ETIME;
225                         dma_fence_put(e->compute.pfence);
226                         e->compute.pfence = NULL;
227                 }
228         }
229
230         return 0;
231 }
232
233 static bool xe_vm_is_idle(struct xe_vm *vm)
234 {
235         struct xe_engine *e;
236
237         xe_vm_assert_held(vm);
238         list_for_each_entry(e, &vm->preempt.engines, compute.link) {
239                 if (!xe_engine_is_idle(e))
240                         return false;
241         }
242
243         return true;
244 }
245
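/*
 * Pair each compute engine on the VM with one of the pre-allocated fences
 * on @list, arm it with the engine's context and next seqno, and install it
 * as the engine's new preempt fence.
 */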
246 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
247 {
248         struct list_head *link;
249         struct xe_engine *e;
250
251         list_for_each_entry(e, &vm->preempt.engines, compute.link) {
252                 struct dma_fence *fence;
253
254                 link = list->next;
255                 XE_BUG_ON(link == list);
256
257                 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
258                                              e, e->compute.context,
259                                              ++e->compute.seqno);
260                 dma_fence_put(e->compute.pfence);
261                 e->compute.pfence = fence;
262         }
263 }
264
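/*
 * Lock @bo and add the current preempt fence of every compute engine on the
 * VM to the BO's reservation object with BOOKKEEP usage.
 */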
265 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
266 {
267         struct xe_engine *e;
268         struct ww_acquire_ctx ww;
269         int err;
270
271         err = xe_bo_lock(bo, &ww, vm->preempt.num_engines, true);
272         if (err)
273                 return err;
274
275         list_for_each_entry(e, &vm->preempt.engines, compute.link)
276                 if (e->compute.pfence) {
277                         dma_resv_add_fence(bo->ttm.base.resv,
278                                            e->compute.pfence,
279                                            DMA_RESV_USAGE_BOOKKEEP);
280                 }
281
282         xe_bo_unlock(bo, &ww);
283         return 0;
284 }
285
286 /**
287  * xe_vm_fence_all_extobjs() - Add a fence to vm's external objects' resv
288  * @vm: The vm.
289  * @fence: The fence to add.
290  * @usage: The resv usage for the fence.
291  *
292  * Loops over all of the vm's external object bindings and adds a @fence
293  * with the given @usage to all of the external objects' reservation
294  * objects.
295  */
296 void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
297                              enum dma_resv_usage usage)
298 {
299         struct xe_vma *vma;
300
301         list_for_each_entry(vma, &vm->extobj.list, extobj.link)
302                 dma_resv_add_fence(vma->bo->ttm.base.resv, fence, usage);
303 }
304
305 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
306 {
307         struct xe_engine *e;
308
309         lockdep_assert_held(&vm->lock);
310         xe_vm_assert_held(vm);
311
312         list_for_each_entry(e, &vm->preempt.engines, compute.link) {
313                 e->ops->resume(e);
314
315                 dma_resv_add_fence(&vm->resv, e->compute.pfence,
316                                    DMA_RESV_USAGE_BOOKKEEP);
317                 xe_vm_fence_all_extobjs(vm, e->compute.pfence,
318                                         DMA_RESV_USAGE_BOOKKEEP);
319         }
320 }
321
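/*
 * Register @e as a long-running compute engine on @vm: create a preempt
 * fence for it, add the engine to vm->preempt.engines, install the fence in
 * the VM's reservation object and those of all external BOs (BOOKKEEP
 * usage), and enable signaling on it right away if a userptr invalidation
 * or preemption is already pending.
 */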
322 int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e)
323 {
324         struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
325         struct ttm_validate_buffer *tv;
326         struct ww_acquire_ctx ww;
327         struct list_head objs;
328         struct dma_fence *pfence;
329         int err;
330         bool wait;
331
332         XE_BUG_ON(!xe_vm_in_compute_mode(vm));
333
334         down_write(&vm->lock);
335
336         err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1);
337         if (err)
338                 goto out_unlock_outer;
339
340         pfence = xe_preempt_fence_create(e, e->compute.context,
341                                          ++e->compute.seqno);
342         if (!pfence) {
343                 err = -ENOMEM;
344                 goto out_unlock;
345         }
346
347         list_add(&e->compute.link, &vm->preempt.engines);
348         ++vm->preempt.num_engines;
349         e->compute.pfence = pfence;
350
351         down_read(&vm->userptr.notifier_lock);
352
353         dma_resv_add_fence(&vm->resv, pfence,
354                            DMA_RESV_USAGE_BOOKKEEP);
355
356         xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP);
357
358         /*
359          * Check to see if a preemption on the VM or a userptr invalidation
360          * is in flight; if so, trigger this preempt fence to sync state with
361          * the other preempt fences on the VM.
362          */
363         wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
364         if (wait)
365                 dma_fence_enable_sw_signaling(pfence);
366
367         up_read(&vm->userptr.notifier_lock);
368
369 out_unlock:
370         xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
371 out_unlock_outer:
372         up_write(&vm->lock);
373
374         return err;
375 }
376
377 /**
378  * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
379  * that need repinning.
380  * @vm: The VM.
381  *
382  * This function checks for whether the VM has userptrs that need repinning,
383  * and provides a release-type barrier on the userptr.notifier_lock after
384  * checking.
385  *
386  * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
387  */
388 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
389 {
390         lockdep_assert_held_read(&vm->userptr.notifier_lock);
391
392         return (list_empty(&vm->userptr.repin_list) &&
393                 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
394 }
395
396 /**
397  * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv
398  * objects of the vm's external buffer objects.
399  * @vm: The vm.
400  * @ww: Pointer to a struct ww_acquire_ctx locking context.
401  * @tv_onstack: Array size XE_ONSTACK_TV of storage for the struct
402  * ttm_validate_buffers used for locking.
403  * @tv: Pointer to a pointer that on output contains the actual storage used.
404  * @objs: List head for the buffer objects locked.
405  * @intr: Whether to lock interruptible.
406  * @num_shared: Number of dma-fence slots to reserve in the locked objects.
407  *
408  * Locks the vm dma-resv objects and all the dma-resv objects of the
409  * buffer objects on the vm external object list. The TTM utilities require
410  * a list of struct ttm_validate_buffers pointing to the actual buffer
411  * objects to lock. Storage for those struct ttm_validate_buffers should
412  * be provided in @tv_onstack, and is typically reserved on the stack
413  * of the caller. If the size of @tv_onstack isn't sufficient, then
414  * storage will be allocated internally using kvmalloc().
415  *
416  * The function performs deadlock handling internally, and after a
417  * successful return the ww locking transaction should be considered
418  * sealed.
419  *
420  * Return: 0 on success, negative error code on error. In particular if
421  * @intr is set to true, -EINTR or -ERESTARTSYS may be returned. In case
422  * of error, any locking performed has been reverted.
423  */
424 int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
425                         struct ttm_validate_buffer *tv_onstack,
426                         struct ttm_validate_buffer **tv,
427                         struct list_head *objs,
428                         bool intr,
429                         unsigned int num_shared)
430 {
431         struct ttm_validate_buffer *tv_vm, *tv_bo;
432         struct xe_vma *vma, *next;
433         LIST_HEAD(dups);
434         int err;
435
436         lockdep_assert_held(&vm->lock);
437
438         if (vm->extobj.entries < XE_ONSTACK_TV) {
439                 tv_vm = tv_onstack;
440         } else {
441                 tv_vm = kvmalloc_array(vm->extobj.entries + 1, sizeof(*tv_vm),
442                                        GFP_KERNEL);
443                 if (!tv_vm)
444                         return -ENOMEM;
445         }
446         tv_bo = tv_vm + 1;
447
448         INIT_LIST_HEAD(objs);
449         list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
450                 tv_bo->num_shared = num_shared;
451                 tv_bo->bo = &vma->bo->ttm;
452
453                 list_add_tail(&tv_bo->head, objs);
454                 tv_bo++;
455         }
456         tv_vm->num_shared = num_shared;
457         tv_vm->bo = xe_vm_ttm_bo(vm);
458         list_add_tail(&tv_vm->head, objs);
459         err = ttm_eu_reserve_buffers(ww, objs, intr, &dups);
460         if (err)
461                 goto out_err;
462
463         spin_lock(&vm->notifier.list_lock);
464         list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list,
465                                  notifier.rebind_link) {
466                 xe_bo_assert_held(vma->bo);
467
468                 list_del_init(&vma->notifier.rebind_link);
469                 if (vma->tile_present && !vma->destroyed)
470                         list_move_tail(&vma->rebind_link, &vm->rebind_list);
471         }
472         spin_unlock(&vm->notifier.list_lock);
473
474         *tv = tv_vm;
475         return 0;
476
477 out_err:
478         if (tv_vm != tv_onstack)
479                 kvfree(tv_vm);
480
481         return err;
482 }
483
484 /**
485  * xe_vm_unlock_dma_resv() - Unlock reservation objects locked by
486  * xe_vm_lock_dma_resv()
487  * @vm: The vm.
488  * @tv_onstack: The @tv_onstack array given to xe_vm_lock_dma_resv().
489  * @tv: The value of *@tv given by xe_vm_lock_dma_resv().
490  * @ww: The ww_acquire_context used for locking.
491  * @objs: The list returned from xe_vm_lock_dma_resv().
492  *
493  * Unlocks the reservation objects and frees any memory allocated by
494  * xe_vm_lock_dma_resv().
495  */
496 void xe_vm_unlock_dma_resv(struct xe_vm *vm,
497                            struct ttm_validate_buffer *tv_onstack,
498                            struct ttm_validate_buffer *tv,
499                            struct ww_acquire_ctx *ww,
500                            struct list_head *objs)
501 {
502         /*
503          * Nothing should've been able to enter the list while we were locked,
504          * since we've held the dma-resvs of all the vm's external objects,
505          * and holding the dma_resv of an object is required for list
506          * addition, and we shouldn't add ourselves.
507          */
508         XE_WARN_ON(!list_empty(&vm->notifier.rebind_list));
509
510         ttm_eu_backoff_reservation(ww, objs);
511         if (tv && tv != tv_onstack)
512                 kvfree(tv);
513 }
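/*
 * Typical usage of the two helpers above (illustrative sketch only; see
 * preempt_rebind_work_func() below for a real caller):
 *
 *	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
 *	struct ttm_validate_buffer *tv;
 *	struct ww_acquire_ctx ww;
 *	struct list_head objs;
 *	int err;
 *
 *	err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs,
 *				  intr, num_shared);
 *	if (err)
 *		return err;
 *
 *	// ...operate on the vm and its external BOs...
 *
 *	xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
 */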
514
515 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
516
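/*
 * Worker that restores a compute-mode VM after an eviction or userptr
 * invalidation: under vm->lock it repins invalidated userptrs, locks the VM
 * and external-BO reservation objects, waits for the existing preempt
 * fences (suspending the engines), re-validates BOs on the rebind list,
 * rebinds them, waits for the rebind and any munmap-style unbind fences,
 * and finally, if no new invalidation raced in, arms fresh preempt fences
 * and resumes the engines. -EAGAIN retries immediately; -ENOMEM is retried
 * for up to XE_VM_REBIND_RETRY_TIMEOUT_MS.
 */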
517 static void preempt_rebind_work_func(struct work_struct *w)
518 {
519         struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
520         struct xe_vma *vma;
521         struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
522         struct ttm_validate_buffer *tv;
523         struct ww_acquire_ctx ww;
524         struct list_head objs;
525         struct dma_fence *rebind_fence;
526         unsigned int fence_count = 0;
527         LIST_HEAD(preempt_fences);
528         ktime_t end = 0;
529         int err;
530         long wait;
531         int __maybe_unused tries = 0;
532
533         XE_BUG_ON(!xe_vm_in_compute_mode(vm));
534         trace_xe_vm_rebind_worker_enter(vm);
535
536         if (xe_vm_is_closed(vm)) {
537                 trace_xe_vm_rebind_worker_exit(vm);
538                 return;
539         }
540
541         down_write(&vm->lock);
542
543 retry:
544         if (vm->async_ops.error)
545                 goto out_unlock_outer;
546
547         /*
548          * Extreme corner where we exit a VM error state with a munmap style VM
549          * unbind in flight which requires a rebind. In this case the rebind needs
550          * to install some fences into the dma-resv slots. The worker to do this
551          * is queued; let that worker make progress by dropping vm->lock and
552          * trying this again.
553          */
554         if (vm->async_ops.munmap_rebind_inflight) {
555                 up_write(&vm->lock);
556                 flush_work(&vm->async_ops.work);
557                 goto retry;
558         }
559
560         if (xe_vm_userptr_check_repin(vm)) {
561                 err = xe_vm_userptr_pin(vm);
562                 if (err)
563                         goto out_unlock_outer;
564         }
565
566         err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs,
567                                   false, vm->preempt.num_engines);
568         if (err)
569                 goto out_unlock_outer;
570
571         if (xe_vm_is_idle(vm)) {
572                 vm->preempt.rebind_deactivated = true;
573                 goto out_unlock;
574         }
575
576         /* Fresh preempt fences already installed. Everything is running. */
577         if (!preempt_fences_waiting(vm))
578                 goto out_unlock;
579
580         /*
581          * This makes sure the vm is completely suspended and also balances
582          * xe_engine suspend and resume; we resume *all* vm engines below.
583          */
584         err = wait_for_existing_preempt_fences(vm);
585         if (err)
586                 goto out_unlock;
587
588         err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
589         if (err)
590                 goto out_unlock;
591
592         list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
593                 if (xe_vma_is_userptr(vma) || vma->destroyed)
594                         continue;
595
596                 err = xe_bo_validate(vma->bo, vm, false);
597                 if (err)
598                         goto out_unlock;
599         }
600
601         rebind_fence = xe_vm_rebind(vm, true);
602         if (IS_ERR(rebind_fence)) {
603                 err = PTR_ERR(rebind_fence);
604                 goto out_unlock;
605         }
606
607         if (rebind_fence) {
608                 dma_fence_wait(rebind_fence, false);
609                 dma_fence_put(rebind_fence);
610         }
611
612         /* Wait on munmap style VM unbinds */
613         wait = dma_resv_wait_timeout(&vm->resv,
614                                      DMA_RESV_USAGE_KERNEL,
615                                      false, MAX_SCHEDULE_TIMEOUT);
616         if (wait <= 0) {
617                 err = -ETIME;
618                 goto out_unlock;
619         }
620
621 #define retry_required(__tries, __vm) \
622         (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
623         (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
624         __xe_vm_userptr_needs_repin(__vm))
625
626         down_read(&vm->userptr.notifier_lock);
627         if (retry_required(tries, vm)) {
628                 up_read(&vm->userptr.notifier_lock);
629                 err = -EAGAIN;
630                 goto out_unlock;
631         }
632
633 #undef retry_required
634
635         /* Point of no return. */
636         arm_preempt_fences(vm, &preempt_fences);
637         resume_and_reinstall_preempt_fences(vm);
638         up_read(&vm->userptr.notifier_lock);
639
640 out_unlock:
641         xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
642 out_unlock_outer:
643         if (err == -EAGAIN) {
644                 trace_xe_vm_rebind_worker_retry(vm);
645                 goto retry;
646         }
647
648         /*
649          * With multiple active VMs, under memory pressure, it is possible that
650          * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
651          * Until TTM properly handles locking in such scenarios, the best thing the
652          * driver can do is retry with a timeout. Killing the VM or putting it
653          * in error state after timeout or other error scenarios is still TBD.
654          */
655         if (err == -ENOMEM) {
656                 ktime_t cur = ktime_get();
657
658                 end = end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
659                 if (ktime_before(cur, end)) {
660                         msleep(20);
661                         trace_xe_vm_rebind_worker_retry(vm);
662                         goto retry;
663                 }
664         }
665         up_write(&vm->lock);
666
667         free_preempt_fences(&preempt_fences);
668
669         XE_WARN_ON(err < 0);    /* TODO: Kill VM or put in error state */
670         trace_xe_vm_rebind_worker_exit(vm);
671 }
672
673 struct async_op_fence;
674 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
675                         struct xe_engine *e, struct xe_sync_entry *syncs,
676                         u32 num_syncs, struct async_op_fence *afence);
677
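/*
 * MMU interval notifier callback, invoked when the CPU mappings backing a
 * userptr VMA change. Records the new notifier sequence number; for a VMA
 * that has already been bound it then enables signaling on all fences in
 * the VM's reservation object (turning preempt fences into schedule
 * disables) and waits for them. Outside of fault mode the VMA is also
 * queued on vm->userptr.invalidated so the exec and rebind paths repin it;
 * in fault mode the GPU mappings are invalidated immediately instead.
 */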
678 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
679                                    const struct mmu_notifier_range *range,
680                                    unsigned long cur_seq)
681 {
682         struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
683         struct xe_vm *vm = vma->vm;
684         struct dma_resv_iter cursor;
685         struct dma_fence *fence;
686         long err;
687
688         XE_BUG_ON(!xe_vma_is_userptr(vma));
689         trace_xe_vma_userptr_invalidate(vma);
690
691         if (!mmu_notifier_range_blockable(range))
692                 return false;
693
694         down_write(&vm->userptr.notifier_lock);
695         mmu_interval_set_seq(mni, cur_seq);
696
697         /* No need to stop gpu access if the userptr is not yet bound. */
698         if (!vma->userptr.initial_bind) {
699                 up_write(&vm->userptr.notifier_lock);
700                 return true;
701         }
702
703         /*
704          * Tell exec and rebind worker they need to repin and rebind this
705          * userptr.
706          */
707         if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->tile_present) {
708                 spin_lock(&vm->userptr.invalidated_lock);
709                 list_move_tail(&vma->userptr.invalidate_link,
710                                &vm->userptr.invalidated);
711                 spin_unlock(&vm->userptr.invalidated_lock);
712         }
713
714         up_write(&vm->userptr.notifier_lock);
715
716         /*
717          * Preempt fences turn into schedule disables, pipeline these.
718          * Note that even in fault mode, we need to wait for binds and
719          * unbinds to complete, and those are attached as BOOKKEEP fences
720          * to the vm.
721          */
722         dma_resv_iter_begin(&cursor, &vm->resv,
723                             DMA_RESV_USAGE_BOOKKEEP);
724         dma_resv_for_each_fence_unlocked(&cursor, fence)
725                 dma_fence_enable_sw_signaling(fence);
726         dma_resv_iter_end(&cursor);
727
728         err = dma_resv_wait_timeout(&vm->resv,
729                                     DMA_RESV_USAGE_BOOKKEEP,
730                                     false, MAX_SCHEDULE_TIMEOUT);
731         XE_WARN_ON(err <= 0);
732
733         if (xe_vm_in_fault_mode(vm)) {
734                 err = xe_vm_invalidate_vma(vma);
735                 XE_WARN_ON(err);
736         }
737
738         trace_xe_vma_userptr_invalidate_complete(vma);
739
740         return true;
741 }
742
743 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
744         .invalidate = vma_userptr_invalidate,
745 };
746
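/*
 * Repin all invalidated userptr VMAs on the VM: move them from the
 * invalidated list to the repin list, pin their pages, and then, under the
 * VM reservation lock, transfer them to vm->rebind_list so the next rebind
 * re-establishes their GPU mappings. On error the already-pinned VMAs are
 * put back on the repin list.
 */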
747 int xe_vm_userptr_pin(struct xe_vm *vm)
748 {
749         struct xe_vma *vma, *next;
750         int err = 0;
751         LIST_HEAD(tmp_evict);
752
753         lockdep_assert_held_write(&vm->lock);
754
755         /* Collect invalidated userptrs */
756         spin_lock(&vm->userptr.invalidated_lock);
757         list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
758                                  userptr.invalidate_link) {
759                 list_del_init(&vma->userptr.invalidate_link);
760                 list_move_tail(&vma->userptr_link, &vm->userptr.repin_list);
761         }
762         spin_unlock(&vm->userptr.invalidated_lock);
763
764         /* Pin and move to temporary list */
765         list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, userptr_link) {
766                 err = xe_vma_userptr_pin_pages(vma);
767                 if (err < 0)
768                         goto out_err;
769
770                 list_move_tail(&vma->userptr_link, &tmp_evict);
771         }
772
773         /* Take lock and move to rebind_list for rebinding. */
774         err = dma_resv_lock_interruptible(&vm->resv, NULL);
775         if (err)
776                 goto out_err;
777
778         list_for_each_entry_safe(vma, next, &tmp_evict, userptr_link) {
779                 list_del_init(&vma->userptr_link);
780                 list_move_tail(&vma->rebind_link, &vm->rebind_list);
781         }
782
783         dma_resv_unlock(&vm->resv);
784
785         return 0;
786
787 out_err:
788         list_splice_tail(&tmp_evict, &vm->userptr.repin_list);
789
790         return err;
791 }
792
793 /**
794  * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
795  * that need repinning.
796  * @vm: The VM.
797  *
798  * This function does an advisory check for whether the VM has userptrs that
799  * need repinning.
800  *
801  * Return: 0 if there are no indications of userptrs needing repinning,
802  * -EAGAIN if there are.
803  */
804 int xe_vm_userptr_check_repin(struct xe_vm *vm)
805 {
806         return (list_empty_careful(&vm->userptr.repin_list) &&
807                 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
808 }
809
810 static struct dma_fence *
811 xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
812                struct xe_sync_entry *syncs, u32 num_syncs);
813
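/*
 * Rebind every VMA on vm->rebind_list (no engine, no syncs), returning the
 * fence of the last rebind issued, NULL if there was nothing to do, or an
 * ERR_PTR on failure. Skipped (returns NULL) for VMs that don't use
 * dma-fences unless called from the rebind worker.
 */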
814 struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
815 {
816         struct dma_fence *fence = NULL;
817         struct xe_vma *vma, *next;
818
819         lockdep_assert_held(&vm->lock);
820         if (xe_vm_no_dma_fences(vm) && !rebind_worker)
821                 return NULL;
822
823         xe_vm_assert_held(vm);
824         list_for_each_entry_safe(vma, next, &vm->rebind_list, rebind_link) {
825                 XE_WARN_ON(!vma->tile_present);
826
827                 list_del_init(&vma->rebind_link);
828                 dma_fence_put(fence);
829                 if (rebind_worker)
830                         trace_xe_vma_rebind_worker(vma);
831                 else
832                         trace_xe_vma_rebind_exec(vma);
833                 fence = xe_vm_bind_vma(vma, NULL, NULL, 0);
834                 if (IS_ERR(fence))
835                         return fence;
836         }
837
838         return fence;
839 }
840
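/*
 * Allocate and initialize a VMA covering [start, end] for either a BO
 * mapping or a userptr. The tile mask defaults to all tiles when none is
 * given. BO-backed VMAs take a reference on the BO and are linked into
 * bo->vmas; userptr VMAs register an MMU interval notifier on the current
 * mm and take a reference on the VM instead.
 */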
841 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
842                                     struct xe_bo *bo,
843                                     u64 bo_offset_or_userptr,
844                                     u64 start, u64 end,
845                                     bool read_only,
846                                     u64 tile_mask)
847 {
848         struct xe_vma *vma;
849         struct xe_tile *tile;
850         u8 id;
851
852         XE_BUG_ON(start >= end);
853         XE_BUG_ON(end >= vm->size);
854
855         vma = kzalloc(sizeof(*vma), GFP_KERNEL);
856         if (!vma) {
857                 vma = ERR_PTR(-ENOMEM);
858                 return vma;
859         }
860
861         INIT_LIST_HEAD(&vma->rebind_link);
862         INIT_LIST_HEAD(&vma->unbind_link);
863         INIT_LIST_HEAD(&vma->userptr_link);
864         INIT_LIST_HEAD(&vma->userptr.invalidate_link);
865         INIT_LIST_HEAD(&vma->notifier.rebind_link);
866         INIT_LIST_HEAD(&vma->extobj.link);
867
868         vma->vm = vm;
869         vma->start = start;
870         vma->end = end;
871         if (read_only)
872                 vma->pte_flags = XE_PTE_FLAG_READ_ONLY;
873
874         if (tile_mask) {
875                 vma->tile_mask = tile_mask;
876         } else {
877                 for_each_tile(tile, vm->xe, id)
878                         vma->tile_mask |= 0x1 << id;
879         }
880
881         if (vm->xe->info.platform == XE_PVC)
882                 vma->use_atomic_access_pte_bit = true;
883
884         if (bo) {
885                 xe_bo_assert_held(bo);
886                 vma->bo_offset = bo_offset_or_userptr;
887                 vma->bo = xe_bo_get(bo);
888                 list_add_tail(&vma->bo_link, &bo->vmas);
889         } else /* userptr */ {
890                 u64 size = end - start + 1;
891                 int err;
892
893                 vma->userptr.ptr = bo_offset_or_userptr;
894
895                 err = mmu_interval_notifier_insert(&vma->userptr.notifier,
896                                                    current->mm,
897                                                    vma->userptr.ptr, size,
898                                                    &vma_userptr_notifier_ops);
899                 if (err) {
900                         kfree(vma);
901                         vma = ERR_PTR(err);
902                         return vma;
903                 }
904
905                 vma->userptr.notifier_seq = LONG_MAX;
906                 xe_vm_get(vm);
907         }
908
909         return vma;
910 }
911
912 static bool vm_remove_extobj(struct xe_vma *vma)
913 {
914         if (!list_empty(&vma->extobj.link)) {
915                 vma->vm->extobj.entries--;
916                 list_del_init(&vma->extobj.link);
917                 return true;
918         }
919         return false;
920 }
921
922 static void xe_vma_destroy_late(struct xe_vma *vma)
923 {
924         struct xe_vm *vm = vma->vm;
925         struct xe_device *xe = vm->xe;
926         bool read_only = vma->pte_flags & XE_PTE_FLAG_READ_ONLY;
927
928         if (xe_vma_is_userptr(vma)) {
929                 if (vma->userptr.sg) {
930                         dma_unmap_sgtable(xe->drm.dev,
931                                           vma->userptr.sg,
932                                           read_only ? DMA_TO_DEVICE :
933                                           DMA_BIDIRECTIONAL, 0);
934                         sg_free_table(vma->userptr.sg);
935                         vma->userptr.sg = NULL;
936                 }
937
938                 /*
939                  * Since userptr pages are not pinned, we can't remove
940                  * the notifier until we're sure the GPU is not accessing
941                  * them anymore.
942                  */
943                 mmu_interval_notifier_remove(&vma->userptr.notifier);
944                 xe_vm_put(vm);
945         } else {
946                 xe_bo_put(vma->bo);
947         }
948
949         kfree(vma);
950 }
951
952 static void vma_destroy_work_func(struct work_struct *w)
953 {
954         struct xe_vma *vma =
955                 container_of(w, struct xe_vma, destroy_work);
956
957         xe_vma_destroy_late(vma);
958 }
959
960 static struct xe_vma *
961 bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
962                             struct xe_vma *ignore)
963 {
964         struct xe_vma *vma;
965
966         list_for_each_entry(vma, &bo->vmas, bo_link) {
967                 if (vma != ignore && vma->vm == vm)
968                         return vma;
969         }
970
971         return NULL;
972 }
973
974 static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm,
975                                  struct xe_vma *ignore)
976 {
977         struct ww_acquire_ctx ww;
978         bool ret;
979
980         xe_bo_lock(bo, &ww, 0, false);
981         ret = !!bo_has_vm_references_locked(bo, vm, ignore);
982         xe_bo_unlock(bo, &ww);
983
984         return ret;
985 }
986
987 static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
988 {
989         list_add(&vma->extobj.link, &vm->extobj.list);
990         vm->extobj.entries++;
991 }
992
993 static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
994 {
995         struct xe_bo *bo = vma->bo;
996
997         lockdep_assert_held_write(&vm->lock);
998
999         if (bo_has_vm_references(bo, vm, vma))
1000                 return;
1001
1002         __vm_insert_extobj(vm, vma);
1003 }
1004
1005 static void vma_destroy_cb(struct dma_fence *fence,
1006                            struct dma_fence_cb *cb)
1007 {
1008         struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1009
1010         INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1011         queue_work(system_unbound_wq, &vma->destroy_work);
1012 }
1013
1014 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1015 {
1016         struct xe_vm *vm = vma->vm;
1017
1018         lockdep_assert_held_write(&vm->lock);
1019         XE_BUG_ON(!list_empty(&vma->unbind_link));
1020
1021         if (xe_vma_is_userptr(vma)) {
1022                 XE_WARN_ON(!vma->destroyed);
1023                 spin_lock(&vm->userptr.invalidated_lock);
1024                 list_del_init(&vma->userptr.invalidate_link);
1025                 spin_unlock(&vm->userptr.invalidated_lock);
1026                 list_del(&vma->userptr_link);
1027         } else {
1028                 xe_bo_assert_held(vma->bo);
1029                 list_del(&vma->bo_link);
1030
1031                 spin_lock(&vm->notifier.list_lock);
1032                 list_del(&vma->notifier.rebind_link);
1033                 spin_unlock(&vm->notifier.list_lock);
1034
1035                 if (!vma->bo->vm && vm_remove_extobj(vma)) {
1036                         struct xe_vma *other;
1037
1038                         other = bo_has_vm_references_locked(vma->bo, vm, NULL);
1039
1040                         if (other)
1041                                 __vm_insert_extobj(vm, other);
1042                 }
1043         }
1044
1045         xe_vm_assert_held(vm);
1046         if (!list_empty(&vma->rebind_link))
1047                 list_del(&vma->rebind_link);
1048
1049         if (fence) {
1050                 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1051                                                  vma_destroy_cb);
1052
1053                 if (ret) {
1054                         XE_WARN_ON(ret != -ENOENT);
1055                         xe_vma_destroy_late(vma);
1056                 }
1057         } else {
1058                 xe_vma_destroy_late(vma);
1059         }
1060 }
1061
1062 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1063 {
1064         struct ttm_validate_buffer tv[2];
1065         struct ww_acquire_ctx ww;
1066         struct xe_bo *bo = vma->bo;
1067         LIST_HEAD(objs);
1068         LIST_HEAD(dups);
1069         int err;
1070
1071         memset(tv, 0, sizeof(tv));
1072         tv[0].bo = xe_vm_ttm_bo(vma->vm);
1073         list_add(&tv[0].head, &objs);
1074
1075         if (bo) {
1076                 tv[1].bo = &xe_bo_get(bo)->ttm;
1077                 list_add(&tv[1].head, &objs);
1078         }
1079         err = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
1080         XE_WARN_ON(err);
1081
1082         xe_vma_destroy(vma, NULL);
1083
1084         ttm_eu_backoff_reservation(&ww, &objs);
1085         if (bo)
1086                 xe_bo_put(bo);
1087 }
1088
1089 static struct xe_vma *to_xe_vma(const struct rb_node *node)
1090 {
1091         BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);
1092         return (struct xe_vma *)node;
1093 }
1094
1095 static int xe_vma_cmp(const struct xe_vma *a, const struct xe_vma *b)
1096 {
1097         if (a->end < b->start) {
1098                 return -1;
1099         } else if (b->end < a->start) {
1100                 return 1;
1101         } else {
1102                 return 0;
1103         }
1104 }
1105
1106 static bool xe_vma_less_cb(struct rb_node *a, const struct rb_node *b)
1107 {
1108         return xe_vma_cmp(to_xe_vma(a), to_xe_vma(b)) < 0;
1109 }
1110
1111 int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node)
1112 {
1113         struct xe_vma *cmp = to_xe_vma(node);
1114         const struct xe_vma *own = key;
1115
1116         if (own->start > cmp->end)
1117                 return 1;
1118
1119         if (own->end < cmp->start)
1120                 return -1;
1121
1122         return 0;
1123 }
1124
1125 struct xe_vma *
1126 xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma)
1127 {
1128         struct rb_node *node;
1129
1130         if (xe_vm_is_closed(vm))
1131                 return NULL;
1132
1133         XE_BUG_ON(vma->end >= vm->size);
1134         lockdep_assert_held(&vm->lock);
1135
1136         node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb);
1137
1138         return node ? to_xe_vma(node) : NULL;
1139 }
1140
1141 static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1142 {
1143         XE_BUG_ON(vma->vm != vm);
1144         lockdep_assert_held(&vm->lock);
1145
1146         rb_add(&vma->vm_node, &vm->vmas, xe_vma_less_cb);
1147 }
1148
1149 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1150 {
1151         XE_BUG_ON(vma->vm != vm);
1152         lockdep_assert_held(&vm->lock);
1153
1154         rb_erase(&vma->vm_node, &vm->vmas);
1155         if (vm->usm.last_fault_vma == vma)
1156                 vm->usm.last_fault_vma = NULL;
1157 }
1158
1159 static void async_op_work_func(struct work_struct *w);
1160 static void vm_destroy_work_func(struct work_struct *w);
1161
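/*
 * Allocate and initialize a VM: create the per-tile page-table roots (and
 * scratch tables when XE_VM_FLAG_SCRATCH_PAGE is set), set up compute and
 * async-bind state as requested by @flags, and, for non-migration VMs,
 * take runtime-PM/memory-access references and create a per-tile copy
 * engine (vm->eng[id]) using the tile's migration VM.
 */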
1162 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1163 {
1164         struct xe_vm *vm;
1165         int err, i = 0, number_tiles = 0;
1166         struct xe_tile *tile;
1167         u8 id;
1168
1169         vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1170         if (!vm)
1171                 return ERR_PTR(-ENOMEM);
1172
1173         vm->xe = xe;
1174         kref_init(&vm->refcount);
1175         dma_resv_init(&vm->resv);
1176
1177         vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1);
1178
1179         vm->vmas = RB_ROOT;
1180         vm->flags = flags;
1181
1182         init_rwsem(&vm->lock);
1183
1184         INIT_LIST_HEAD(&vm->rebind_list);
1185
1186         INIT_LIST_HEAD(&vm->userptr.repin_list);
1187         INIT_LIST_HEAD(&vm->userptr.invalidated);
1188         init_rwsem(&vm->userptr.notifier_lock);
1189         spin_lock_init(&vm->userptr.invalidated_lock);
1190
1191         INIT_LIST_HEAD(&vm->notifier.rebind_list);
1192         spin_lock_init(&vm->notifier.list_lock);
1193
1194         INIT_LIST_HEAD(&vm->async_ops.pending);
1195         INIT_WORK(&vm->async_ops.work, async_op_work_func);
1196         spin_lock_init(&vm->async_ops.lock);
1197
1198         INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1199
1200         INIT_LIST_HEAD(&vm->preempt.engines);
1201         vm->preempt.min_run_period_ms = 10;     /* FIXME: Wire up to uAPI */
1202
1203         INIT_LIST_HEAD(&vm->extobj.list);
1204
1205         if (!(flags & XE_VM_FLAG_MIGRATION)) {
1206                 /* We need to immediately exit from any D3 state */
1207                 xe_pm_runtime_get(xe);
1208                 xe_device_mem_access_get(xe);
1209         }
1210
1211         err = dma_resv_lock_interruptible(&vm->resv, NULL);
1212         if (err)
1213                 goto err_put;
1214
1215         if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1216                 vm->flags |= XE_VM_FLAGS_64K;
1217
1218         for_each_tile(tile, xe, id) {
1219                 if (flags & XE_VM_FLAG_MIGRATION &&
1220                     tile->id != XE_VM_FLAG_GT_ID(flags))
1221                         continue;
1222
1223                 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1224                 if (IS_ERR(vm->pt_root[id])) {
1225                         err = PTR_ERR(vm->pt_root[id]);
1226                         vm->pt_root[id] = NULL;
1227                         goto err_destroy_root;
1228                 }
1229         }
1230
1231         if (flags & XE_VM_FLAG_SCRATCH_PAGE) {
1232                 for_each_tile(tile, xe, id) {
1233                         if (!vm->pt_root[id])
1234                                 continue;
1235
1236                         err = xe_pt_create_scratch(xe, tile, vm);
1237                         if (err)
1238                                 goto err_scratch_pt;
1239                 }
1240                 vm->batch_invalidate_tlb = true;
1241         }
1242
1243         if (flags & DRM_XE_VM_CREATE_COMPUTE_MODE) {
1244                 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1245                 vm->flags |= XE_VM_FLAG_COMPUTE_MODE;
1246                 vm->batch_invalidate_tlb = false;
1247         }
1248
1249         if (flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) {
1250                 vm->async_ops.fence.context = dma_fence_context_alloc(1);
1251                 vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
1252         }
1253
1254         /* Fill pt_root after allocating scratch tables */
1255         for_each_tile(tile, xe, id) {
1256                 if (!vm->pt_root[id])
1257                         continue;
1258
1259                 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1260         }
1261         dma_resv_unlock(&vm->resv);
1262
1263         /* Kernel migration VM shouldn't have a circular loop. */
1264         if (!(flags & XE_VM_FLAG_MIGRATION)) {
1265                 for_each_tile(tile, xe, id) {
1266                         struct xe_gt *gt = tile->primary_gt;
1267                         struct xe_vm *migrate_vm;
1268                         struct xe_engine *eng;
1269
1270                         if (!vm->pt_root[id])
1271                                 continue;
1272
1273                         migrate_vm = xe_migrate_get_vm(tile->migrate);
1274                         eng = xe_engine_create_class(xe, gt, migrate_vm,
1275                                                      XE_ENGINE_CLASS_COPY,
1276                                                      ENGINE_FLAG_VM);
1277                         xe_vm_put(migrate_vm);
1278                         if (IS_ERR(eng)) {
1279                                 xe_vm_close_and_put(vm);
1280                                 return ERR_CAST(eng);
1281                         }
1282                         vm->eng[id] = eng;
1283                         number_tiles++;
1284                 }
1285         }
1286
1287         if (number_tiles > 1)
1288                 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1289
1290         mutex_lock(&xe->usm.lock);
1291         if (flags & XE_VM_FLAG_FAULT_MODE)
1292                 xe->usm.num_vm_in_fault_mode++;
1293         else if (!(flags & XE_VM_FLAG_MIGRATION))
1294                 xe->usm.num_vm_in_non_fault_mode++;
1295         mutex_unlock(&xe->usm.lock);
1296
1297         trace_xe_vm_create(vm);
1298
1299         return vm;
1300
1301 err_scratch_pt:
1302         for_each_tile(tile, xe, id) {
1303                 if (!vm->pt_root[id])
1304                         continue;
1305
1306                 i = vm->pt_root[id]->level;
1307                 while (i)
1308                         if (vm->scratch_pt[id][--i])
1309                                 xe_pt_destroy(vm->scratch_pt[id][i],
1310                                               vm->flags, NULL);
1311                 xe_bo_unpin(vm->scratch_bo[id]);
1312                 xe_bo_put(vm->scratch_bo[id]);
1313         }
1314 err_destroy_root:
1315         for_each_tile(tile, xe, id) {
1316                 if (vm->pt_root[id])
1317                         xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1318         }
1319         dma_resv_unlock(&vm->resv);
1320 err_put:
1321         dma_resv_fini(&vm->resv);
1322         kfree(vm);
1323         if (!(flags & XE_VM_FLAG_MIGRATION)) {
1324                 xe_device_mem_access_put(xe);
1325                 xe_pm_runtime_put(xe);
1326         }
1327         return ERR_PTR(err);
1328 }
1329
1330 static void flush_async_ops(struct xe_vm *vm)
1331 {
1332         queue_work(system_unbound_wq, &vm->async_ops.work);
1333         flush_work(&vm->async_ops.work);
1334 }
1335
1336 static void vm_error_capture(struct xe_vm *vm, int err,
1337                              u32 op, u64 addr, u64 size)
1338 {
1339         struct drm_xe_vm_bind_op_error_capture capture;
1340         u64 __user *address =
1341                 u64_to_user_ptr(vm->async_ops.error_capture.addr);
1342         bool in_kthread = !current->mm;
1343
1344         capture.error = err;
1345         capture.op = op;
1346         capture.addr = addr;
1347         capture.size = size;
1348
1349         if (in_kthread) {
1350                 if (!mmget_not_zero(vm->async_ops.error_capture.mm))
1351                         goto mm_closed;
1352                 kthread_use_mm(vm->async_ops.error_capture.mm);
1353         }
1354
1355         if (copy_to_user(address, &capture, sizeof(capture)))
1356                 XE_WARN_ON("Copy to user failed");
1357
1358         if (in_kthread) {
1359                 kthread_unuse_mm(vm->async_ops.error_capture.mm);
1360                 mmput(vm->async_ops.error_capture.mm);
1361         }
1362
1363 mm_closed:
1364         wake_up_all(&vm->async_ops.error_capture.wq);
1365 }
1366
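/*
 * Close a VM and drop the creation reference: flush async bind ops and the
 * compute-mode rebind worker, kill and release the per-tile engines,
 * destroy all VMAs (VMAs of external BOs are collected and destroyed after
 * the VM's reservation lock has been dropped) and the scratch page tables,
 * then put the VM. The page-table roots and the VM memory itself are freed
 * later from vm_destroy_work_func() once the last reference is gone.
 */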
1367 void xe_vm_close_and_put(struct xe_vm *vm)
1368 {
1369         struct rb_root contested = RB_ROOT;
1370         struct ww_acquire_ctx ww;
1371         struct xe_device *xe = vm->xe;
1372         struct xe_tile *tile;
1373         u8 id;
1374
1375         XE_BUG_ON(vm->preempt.num_engines);
1376
1377         vm->size = 0;
1378         smp_mb();
1379         flush_async_ops(vm);
1380         if (xe_vm_in_compute_mode(vm))
1381                 flush_work(&vm->preempt.rebind_work);
1382
1383         for_each_tile(tile, xe, id) {
1384                 if (vm->eng[id]) {
1385                         xe_engine_kill(vm->eng[id]);
1386                         xe_engine_put(vm->eng[id]);
1387                         vm->eng[id] = NULL;
1388                 }
1389         }
1390
1391         down_write(&vm->lock);
1392         xe_vm_lock(vm, &ww, 0, false);
1393         while (vm->vmas.rb_node) {
1394                 struct xe_vma *vma = to_xe_vma(vm->vmas.rb_node);
1395
1396                 if (xe_vma_is_userptr(vma)) {
1397                         down_read(&vm->userptr.notifier_lock);
1398                         vma->destroyed = true;
1399                         up_read(&vm->userptr.notifier_lock);
1400                 }
1401
1402                 rb_erase(&vma->vm_node, &vm->vmas);
1403
1404                 /* easy case, remove from VMA? */
1405                 if (xe_vma_is_userptr(vma) || vma->bo->vm) {
1406                         xe_vma_destroy(vma, NULL);
1407                         continue;
1408                 }
1409
1410                 rb_add(&vma->vm_node, &contested, xe_vma_less_cb);
1411         }
1412
1413         /*
1414          * All vm operations will add shared fences to resv.
1415          * The only exception is eviction for a shared object,
1416          * but even so, the unbind when evicted would still
1417          * install a fence to resv. Hence it's safe to
1418          * destroy the pagetables immediately.
1419          */
1420         for_each_tile(tile, xe, id) {
1421                 if (vm->scratch_bo[id]) {
1422                         u32 i;
1423
1424                         xe_bo_unpin(vm->scratch_bo[id]);
1425                         xe_bo_put(vm->scratch_bo[id]);
1426                         for (i = 0; i < vm->pt_root[id]->level; i++)
1427                                 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags,
1428                                               NULL);
1429                 }
1430         }
1431         xe_vm_unlock(vm, &ww);
1432
1433         if (contested.rb_node) {
1434
1435                 /*
1436                  * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1437                  * Since we hold a refcount to the bo, we can remove and free
1438                  * the members safely without locking.
1439                  */
1440                 while (contested.rb_node) {
1441                         struct xe_vma *vma = to_xe_vma(contested.rb_node);
1442
1443                         rb_erase(&vma->vm_node, &contested);
1444                         xe_vma_destroy_unlocked(vma);
1445                 }
1446         }
1447
1448         if (vm->async_ops.error_capture.addr)
1449                 wake_up_all(&vm->async_ops.error_capture.wq);
1450
1451         XE_WARN_ON(!list_empty(&vm->extobj.list));
1452         up_write(&vm->lock);
1453
1454         mutex_lock(&xe->usm.lock);
1455         if (vm->flags & XE_VM_FLAG_FAULT_MODE)
1456                 xe->usm.num_vm_in_fault_mode--;
1457         else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
1458                 xe->usm.num_vm_in_non_fault_mode--;
1459         mutex_unlock(&xe->usm.lock);
1460
1461         xe_vm_put(vm);
1462 }
1463
1464 static void vm_destroy_work_func(struct work_struct *w)
1465 {
1466         struct xe_vm *vm =
1467                 container_of(w, struct xe_vm, destroy_work);
1468         struct ww_acquire_ctx ww;
1469         struct xe_device *xe = vm->xe;
1470         struct xe_tile *tile;
1471         u8 id;
1472         void *lookup;
1473
1474         /* xe_vm_close_and_put was not called? */
1475         XE_WARN_ON(vm->size);
1476
1477         if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
1478                 xe_device_mem_access_put(xe);
1479                 xe_pm_runtime_put(xe);
1480
1481                 if (xe->info.has_asid) {
1482                         mutex_lock(&xe->usm.lock);
1483                         lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1484                         XE_WARN_ON(lookup != vm);
1485                         mutex_unlock(&xe->usm.lock);
1486                 }
1487         }
1488
1489         /*
1490          * XXX: We delay destroying the PT root until the VM is freed as the PT root
1491          * is needed for xe_vm_lock to work. If we remove that dependency this
1492          * can be moved to xe_vm_close_and_put.
1493          */
1494         xe_vm_lock(vm, &ww, 0, false);
1495         for_each_tile(tile, xe, id) {
1496                 if (vm->pt_root[id]) {
1497                         xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1498                         vm->pt_root[id] = NULL;
1499                 }
1500         }
1501         xe_vm_unlock(vm, &ww);
1502
1503         trace_xe_vm_free(vm);
1504         dma_fence_put(vm->rebind_fence);
1505         dma_resv_fini(&vm->resv);
1506         kfree(vm);
1507 }
1508
1509 void xe_vm_free(struct kref *ref)
1510 {
1511         struct xe_vm *vm = container_of(ref, struct xe_vm, refcount);
1512
1513         /* To destroy the VM we need to be able to sleep */
1514         queue_work(system_unbound_wq, &vm->destroy_work);
1515 }
1516
1517 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1518 {
1519         struct xe_vm *vm;
1520
1521         mutex_lock(&xef->vm.lock);
1522         vm = xa_load(&xef->vm.xa, id);
1523         mutex_unlock(&xef->vm.lock);
1524
1525         if (vm)
1526                 xe_vm_get(vm);
1527
1528         return vm;
1529 }
1530
1531 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1532 {
1533         return gen8_pde_encode(vm->pt_root[tile->id]->bo, 0,
1534                                XE_CACHE_WB);
1535 }
1536
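/*
 * Remove the GPU mappings of @vma from every tile where it is currently
 * bound, aggregating the per-tile unbind fences into a dma_fence_array when
 * more than one tile is involved, and signal the user syncs with the
 * resulting fence.
 */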
1537 static struct dma_fence *
1538 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
1539                  struct xe_sync_entry *syncs, u32 num_syncs)
1540 {
1541         struct xe_tile *tile;
1542         struct dma_fence *fence = NULL;
1543         struct dma_fence **fences = NULL;
1544         struct dma_fence_array *cf = NULL;
1545         struct xe_vm *vm = vma->vm;
1546         int cur_fence = 0, i;
1547         int number_tiles = hweight_long(vma->tile_present);
1548         int err;
1549         u8 id;
1550
1551         trace_xe_vma_unbind(vma);
1552
1553         if (number_tiles > 1) {
1554                 fences = kmalloc_array(number_tiles, sizeof(*fences),
1555                                        GFP_KERNEL);
1556                 if (!fences)
1557                         return ERR_PTR(-ENOMEM);
1558         }
1559
1560         for_each_tile(tile, vm->xe, id) {
1561                 if (!(vma->tile_present & BIT(id)))
1562                         goto next;
1563
1564                 fence = __xe_pt_unbind_vma(tile, vma, e, syncs, num_syncs);
1565                 if (IS_ERR(fence)) {
1566                         err = PTR_ERR(fence);
1567                         goto err_fences;
1568                 }
1569
1570                 if (fences)
1571                         fences[cur_fence++] = fence;
1572
1573 next:
1574                 if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
1575                         e = list_next_entry(e, multi_gt_list);
1576         }
1577
1578         if (fences) {
1579                 cf = dma_fence_array_create(number_tiles, fences,
1580                                             vm->composite_fence_ctx,
1581                                             vm->composite_fence_seqno++,
1582                                             false);
1583                 if (!cf) {
1584                         --vm->composite_fence_seqno;
1585                         err = -ENOMEM;
1586                         goto err_fences;
1587                 }
1588         }
1589
1590         for (i = 0; i < num_syncs; i++)
1591                 xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);
1592
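        /*
         * If the VMA wasn't bound on any tile there is no unbind fence to
         * return, so hand back a signalled stub fence instead.
         */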
1593         return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;
1594
1595 err_fences:
1596         if (fences) {
1597                 while (cur_fence) {
1598                         /* FIXME: Rewind the previous binds? */
1599                         dma_fence_put(fences[--cur_fence]);
1600                 }
1601                 kfree(fences);
1602         }
1603
1604         return ERR_PTR(err);
1605 }
1606
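/*
 * Bind @vma on every tile in its tile_mask, treating tiles that already have
 * page-table state for it as rebinds. As with unbind, multiple per-tile
 * fences are collapsed into a single dma_fence_array.
 */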
1607 static struct dma_fence *
1608 xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
1609                struct xe_sync_entry *syncs, u32 num_syncs)
1610 {
1611         struct xe_tile *tile;
1612         struct dma_fence *fence;
1613         struct dma_fence **fences = NULL;
1614         struct dma_fence_array *cf = NULL;
1615         struct xe_vm *vm = vma->vm;
1616         int cur_fence = 0, i;
1617         int number_tiles = hweight_long(vma->tile_mask);
1618         int err;
1619         u8 id;
1620
1621         trace_xe_vma_bind(vma);
1622
1623         if (number_tiles > 1) {
1624                 fences = kmalloc_array(number_tiles, sizeof(*fences),
1625                                        GFP_KERNEL);
1626                 if (!fences)
1627                         return ERR_PTR(-ENOMEM);
1628         }
1629
1630         for_each_tile(tile, vm->xe, id) {
1631                 if (!(vma->tile_mask & BIT(id)))
1632                         goto next;
1633
1634                 fence = __xe_pt_bind_vma(tile, vma, e, syncs, num_syncs,
1635                                          vma->tile_present & BIT(id));
1636                 if (IS_ERR(fence)) {
1637                         err = PTR_ERR(fence);
1638                         goto err_fences;
1639                 }
1640
1641                 if (fences)
1642                         fences[cur_fence++] = fence;
1643
1644 next:
1645                 if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
1646                         e = list_next_entry(e, multi_gt_list);
1647         }
1648
1649         if (fences) {
1650                 cf = dma_fence_array_create(number_tiles, fences,
1651                                             vm->composite_fence_ctx,
1652                                             vm->composite_fence_seqno++,
1653                                             false);
1654                 if (!cf) {
1655                         --vm->composite_fence_seqno;
1656                         err = -ENOMEM;
1657                         goto err_fences;
1658                 }
1659         }
1660
1661         for (i = 0; i < num_syncs; i++)
1662                 xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);
1663
1664         return cf ? &cf->base : fence;
1665
1666 err_fences:
1667         if (fences) {
1668                 while (cur_fence) {
1669                         /* FIXME: Rewind the previous binds? */
1670                         dma_fence_put(fences[--cur_fence]);
1671                 }
1672                 kfree(fences);
1673         }
1674
1675         return ERR_PTR(err);
1676 }
1677
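/*
 * Fence published to user syncs for asynchronous bind operations. It is
 * signalled from async_op_fence_cb() once the underlying bind/unbind fence
 * (@wait_fence) has signalled. For VMs that use dma-fences, @wq and @started
 * let xe_vm_async_fence_wait_start() wait until the operation has actually
 * been started.
 */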
1678 struct async_op_fence {
1679         struct dma_fence fence;
1680         struct dma_fence *wait_fence;
1681         struct dma_fence_cb cb;
1682         struct xe_vm *vm;
1683         wait_queue_head_t wq;
1684         bool started;
1685 };
1686
1687 static const char *async_op_fence_get_driver_name(struct dma_fence *dma_fence)
1688 {
1689         return "xe";
1690 }
1691
1692 static const char *
1693 async_op_fence_get_timeline_name(struct dma_fence *dma_fence)
1694 {
1695         return "async_op_fence";
1696 }
1697
1698 static const struct dma_fence_ops async_op_fence_ops = {
1699         .get_driver_name = async_op_fence_get_driver_name,
1700         .get_timeline_name = async_op_fence_get_timeline_name,
1701 };
1702
1703 static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
1704 {
1705         struct async_op_fence *afence =
1706                 container_of(cb, struct async_op_fence, cb);
1707
1708         afence->fence.error = afence->wait_fence->error;
1709         dma_fence_signal(&afence->fence);
1710         xe_vm_put(afence->vm);
1711         dma_fence_put(afence->wait_fence);
1712         dma_fence_put(&afence->fence);
1713 }
1714
1715 static void add_async_op_fence_cb(struct xe_vm *vm,
1716                                   struct dma_fence *fence,
1717                                   struct async_op_fence *afence)
1718 {
1719         int ret;
1720
1721         if (!xe_vm_no_dma_fences(vm)) {
1722                 afence->started = true;
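                /* Pairs with the smp_rmb() in xe_vm_async_fence_wait_start() */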
1723                 smp_wmb();
1724                 wake_up_all(&afence->wq);
1725         }
1726
1727         afence->wait_fence = dma_fence_get(fence);
1728         afence->vm = xe_vm_get(vm);
1729         dma_fence_get(&afence->fence);
1730         ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb);
1731         if (ret == -ENOENT) {
1732                 afence->fence.error = afence->wait_fence->error;
1733                 dma_fence_signal(&afence->fence);
1734         }
1735         if (ret) {
1736                 xe_vm_put(vm);
1737                 dma_fence_put(afence->wait_fence);
1738                 dma_fence_put(&afence->fence);
1739         }
1740         XE_WARN_ON(ret && ret != -ENOENT);
1741 }
1742
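/**
 * xe_vm_async_fence_wait_start() - Wait for an async bind op to start
 * @fence: Fence handed out for the bind operation
 *
 * If @fence is an async_op_fence, sleep interruptibly until the corresponding
 * bind operation has been started by the async worker. Other fence types need
 * no such wait.
 *
 * Return: 0 on success, -ERESTARTSYS if the wait was interrupted.
 */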
1743 int xe_vm_async_fence_wait_start(struct dma_fence *fence)
1744 {
1745         if (fence->ops == &async_op_fence_ops) {
1746                 struct async_op_fence *afence =
1747                         container_of(fence, struct async_op_fence, fence);
1748
1749                 XE_BUG_ON(xe_vm_no_dma_fences(afence->vm));
1750
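                /* Pairs with the smp_wmb() in add_async_op_fence_cb() */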
1751                 smp_rmb();
1752                 return wait_event_interruptible(afence->wq, afence->started);
1753         }
1754
1755         return 0;
1756 }
1757
1758 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
1759                         struct xe_engine *e, struct xe_sync_entry *syncs,
1760                         u32 num_syncs, struct async_op_fence *afence)
1761 {
1762         struct dma_fence *fence;
1763
1764         xe_vm_assert_held(vm);
1765
1766         fence = xe_vm_bind_vma(vma, e, syncs, num_syncs);
1767         if (IS_ERR(fence))
1768                 return PTR_ERR(fence);
1769         if (afence)
1770                 add_async_op_fence_cb(vm, fence, afence);
1771
1772         dma_fence_put(fence);
1773         return 0;
1774 }
1775
1776 static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e,
1777                       struct xe_bo *bo, struct xe_sync_entry *syncs,
1778                       u32 num_syncs, struct async_op_fence *afence)
1779 {
1780         int err;
1781
1782         xe_vm_assert_held(vm);
1783         xe_bo_assert_held(bo);
1784
1785         if (bo) {
1786                 err = xe_bo_validate(bo, vm, true);
1787                 if (err)
1788                         return err;
1789         }
1790
1791         return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence);
1792 }
1793
1794 static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
1795                         struct xe_engine *e, struct xe_sync_entry *syncs,
1796                         u32 num_syncs, struct async_op_fence *afence)
1797 {
1798         struct dma_fence *fence;
1799
1800         xe_vm_assert_held(vm);
1801         xe_bo_assert_held(vma->bo);
1802
1803         fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs);
1804         if (IS_ERR(fence))
1805                 return PTR_ERR(fence);
1806         if (afence)
1807                 add_async_op_fence_cb(vm, fence, afence);
1808
1809         xe_vma_destroy(vma, fence);
1810         dma_fence_put(fence);
1811
1812         return 0;
1813 }
1814
1815 static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm,
1816                                         u64 value)
1817 {
1818         if (XE_IOCTL_ERR(xe, !value))
1819                 return -EINVAL;
1820
1821         if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
1822                 return -ENOTSUPP;
1823
1824         if (XE_IOCTL_ERR(xe, vm->async_ops.error_capture.addr))
1825                 return -ENOTSUPP;
1826
1827         vm->async_ops.error_capture.mm = current->mm;
1828         vm->async_ops.error_capture.addr = value;
1829         init_waitqueue_head(&vm->async_ops.error_capture.wq);
1830
1831         return 0;
1832 }
1833
1834 typedef int (*xe_vm_set_property_fn)(struct xe_device *xe, struct xe_vm *vm,
1835                                      u64 value);
1836
1837 static const xe_vm_set_property_fn vm_set_property_funcs[] = {
1838         [XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS] =
1839                 vm_set_error_capture_address,
1840 };
1841
1842 static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm,
1843                                     u64 extension)
1844 {
1845         u64 __user *address = u64_to_user_ptr(extension);
1846         struct drm_xe_ext_vm_set_property ext;
1847         int err;
1848
1849         err = __copy_from_user(&ext, address, sizeof(ext));
1850         if (XE_IOCTL_ERR(xe, err))
1851                 return -EFAULT;
1852
1853         if (XE_IOCTL_ERR(xe, ext.property >=
1854                          ARRAY_SIZE(vm_set_property_funcs)) ||
1855             XE_IOCTL_ERR(xe, ext.pad) ||
1856             XE_IOCTL_ERR(xe, ext.reserved[0] || ext.reserved[1]))
1857                 return -EINVAL;
1858
1859         return vm_set_property_funcs[ext.property](xe, vm, ext.value);
1860 }
1861
1862 typedef int (*xe_vm_user_extension_fn)(struct xe_device *xe, struct xe_vm *vm,
1863                                        u64 extension);
1864
1865 static const xe_vm_set_property_fn vm_user_extension_funcs[] = {
1866         [XE_VM_EXTENSION_SET_PROPERTY] = vm_user_ext_set_property,
1867 };
1868
1869 #define MAX_USER_EXTENSIONS     16
1870 static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm,
1871                               u64 extensions, int ext_number)
1872 {
1873         u64 __user *address = u64_to_user_ptr(extensions);
1874         struct xe_user_extension ext;
1875         int err;
1876
1877         if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS))
1878                 return -E2BIG;
1879
1880         err = __copy_from_user(&ext, address, sizeof(ext));
1881         if (XE_IOCTL_ERR(xe, err))
1882                 return -EFAULT;
1883
1884         if (XE_IOCTL_ERR(xe, ext.pad) ||
1885             XE_IOCTL_ERR(xe, ext.name >=
1886                          ARRAY_SIZE(vm_user_extension_funcs)))
1887                 return -EINVAL;
1888
1889         err = vm_user_extension_funcs[ext.name](xe, vm, extensions);
1890         if (XE_IOCTL_ERR(xe, err))
1891                 return err;
1892
1893         if (ext.next_extension)
1894                 return vm_user_extensions(xe, vm, ext.next_extension,
1895                                           ++ext_number);
1896
1897         return 0;
1898 }
1899
1900 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \
1901                                     DRM_XE_VM_CREATE_COMPUTE_MODE | \
1902                                     DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \
1903                                     DRM_XE_VM_CREATE_FAULT_MODE)
1904
1905 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1906                        struct drm_file *file)
1907 {
1908         struct xe_device *xe = to_xe_device(dev);
1909         struct xe_file *xef = to_xe_file(file);
1910         struct drm_xe_vm_create *args = data;
1911         struct xe_vm *vm;
1912         u32 id, asid;
1913         int err;
1914         u32 flags = 0;
1915
1916         if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
1917                 return -EINVAL;
1918
1919         if (XE_IOCTL_ERR(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1920                 return -EINVAL;
1921
1922         if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE &&
1923                          args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
1924                 return -EINVAL;
1925
1926         if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE &&
1927                          args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
1928                 return -EINVAL;
1929
1930         if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
1931                          xe_device_in_non_fault_mode(xe)))
1932                 return -EINVAL;
1933
1934         if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) &&
1935                          xe_device_in_fault_mode(xe)))
1936                 return -EINVAL;
1937
1938         if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
1939                          !xe->info.supports_usm))
1940                 return -EINVAL;
1941
1942         if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE)
1943                 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1944         if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE)
1945                 flags |= XE_VM_FLAG_COMPUTE_MODE;
1946         if (args->flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS)
1947                 flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
1948         if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE)
1949                 flags |= XE_VM_FLAG_FAULT_MODE;
1950
1951         vm = xe_vm_create(xe, flags);
1952         if (IS_ERR(vm))
1953                 return PTR_ERR(vm);
1954
1955         if (args->extensions) {
1956                 err = vm_user_extensions(xe, vm, args->extensions, 0);
1957                 if (XE_IOCTL_ERR(xe, err)) {
1958                         xe_vm_close_and_put(vm);
1959                         return err;
1960                 }
1961         }
1962
1963         mutex_lock(&xef->vm.lock);
1964         err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1965         mutex_unlock(&xef->vm.lock);
1966         if (err) {
1967                 xe_vm_close_and_put(vm);
1968                 return err;
1969         }
1970
1971         if (xe->info.has_asid) {
1972                 mutex_lock(&xe->usm.lock);
1973                 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1974                                       XA_LIMIT(0, XE_MAX_ASID - 1),
1975                                       &xe->usm.next_asid, GFP_KERNEL);
1976                 mutex_unlock(&xe->usm.lock);
1977                 if (err) {
1978                         xe_vm_close_and_put(vm);
1979                         return err;
1980                 }
1981                 vm->usm.asid = asid;
1982         }
1983
1984         args->vm_id = id;
1985
1986 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1987         /* Warning: Security issue - never enable by default */
1988         args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1989 #endif
1990
1991         return 0;
1992 }
1993
1994 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1995                         struct drm_file *file)
1996 {
1997         struct xe_device *xe = to_xe_device(dev);
1998         struct xe_file *xef = to_xe_file(file);
1999         struct drm_xe_vm_destroy *args = data;
2000         struct xe_vm *vm;
2001
2002         if (XE_IOCTL_ERR(xe, args->pad) ||
2003             XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
2004                 return -EINVAL;
2005
2006         vm = xe_vm_lookup(xef, args->vm_id);
2007         if (XE_IOCTL_ERR(xe, !vm))
2008                 return -ENOENT;
2009         xe_vm_put(vm);
2010
2011         /* FIXME: Extend this check to non-compute mode VMs */
2012         if (XE_IOCTL_ERR(xe, vm->preempt.num_engines))
2013                 return -EBUSY;
2014
2015         mutex_lock(&xef->vm.lock);
2016         xa_erase(&xef->vm.xa, args->vm_id);
2017         mutex_unlock(&xef->vm.lock);
2018
2019         xe_vm_close_and_put(vm);
2020
2021         return 0;
2022 }
2023
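/* Maps the uAPI prefetch region index onto a TTM placement. */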
2024 static const u32 region_to_mem_type[] = {
2025         XE_PL_TT,
2026         XE_PL_VRAM0,
2027         XE_PL_VRAM1,
2028 };
2029
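/*
 * Migrate a BO-backed VMA to the requested memory region and (re)bind it on
 * any tile where the mapping is missing or has been invalidated. If nothing
 * needs rebinding the syncs / fence are signalled immediately.
 */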
2030 static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
2031                           struct xe_engine *e, u32 region,
2032                           struct xe_sync_entry *syncs, u32 num_syncs,
2033                           struct async_op_fence *afence)
2034 {
2035         int err;
2036
2037         XE_BUG_ON(region >= ARRAY_SIZE(region_to_mem_type));
2038
2039         if (!xe_vma_is_userptr(vma)) {
2040                 err = xe_bo_migrate(vma->bo, region_to_mem_type[region]);
2041                 if (err)
2042                         return err;
2043         }
2044
2045         if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
2046                 return xe_vm_bind(vm, vma, e, vma->bo, syncs, num_syncs,
2047                                   afence);
2048         } else {
2049                 int i;
2050
2051                 /* Nothing to do, signal fences now */
2052                 for (i = 0; i < num_syncs; i++)
2053                         xe_sync_entry_signal(&syncs[i], NULL,
2054                                              dma_fence_get_stub());
2055                 if (afence)
2056                         dma_fence_signal(&afence->fence);
2057                 return 0;
2058         }
2059 }
2060
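/* The low 16 bits of the bind op select the operation, the upper bits are flags. */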
2061 #define VM_BIND_OP(op)  (op & 0xffff)
2062
2063 static int __vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
2064                            struct xe_engine *e, struct xe_bo *bo, u32 op,
2065                            u32 region, struct xe_sync_entry *syncs,
2066                            u32 num_syncs, struct async_op_fence *afence)
2067 {
2068         switch (VM_BIND_OP(op)) {
2069         case XE_VM_BIND_OP_MAP:
2070                 return xe_vm_bind(vm, vma, e, bo, syncs, num_syncs, afence);
2071         case XE_VM_BIND_OP_UNMAP:
2072         case XE_VM_BIND_OP_UNMAP_ALL:
2073                 return xe_vm_unbind(vm, vma, e, syncs, num_syncs, afence);
2074         case XE_VM_BIND_OP_MAP_USERPTR:
2075                 return xe_vm_bind(vm, vma, e, NULL, syncs, num_syncs, afence);
2076         case XE_VM_BIND_OP_PREFETCH:
2077                 return xe_vm_prefetch(vm, vma, e, region, syncs, num_syncs,
2078                                       afence);
2080         default:
2081                 XE_BUG_ON("NOT POSSIBLE");
2082                 return -EINVAL;
2083         }
2084 }
2085
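/**
 * xe_vm_ttm_bo() - TTM BO used to represent the VM in reservation lists
 * @vm: The VM
 *
 * Return: The TTM BO of one of the VM's page-table roots. Since all BOs in
 * the VM share a single dma-resv lock, reserving this BO reserves the VM as
 * a whole.
 */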
2086 struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm)
2087 {
2088         int idx = vm->flags & XE_VM_FLAG_MIGRATION ?
2089                 XE_VM_FLAG_GT_ID(vm->flags) : 0;
2090
2091         /* Safe to use index 0 as all BOs in the VM share a single dma-resv lock */
2092         return &vm->pt_root[idx]->bo->ttm;
2093 }
2094
2095 static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv)
2096 {
2097         tv->num_shared = 1;
2098         tv->bo = xe_vm_ttm_bo(vm);
2099 }
2100
2101 static bool is_map_op(u32 op)
2102 {
2103         return VM_BIND_OP(op) == XE_VM_BIND_OP_MAP ||
2104                 VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR;
2105 }
2106
2107 static bool is_unmap_op(u32 op)
2108 {
2109         return VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP ||
2110                 VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL;
2111 }
2112
2113 static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
2114                          struct xe_engine *e, struct xe_bo *bo,
2115                          struct drm_xe_vm_bind_op *bind_op,
2116                          struct xe_sync_entry *syncs, u32 num_syncs,
2117                          struct async_op_fence *afence)
2118 {
2119         LIST_HEAD(objs);
2120         LIST_HEAD(dups);
2121         struct ttm_validate_buffer tv_bo, tv_vm;
2122         struct ww_acquire_ctx ww;
2123         struct xe_bo *vbo;
2124         int err, i;
2125
2126         lockdep_assert_held(&vm->lock);
2127         XE_BUG_ON(!list_empty(&vma->unbind_link));
2128
2129         /* Binds deferred to faults, signal fences now */
2130         if (xe_vm_in_fault_mode(vm) && is_map_op(bind_op->op) &&
2131             !(bind_op->op & XE_VM_BIND_FLAG_IMMEDIATE)) {
2132                 for (i = 0; i < num_syncs; i++)
2133                         xe_sync_entry_signal(&syncs[i], NULL,
2134                                              dma_fence_get_stub());
2135                 if (afence)
2136                         dma_fence_signal(&afence->fence);
2137                 return 0;
2138         }
2139
2140         xe_vm_tv_populate(vm, &tv_vm);
2141         list_add_tail(&tv_vm.head, &objs);
2142         vbo = vma->bo;
2143         if (vbo) {
2144                 /*
2145                  * An unbind can drop the last reference to the BO, but
2146                  * the BO is needed for ttm_eu_backoff_reservation, so
2147                  * take a reference here.
2148                  */
2149                 xe_bo_get(vbo);
2150
2151                 tv_bo.bo = &vbo->ttm;
2152                 tv_bo.num_shared = 1;
2153                 list_add(&tv_bo.head, &objs);
2154         }
2155
2156 again:
2157         err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups);
2158         if (!err) {
2159                 err = __vm_bind_ioctl(vm, vma, e, bo,
2160                                       bind_op->op, bind_op->region, syncs,
2161                                       num_syncs, afence);
2162                 ttm_eu_backoff_reservation(&ww, &objs);
2163                 if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
2164                         lockdep_assert_held_write(&vm->lock);
2165                         err = xe_vma_userptr_pin_pages(vma);
2166                         if (!err)
2167                                 goto again;
2168                 }
2169         }
2170         xe_bo_put(vbo);
2171
2172         return err;
2173 }
2174
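/*
 * A deferred bind operation, queued on vm->async_ops.pending and executed by
 * async_op_work_func().
 */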
2175 struct async_op {
2176         struct xe_vma *vma;
2177         struct xe_engine *engine;
2178         struct xe_bo *bo;
2179         struct drm_xe_vm_bind_op bind_op;
2180         struct xe_sync_entry *syncs;
2181         u32 num_syncs;
2182         struct list_head link;
2183         struct async_op_fence *fence;
2184 };
2185
2186 static void async_op_cleanup(struct xe_vm *vm, struct async_op *op)
2187 {
2188         while (op->num_syncs--)
2189                 xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
2190         kfree(op->syncs);
2191         xe_bo_put(op->bo);
2192         if (op->engine)
2193                 xe_engine_put(op->engine);
2194         xe_vm_put(vm);
2195         if (op->fence)
2196                 dma_fence_put(&op->fence->fence);
2197         kfree(op);
2198 }
2199
2200 static struct async_op *next_async_op(struct xe_vm *vm)
2201 {
2202         return list_first_entry_or_null(&vm->async_ops.pending,
2203                                         struct async_op, link);
2204 }
2205
2206 static void vm_set_async_error(struct xe_vm *vm, int err)
2207 {
2208         lockdep_assert_held(&vm->lock);
2209         vm->async_ops.error = err;
2210 }
2211
2212 static void async_op_work_func(struct work_struct *w)
2213 {
2214         struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work);
2215
2216         for (;;) {
2217                 struct async_op *op;
2218                 int err;
2219
2220                 if (vm->async_ops.error && !xe_vm_is_closed(vm))
2221                         break;
2222
2223                 spin_lock_irq(&vm->async_ops.lock);
2224                 op = next_async_op(vm);
2225                 if (op)
2226                         list_del_init(&op->link);
2227                 spin_unlock_irq(&vm->async_ops.lock);
2228
2229                 if (!op)
2230                         break;
2231
2232                 if (!xe_vm_is_closed(vm)) {
2233                         bool first, last;
2234
2235                         down_write(&vm->lock);
2236 again:
2237                         first = op->vma->first_munmap_rebind;
2238                         last = op->vma->last_munmap_rebind;
2239 #ifdef TEST_VM_ASYNC_OPS_ERROR
2240 #define FORCE_ASYNC_OP_ERROR    BIT(31)
2241                         if (!(op->bind_op.op & FORCE_ASYNC_OP_ERROR)) {
2242                                 err = vm_bind_ioctl(vm, op->vma, op->engine,
2243                                                     op->bo, &op->bind_op,
2244                                                     op->syncs, op->num_syncs,
2245                                                     op->fence);
2246                         } else {
2247                                 err = -ENOMEM;
2248                                 op->bind_op.op &= ~FORCE_ASYNC_OP_ERROR;
2249                         }
2250 #else
2251                         err = vm_bind_ioctl(vm, op->vma, op->engine, op->bo,
2252                                             &op->bind_op, op->syncs,
2253                                             op->num_syncs, op->fence);
2254 #endif
2255                         /*
2256                          * In order for the fencing to work (stall behind
2257                          * existing jobs / prevent new jobs from running) all
2258                          * the dma-resv slots need to be programmed in a batch
2259                          * relative to execs / the rebind worker. The vm->lock
2260                          * ensures this.
2261                          */
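                        /*
                         * Ops forming a munmap-style unbind + rebind chain
                         * (first/last_munmap_rebind) are processed back to
                         * back without dropping vm->lock;
                         * munmap_rebind_inflight marks that such a chain is
                         * in progress.
                         */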
2262                         if (!err && ((first && VM_BIND_OP(op->bind_op.op) ==
2263                                       XE_VM_BIND_OP_UNMAP) ||
2264                                      vm->async_ops.munmap_rebind_inflight)) {
2265                                 if (last) {
2266                                         op->vma->last_munmap_rebind = false;
2267                                         vm->async_ops.munmap_rebind_inflight =
2268                                                 false;
2269                                 } else {
2270                                         vm->async_ops.munmap_rebind_inflight =
2271                                                 true;
2272
2273                                         async_op_cleanup(vm, op);
2274
2275                                         spin_lock_irq(&vm->async_ops.lock);
2276                                         op = next_async_op(vm);
2277                                         XE_BUG_ON(!op);
2278                                         list_del_init(&op->link);
2279                                         spin_unlock_irq(&vm->async_ops.lock);
2280
2281                                         goto again;
2282                                 }
2283                         }
2284                         if (err) {
2285                                 trace_xe_vma_fail(op->vma);
2286                                 drm_warn(&vm->xe->drm, "Async VM op(%d) failed with %d",
2287                                          VM_BIND_OP(op->bind_op.op),
2288                                          err);
2289
2290                                 spin_lock_irq(&vm->async_ops.lock);
2291                                 list_add(&op->link, &vm->async_ops.pending);
2292                                 spin_unlock_irq(&vm->async_ops.lock);
2293
2294                                 vm_set_async_error(vm, err);
2295                                 up_write(&vm->lock);
2296
2297                                 if (vm->async_ops.error_capture.addr)
2298                                         vm_error_capture(vm, err,
2299                                                          op->bind_op.op,
2300                                                          op->bind_op.addr,
2301                                                          op->bind_op.range);
2302                                 break;
2303                         }
2304                         up_write(&vm->lock);
2305                 } else {
2306                         trace_xe_vma_flush(op->vma);
2307
2308                         if (is_unmap_op(op->bind_op.op)) {
2309                                 down_write(&vm->lock);
2310                                 xe_vma_destroy_unlocked(op->vma);
2311                                 up_write(&vm->lock);
2312                         }
2313
2314                         if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
2315                                                    &op->fence->fence.flags)) {
2316                                 if (!xe_vm_no_dma_fences(vm)) {
2317                                         op->fence->started = true;
2318                                         smp_wmb();
2319                                         wake_up_all(&op->fence->wq);
2320                                 }
2321                                 dma_fence_signal(&op->fence->fence);
2322                         }
2323                 }
2324
2325                 async_op_cleanup(vm, op);
2326         }
2327 }
2328
2329 static int __vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
2330                                  struct xe_engine *e, struct xe_bo *bo,
2331                                  struct drm_xe_vm_bind_op *bind_op,
2332                                  struct xe_sync_entry *syncs, u32 num_syncs)
2333 {
2334         struct async_op *op;
2335         bool installed = false;
2336         u64 seqno;
2337         int i;
2338
2339         lockdep_assert_held(&vm->lock);
2340
2341         op = kmalloc(sizeof(*op), GFP_KERNEL);
2342         if (!op)
2343                 return -ENOMEM;
2345
2346         if (num_syncs) {
2347                 op->fence = kmalloc(sizeof(*op->fence), GFP_KERNEL);
2348                 if (!op->fence) {
2349                         kfree(op);
2350                         return -ENOMEM;
2351                 }
2352
2353                 seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno;
2354                 dma_fence_init(&op->fence->fence, &async_op_fence_ops,
2355                                &vm->async_ops.lock, e ? e->bind.fence_ctx :
2356                                vm->async_ops.fence.context, seqno);
2357
2358                 if (!xe_vm_no_dma_fences(vm)) {
2359                         op->fence->vm = vm;
2360                         op->fence->started = false;
2361                         init_waitqueue_head(&op->fence->wq);
2362                 }
2363         } else {
2364                 op->fence = NULL;
2365         }
2366         op->vma = vma;
2367         op->engine = e;
2368         op->bo = bo;
2369         op->bind_op = *bind_op;
2370         op->syncs = syncs;
2371         op->num_syncs = num_syncs;
2372         INIT_LIST_HEAD(&op->link);
2373
2374         for (i = 0; i < num_syncs; i++)
2375                 installed |= xe_sync_entry_signal(&syncs[i], NULL,
2376                                                   &op->fence->fence);
2377
2378         if (!installed && op->fence)
2379                 dma_fence_signal(&op->fence->fence);
2380
2381         spin_lock_irq(&vm->async_ops.lock);
2382         list_add_tail(&op->link, &vm->async_ops.pending);
2383         spin_unlock_irq(&vm->async_ops.lock);
2384
2385         if (!vm->async_ops.error)
2386                 queue_work(system_unbound_wq, &vm->async_ops.work);
2387
2388         return 0;
2389 }
2390
2391 static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
2392                                struct xe_engine *e, struct xe_bo *bo,
2393                                struct drm_xe_vm_bind_op *bind_op,
2394                                struct xe_sync_entry *syncs, u32 num_syncs)
2395 {
2396         struct xe_vma *__vma, *next;
2397         struct list_head rebind_list;
2398         struct xe_sync_entry *in_syncs = NULL, *out_syncs = NULL;
2399         u32 num_in_syncs = 0, num_out_syncs = 0;
2400         bool first = true, last;
2401         int err;
2402         int i;
2403
2404         lockdep_assert_held(&vm->lock);
2405
2406         /* Not a linked list of unbinds + rebinds, easy */
2407         if (list_empty(&vma->unbind_link))
2408                 return __vm_bind_ioctl_async(vm, vma, e, bo, bind_op,
2409                                              syncs, num_syncs);
2410
2411         /*
2412          * Linked list of unbinds + rebinds: decompose the syncs into 'in' /
2413          * 'out', passing the 'in' syncs to the first operation and the 'out'
2414          * syncs to the last. The reference counting is also a little tricky:
2415          * increment the VM / bind engine ref count on all but the last
2416          * operation and increment the BO's ref count on each rebind.
2417          */
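        /*
         * E.g. an async unmap that splits an existing VMA submits the old
         * VMA's unbind first (consuming the 'in' syncs) and the rebind of the
         * remaining piece(s) last (signalling the 'out' syncs), so user space
         * only observes completion once the whole sequence has finished.
         */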
2418
2419         XE_BUG_ON(VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP &&
2420                   VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP_ALL &&
2421                   VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_PREFETCH);
2422
2423         /* Decompose syncs */
2424         if (num_syncs) {
2425                 in_syncs = kmalloc(sizeof(*in_syncs) * num_syncs, GFP_KERNEL);
2426                 out_syncs = kmalloc(sizeof(*out_syncs) * num_syncs, GFP_KERNEL);
2427                 if (!in_syncs || !out_syncs) {
2428                         err = -ENOMEM;
2429                         goto out_error;
2430                 }
2431
2432                 for (i = 0; i < num_syncs; ++i) {
2433                         bool signal = syncs[i].flags & DRM_XE_SYNC_SIGNAL;
2434
2435                         if (signal)
2436                                 out_syncs[num_out_syncs++] = syncs[i];
2437                         else
2438                                 in_syncs[num_in_syncs++] = syncs[i];
2439                 }
2440         }
2441
2442         /* Do unbinds + move rebinds to new list */
2443         INIT_LIST_HEAD(&rebind_list);
2444         list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link) {
2445                 if (__vma->destroyed ||
2446                     VM_BIND_OP(bind_op->op) == XE_VM_BIND_OP_PREFETCH) {
2447                         list_del_init(&__vma->unbind_link);
2448                         xe_bo_get(bo);
2449                         err = __vm_bind_ioctl_async(xe_vm_get(vm), __vma,
2450                                                     e ? xe_engine_get(e) : NULL,
2451                                                     bo, bind_op, first ?
2452                                                     in_syncs : NULL,
2453                                                     first ? num_in_syncs : 0);
2454                         if (err) {
2455                                 xe_bo_put(bo);
2456                                 xe_vm_put(vm);
2457                                 if (e)
2458                                         xe_engine_put(e);
2459                                 goto out_error;
2460                         }
2461                         in_syncs = NULL;
2462                         first = false;
2463                 } else {
2464                         list_move_tail(&__vma->unbind_link, &rebind_list);
2465                 }
2466         }
2467         last = list_empty(&rebind_list);
2468         if (!last) {
2469                 xe_vm_get(vm);
2470                 if (e)
2471                         xe_engine_get(e);
2472         }
2473         err = __vm_bind_ioctl_async(vm, vma, e,
2474                                     bo, bind_op,
2475                                     first ? in_syncs :
2476                                     last ? out_syncs : NULL,
2477                                     first ? num_in_syncs :
2478                                     last ? num_out_syncs : 0);
2479         if (err) {
2480                 if (!last) {
2481                         xe_vm_put(vm);
2482                         if (e)
2483                                 xe_engine_put(e);
2484                 }
2485                 goto out_error;
2486         }
2487         in_syncs = NULL;
2488
2489         /* Do rebinds */
2490         list_for_each_entry_safe(__vma, next, &rebind_list, unbind_link) {
2491                 list_del_init(&__vma->unbind_link);
2492                 last = list_empty(&rebind_list);
2493
2494                 if (xe_vma_is_userptr(__vma)) {
2495                         bind_op->op = XE_VM_BIND_FLAG_ASYNC |
2496                                 XE_VM_BIND_OP_MAP_USERPTR;
2497                 } else {
2498                         bind_op->op = XE_VM_BIND_FLAG_ASYNC |
2499                                 XE_VM_BIND_OP_MAP;
2500                         xe_bo_get(__vma->bo);
2501                 }
2502
2503                 if (!last) {
2504                         xe_vm_get(vm);
2505                         if (e)
2506                                 xe_engine_get(e);
2507                 }
2508
2509                 err = __vm_bind_ioctl_async(vm, __vma, e,
2510                                             __vma->bo, bind_op, last ?
2511                                             out_syncs : NULL,
2512                                             last ? num_out_syncs : 0);
2513                 if (err) {
2514                         if (!last) {
2515                                 xe_vm_put(vm);
2516                                 if (e)
2517                                         xe_engine_put(e);
2518                         }
2519                         goto out_error;
2520                 }
2521         }
2522
2523         kfree(syncs);
2524         return 0;
2525
2526 out_error:
2527         kfree(in_syncs);
2528         kfree(out_syncs);
2529         kfree(syncs);
2530
2531         return err;
2532 }
2533
2534 static int __vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
2535                                       u64 addr, u64 range, u32 op)
2536 {
2537         struct xe_device *xe = vm->xe;
2538         struct xe_vma *vma, lookup;
2539         bool async = !!(op & XE_VM_BIND_FLAG_ASYNC);
2540
2541         lockdep_assert_held(&vm->lock);
2542
2543         lookup.start = addr;
2544         lookup.end = addr + range - 1;
2545
2546         switch (VM_BIND_OP(op)) {
2547         case XE_VM_BIND_OP_MAP:
2548         case XE_VM_BIND_OP_MAP_USERPTR:
2549                 vma = xe_vm_find_overlapping_vma(vm, &lookup);
2550                 if (XE_IOCTL_ERR(xe, vma))
2551                         return -EBUSY;
2552                 break;
2553         case XE_VM_BIND_OP_UNMAP:
2554         case XE_VM_BIND_OP_PREFETCH:
2555                 vma = xe_vm_find_overlapping_vma(vm, &lookup);
2556                 if (XE_IOCTL_ERR(xe, !vma) ||
2557                     XE_IOCTL_ERR(xe, (vma->start != addr ||
2558                                  vma->end != addr + range - 1) && !async))
2559                         return -EINVAL;
2560                 break;
2561         case XE_VM_BIND_OP_UNMAP_ALL:
2562                 break;
2563         default:
2564                 XE_BUG_ON("NOT POSSIBLE");
2565                 return -EINVAL;
2566         }
2567
2568         return 0;
2569 }
2570
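/*
 * Mark @vma as destroyed under the userptr notifier lock so that other paths
 * (e.g. userptr repinning) skip it, then remove it from the VM's lookup
 * structures.
 */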
2571 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma)
2572 {
2573         down_read(&vm->userptr.notifier_lock);
2574         vma->destroyed = true;
2575         up_read(&vm->userptr.notifier_lock);
2576         xe_vm_remove_vma(vm, vma);
2577 }
2578
2579 static int prep_replacement_vma(struct xe_vm *vm, struct xe_vma *vma)
2580 {
2581         int err;
2582
2583         if (vma->bo && !vma->bo->vm) {
2584                 vm_insert_extobj(vm, vma);
2585                 err = add_preempt_fences(vm, vma->bo);
2586                 if (err)
2587                         return err;
2588         }
2589
2590         return 0;
2591 }
2592
2593 /*
2594  * Find all VMAs overlapping the lookup range and add them to a list on the
2595  * returned VMA; all of the VMAs found will be unbound. Also possibly create
2596  * two new VMAs that need to be bound if the first / last VMAs are not fully
2597  * unbound. This is akin to how munmap works.
2598  */
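/*
 * For example, unbinding [0x2000, 0x5fff] out of a single VMA spanning
 * [0x0000, 0x7fff] unbinds the original VMA and creates two new VMAs,
 * [0x0000, 0x1fff] and [0x6000, 0x7fff], which are then rebound.
 */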
2599 static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
2600                                             struct xe_vma *lookup)
2601 {
2602         struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup);
2603         struct rb_node *node;
2604         struct xe_vma *first = vma, *last = vma, *new_first = NULL,
2605                       *new_last = NULL, *__vma, *next;
2606         int err = 0;
2607         bool first_munmap_rebind = false;
2608
2609         lockdep_assert_held(&vm->lock);
2610         XE_BUG_ON(!vma);
2611
2612         node = &vma->vm_node;
2613         while ((node = rb_next(node))) {
2614                 if (!xe_vma_cmp_vma_cb(lookup, node)) {
2615                         __vma = to_xe_vma(node);
2616                         list_add_tail(&__vma->unbind_link, &vma->unbind_link);
2617                         last = __vma;
2618                 } else {
2619                         break;
2620                 }
2621         }
2622
2623         node = &vma->vm_node;
2624         while ((node = rb_prev(node))) {
2625                 if (!xe_vma_cmp_vma_cb(lookup, node)) {
2626                         __vma = to_xe_vma(node);
2627                         list_add(&__vma->unbind_link, &vma->unbind_link);
2628                         first = __vma;
2629                 } else {
2630                         break;
2631                 }
2632         }
2633
2634         if (first->start != lookup->start) {
2635                 struct ww_acquire_ctx ww;
2636
2637                 if (first->bo)
2638                         err = xe_bo_lock(first->bo, &ww, 0, true);
2639                 if (err)
2640                         goto unwind;
2641                 new_first = xe_vma_create(first->vm, first->bo,
2642                                           first->bo ? first->bo_offset :
2643                                           first->userptr.ptr,
2644                                           first->start,
2645                                           lookup->start - 1,
2646                                           (first->pte_flags &
2647                                            XE_PTE_FLAG_READ_ONLY),
2648                                           first->tile_mask);
2649                 if (first->bo)
2650                         xe_bo_unlock(first->bo, &ww);
2651                 if (!new_first) {
2652                         err = -ENOMEM;
2653                         goto unwind;
2654                 }
2655                 if (!first->bo) {
2656                         err = xe_vma_userptr_pin_pages(new_first);
2657                         if (err)
2658                                 goto unwind;
2659                 }
2660                 err = prep_replacement_vma(vm, new_first);
2661                 if (err)
2662                         goto unwind;
2663         }
2664
2665         if (last->end != lookup->end) {
2666                 struct ww_acquire_ctx ww;
2667                 u64 chunk = lookup->end + 1 - last->start;
2668
2669                 if (last->bo)
2670                         err = xe_bo_lock(last->bo, &ww, 0, true);
2671                 if (err)
2672                         goto unwind;
2673                 new_last = xe_vma_create(last->vm, last->bo,
2674                                          last->bo ? last->bo_offset + chunk :
2675                                          last->userptr.ptr + chunk,
2676                                          last->start + chunk,
2677                                          last->end,
2678                                          (last->pte_flags &
2679                                           XE_PTE_FLAG_READ_ONLY),
2680                                          last->tile_mask);
2681                 if (last->bo)
2682                         xe_bo_unlock(last->bo, &ww);
2683                 if (!new_last) {
2684                         err = -ENOMEM;
2685                         goto unwind;
2686                 }
2687                 if (!last->bo) {
2688                         err = xe_vma_userptr_pin_pages(new_last);
2689                         if (err)
2690                                 goto unwind;
2691                 }
2692                 err = prep_replacement_vma(vm, new_last);
2693                 if (err)
2694                         goto unwind;
2695         }
2696
2697         prep_vma_destroy(vm, vma);
2698         if (list_empty(&vma->unbind_link) && (new_first || new_last))
2699                 vma->first_munmap_rebind = true;
2700         list_for_each_entry(__vma, &vma->unbind_link, unbind_link) {
2701                 if ((new_first || new_last) && !first_munmap_rebind) {
2702                         __vma->first_munmap_rebind = true;
2703                         first_munmap_rebind = true;
2704                 }
2705                 prep_vma_destroy(vm, __vma);
2706         }
2707         if (new_first) {
2708                 xe_vm_insert_vma(vm, new_first);
2709                 list_add_tail(&new_first->unbind_link, &vma->unbind_link);
2710                 if (!new_last)
2711                         new_first->last_munmap_rebind = true;
2712         }
2713         if (new_last) {
2714                 xe_vm_insert_vma(vm, new_last);
2715                 list_add_tail(&new_last->unbind_link, &vma->unbind_link);
2716                 new_last->last_munmap_rebind = true;
2717         }
2718
2719         return vma;
2720
2721 unwind:
2722         list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link)
2723                 list_del_init(&__vma->unbind_link);
2724         if (new_last) {
2725                 prep_vma_destroy(vm, new_last);
2726                 xe_vma_destroy_unlocked(new_last);
2727         }
2728         if (new_first) {
2729                 prep_vma_destroy(vm, new_first);
2730                 xe_vma_destroy_unlocked(new_first);
2731         }
2732
2733         return ERR_PTR(err);
2734 }
2735
2736 /*
2737  * Similar to vm_unbind_lookup_vmas, find all VMAs in lookup range to prefetch
2738  */
2739 static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm,
2740                                               struct xe_vma *lookup,
2741                                               u32 region)
2742 {
2743         struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup), *__vma,
2744                       *next;
2745         struct rb_node *node;
2746
2747         if (!xe_vma_is_userptr(vma)) {
2748                 if (!xe_bo_can_migrate(vma->bo, region_to_mem_type[region]))
2749                         return ERR_PTR(-EINVAL);
2750         }
2751
2752         node = &vma->vm_node;
2753         while ((node = rb_next(node))) {
2754                 if (!xe_vma_cmp_vma_cb(lookup, node)) {
2755                         __vma = to_xe_vma(node);
2756                         if (!xe_vma_is_userptr(__vma)) {
2757                                 if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region]))
2758                                         goto flush_list;
2759                         }
2760                         list_add_tail(&__vma->unbind_link, &vma->unbind_link);
2761                 } else {
2762                         break;
2763                 }
2764         }
2765
2766         node = &vma->vm_node;
2767         while ((node = rb_prev(node))) {
2768                 if (!xe_vma_cmp_vma_cb(lookup, node)) {
2769                         __vma = to_xe_vma(node);
2770                         if (!xe_vma_is_userptr(__vma)) {
2771                                 if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region]))
2772                                         goto flush_list;
2773                         }
2774                         list_add(&__vma->unbind_link, &vma->unbind_link);
2775                 } else {
2776                         break;
2777                 }
2778         }
2779
2780         return vma;
2781
2782 flush_list:
2783         list_for_each_entry_safe(__vma, next, &vma->unbind_link,
2784                                  unbind_link)
2785                 list_del_init(&__vma->unbind_link);
2786
2787         return ERR_PTR(-EINVAL);
2788 }
2789
2790 static struct xe_vma *vm_unbind_all_lookup_vmas(struct xe_vm *vm,
2791                                                 struct xe_bo *bo)
2792 {
2793         struct xe_vma *first = NULL, *vma;
2794
2795         lockdep_assert_held(&vm->lock);
2796         xe_bo_assert_held(bo);
2797
2798         list_for_each_entry(vma, &bo->vmas, bo_link) {
2799                 if (vma->vm != vm)
2800                         continue;
2801
2802                 prep_vma_destroy(vm, vma);
2803                 if (!first)
2804                         first = vma;
2805                 else
2806                         list_add_tail(&vma->unbind_link, &first->unbind_link);
2807         }
2808
2809         return first;
2810 }
2811
2812 static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm,
2813                                                struct xe_bo *bo,
2814                                                u64 bo_offset_or_userptr,
2815                                                u64 addr, u64 range, u32 op,
2816                                                u64 tile_mask, u32 region)
2817 {
2818         struct ww_acquire_ctx ww;
2819         struct xe_vma *vma, lookup;
2820         int err;
2821
2822         lockdep_assert_held(&vm->lock);
2823
2824         lookup.start = addr;
2825         lookup.end = addr + range - 1;
2826
2827         switch (VM_BIND_OP(op)) {
2828         case XE_VM_BIND_OP_MAP:
2829                 XE_BUG_ON(!bo);
2830
2831                 err = xe_bo_lock(bo, &ww, 0, true);
2832                 if (err)
2833                         return ERR_PTR(err);
2834                 vma = xe_vma_create(vm, bo, bo_offset_or_userptr, addr,
2835                                     addr + range - 1,
2836                                     op & XE_VM_BIND_FLAG_READONLY,
2837                                     tile_mask);
2838                 xe_bo_unlock(bo, &ww);
2839                 if (!vma)
2840                         return ERR_PTR(-ENOMEM);
2841
2842                 xe_vm_insert_vma(vm, vma);
2843                 if (!bo->vm) {
2844                         vm_insert_extobj(vm, vma);
2845                         err = add_preempt_fences(vm, bo);
2846                         if (err) {
2847                                 prep_vma_destroy(vm, vma);
2848                                 xe_vma_destroy_unlocked(vma);
2849
2850                                 return ERR_PTR(err);
2851                         }
2852                 }
2853                 break;
2854         case XE_VM_BIND_OP_UNMAP:
2855                 vma = vm_unbind_lookup_vmas(vm, &lookup);
2856                 break;
2857         case XE_VM_BIND_OP_PREFETCH:
2858                 vma = vm_prefetch_lookup_vmas(vm, &lookup, region);
2859                 break;
2860         case XE_VM_BIND_OP_UNMAP_ALL:
2861                 XE_BUG_ON(!bo);
2862
2863                 err = xe_bo_lock(bo, &ww, 0, true);
2864                 if (err)
2865                         return ERR_PTR(err);
2866                 vma = vm_unbind_all_lookup_vmas(vm, bo);
2867                 if (!vma)
2868                         vma = ERR_PTR(-EINVAL);
2869                 xe_bo_unlock(bo, &ww);
2870                 break;
2871         case XE_VM_BIND_OP_MAP_USERPTR:
2872                 XE_BUG_ON(bo);
2873
2874                 vma = xe_vma_create(vm, NULL, bo_offset_or_userptr, addr,
2875                                     addr + range - 1,
2876                                     op & XE_VM_BIND_FLAG_READONLY,
2877                                     tile_mask);
2878                 if (!vma)
2879                         return ERR_PTR(-ENOMEM);
2880
2881                 err = xe_vma_userptr_pin_pages(vma);
2882                 if (err) {
2883                         prep_vma_destroy(vm, vma);
2884                         xe_vma_destroy_unlocked(vma);
2885
2886                         return ERR_PTR(err);
2887                 } else {
2888                         xe_vm_insert_vma(vm, vma);
2889                 }
2890                 break;
2891         default:
2892                 XE_BUG_ON("NOT POSSIBLE");
2893                 vma = ERR_PTR(-EINVAL);
2894         }
2895
2896         return vma;
2897 }
2898
2899 #ifdef TEST_VM_ASYNC_OPS_ERROR
2900 #define SUPPORTED_FLAGS \
2901         (FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \
2902          XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | 0xffff)
2903 #else
2904 #define SUPPORTED_FLAGS \
2905         (XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \
2906          XE_VM_BIND_FLAG_IMMEDIATE | 0xffff)
2907 #endif
2908 #define XE_64K_PAGE_MASK 0xffffull
2909
2910 #define MAX_BINDS       512     /* FIXME: Picking a random upper limit */
2911
2912 static int vm_bind_ioctl_check_args(struct xe_device *xe,
2913                                     struct drm_xe_vm_bind *args,
2914                                     struct drm_xe_vm_bind_op **bind_ops,
2915                                     bool *async)
2916 {
2917         int err;
2918         int i;
2919
2920         if (XE_IOCTL_ERR(xe, args->extensions) ||
2921             XE_IOCTL_ERR(xe, args->pad || args->pad2) ||
2922             XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]) ||
2923             XE_IOCTL_ERR(xe, !args->num_binds) ||
2924             XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS))
2925                 return -EINVAL;
2926
2927         if (args->num_binds > 1) {
2928                 u64 __user *bind_user =
2929                         u64_to_user_ptr(args->vector_of_binds);
2930
2931                 *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) *
2932                                     args->num_binds, GFP_KERNEL);
2933                 if (!*bind_ops)
2934                         return -ENOMEM;
2935
2936                 err = __copy_from_user(*bind_ops, bind_user,
2937                                        sizeof(struct drm_xe_vm_bind_op) *
2938                                        args->num_binds);
2939                 if (XE_IOCTL_ERR(xe, err)) {
2940                         err = -EFAULT;
2941                         goto free_bind_ops;
2942                 }
2943         } else {
2944                 *bind_ops = &args->bind;
2945         }
2946
2947         for (i = 0; i < args->num_binds; ++i) {
2948                 u64 range = (*bind_ops)[i].range;
2949                 u64 addr = (*bind_ops)[i].addr;
2950                 u32 op = (*bind_ops)[i].op;
2951                 u32 obj = (*bind_ops)[i].obj;
2952                 u64 obj_offset = (*bind_ops)[i].obj_offset;
2953                 u32 region = (*bind_ops)[i].region;
2954
2955                 if (XE_IOCTL_ERR(xe, (*bind_ops)[i].pad) ||
2956                     XE_IOCTL_ERR(xe, (*bind_ops)[i].reserved[0] ||
2957                                      (*bind_ops)[i].reserved[1])) {
2958                         err = -EINVAL;
2959                         goto free_bind_ops;
2960                 }
2961
2962                 if (i == 0) {
2963                         *async = !!(op & XE_VM_BIND_FLAG_ASYNC);
2964                 } else if (XE_IOCTL_ERR(xe, !*async) ||
2965                            XE_IOCTL_ERR(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) ||
2966                            XE_IOCTL_ERR(xe, VM_BIND_OP(op) ==
2967                                         XE_VM_BIND_OP_RESTART)) {
2968                         err = -EINVAL;
2969                         goto free_bind_ops;
2970                 }
2971
2972                 if (XE_IOCTL_ERR(xe, !*async &&
2973                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) {
2974                         err = -EINVAL;
2975                         goto free_bind_ops;
2976                 }
2977
2978                 if (XE_IOCTL_ERR(xe, !*async &&
2979                                  VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) {
2980                         err = -EINVAL;
2981                         goto free_bind_ops;
2982                 }
2983
2984                 if (XE_IOCTL_ERR(xe, VM_BIND_OP(op) >
2985                                  XE_VM_BIND_OP_PREFETCH) ||
2986                     XE_IOCTL_ERR(xe, op & ~SUPPORTED_FLAGS) ||
2987                     XE_IOCTL_ERR(xe, !obj &&
2988                                  VM_BIND_OP(op) == XE_VM_BIND_OP_MAP) ||
2989                     XE_IOCTL_ERR(xe, !obj &&
2990                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
2991                     XE_IOCTL_ERR(xe, addr &&
2992                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
2993                     XE_IOCTL_ERR(xe, range &&
2994                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
2995                     XE_IOCTL_ERR(xe, obj &&
2996                                  VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR) ||
2997                     XE_IOCTL_ERR(xe, obj &&
2998                                  VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH) ||
2999                     XE_IOCTL_ERR(xe, region &&
3000                                  VM_BIND_OP(op) != XE_VM_BIND_OP_PREFETCH) ||
3001                     XE_IOCTL_ERR(xe, !(BIT(region) &
3002                                        xe->info.mem_region_mask)) ||
3003                     XE_IOCTL_ERR(xe, obj &&
3004                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP)) {
3005                         err = -EINVAL;
3006                         goto free_bind_ops;
3007                 }
3008
3009                 if (XE_IOCTL_ERR(xe, obj_offset & ~PAGE_MASK) ||
3010                     XE_IOCTL_ERR(xe, addr & ~PAGE_MASK) ||
3011                     XE_IOCTL_ERR(xe, range & ~PAGE_MASK) ||
3012                     XE_IOCTL_ERR(xe, !range && VM_BIND_OP(op) !=
3013                                  XE_VM_BIND_OP_RESTART &&
3014                                  VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL)) {
3015                         err = -EINVAL;
3016                         goto free_bind_ops;
3017                 }
3018         }
3019
3020         return 0;
3021
3022 free_bind_ops:
3023         if (args->num_binds > 1)
3024                 kfree(*bind_ops);
3025         return err;
3026 }
3027
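/*
 * Illustrative sketch (example values, userspace side) of a minimal
 * single-bind request using only fields referenced by this ioctl: an
 * inline, page-aligned MAP of a BO into the VM.
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.obj_offset = 0,
 *			.range = 0x10000,
 *			.addr = 0x1a0000,
 *			.op = XE_VM_BIND_OP_MAP,
 *		},
 *	};
 *
 * With num_binds > 1, vector_of_binds instead points to a userspace array
 * of struct drm_xe_vm_bind_op.
 */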
3028 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3029 {
3030         struct xe_device *xe = to_xe_device(dev);
3031         struct xe_file *xef = to_xe_file(file);
3032         struct drm_xe_vm_bind *args = data;
3033         struct drm_xe_sync __user *syncs_user;
3034         struct xe_bo **bos = NULL;
3035         struct xe_vma **vmas = NULL;
3036         struct xe_vm *vm;
3037         struct xe_engine *e = NULL;
3038         u32 num_syncs;
3039         struct xe_sync_entry *syncs = NULL;
3040         struct drm_xe_vm_bind_op *bind_ops;
3041         bool async;
3042         int err;
3043         int i, j = 0;
3044
3045         err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async);
3046         if (err)
3047                 return err;
3048
3049         vm = xe_vm_lookup(xef, args->vm_id);
3050         if (XE_IOCTL_ERR(xe, !vm)) {
3051                 err = -EINVAL;
3052                 goto free_objs;
3053         }
3054
3055         if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) {
3056                 drm_err(dev, "VM closed while we were looking it up?\n");
3057                 err = -ENOENT;
3058                 goto put_vm;
3059         }
3060
3061         if (args->engine_id) {
3062                 e = xe_engine_lookup(xef, args->engine_id);
3063                 if (XE_IOCTL_ERR(xe, !e)) {
3064                         err = -ENOENT;
3065                         goto put_vm;
3066                 }
3067                 if (XE_IOCTL_ERR(xe, !(e->flags & ENGINE_FLAG_VM))) {
3068                         err = -EINVAL;
3069                         goto put_engine;
3070                 }
3071         }
3072
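        /*
         * A RESTART op resumes async bind processing after a previous bind
         * reported an error: clear the stored error and re-kick the worker.
         */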
3073         if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) {
3074                 if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
3075                         err = -ENOTSUPP;
3076                 if (XE_IOCTL_ERR(xe, !err && args->num_syncs))
3077                         err = -EINVAL;
3078                 if (XE_IOCTL_ERR(xe, !err && !vm->async_ops.error))
3079                         err = -EPROTO;
3080
3081                 if (!err) {
3082                         down_write(&vm->lock);
3083                         trace_xe_vm_restart(vm);
3084                         vm_set_async_error(vm, 0);
3085                         up_write(&vm->lock);
3086
3087                         queue_work(system_unbound_wq, &vm->async_ops.work);
3088
3089                         /* Rebinds may have been blocked, give worker a kick */
3090                         if (xe_vm_in_compute_mode(vm))
3091                                 xe_vm_queue_rebind_worker(vm);
3092                 }
3093
3094                 goto put_engine;
3095         }
3096
3097         if (XE_IOCTL_ERR(xe, !vm->async_ops.error &&
3098                          async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) {
3099                 err = -ENOTSUPP;
3100                 goto put_engine;
3101         }
3102
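        /* Each bind must fit inside the VM and may only target existing tiles. */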
3103         for (i = 0; i < args->num_binds; ++i) {
3104                 u64 range = bind_ops[i].range;
3105                 u64 addr = bind_ops[i].addr;
3106
3107                 if (XE_IOCTL_ERR(xe, range > vm->size) ||
3108                     XE_IOCTL_ERR(xe, addr > vm->size - range)) {
3109                         err = -EINVAL;
3110                         goto put_engine;
3111                 }
3112
3113                 if (bind_ops[i].tile_mask) {
3114                         u64 valid_tiles = BIT(xe->info.tile_count) - 1;
3115
3116                         if (XE_IOCTL_ERR(xe, bind_ops[i].tile_mask &
3117                                          ~valid_tiles)) {
3118                                 err = -EINVAL;
3119                                 goto put_engine;
3120                         }
3121                 }
3122         }
3123
3124         bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL);
3125         if (!bos) {
3126                 err = -ENOMEM;
3127                 goto put_engine;
3128         }
3129
3130         vmas = kcalloc(args->num_binds, sizeof(*vmas), GFP_KERNEL);
3131         if (!vmas) {
3132                 err = -ENOMEM;
3133                 goto put_engine;
3134         }
3135
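        /*
         * Look up the backing GEM objects, check that each bind fits inside
         * its BO, and enforce 64K alignment for BOs using 64K pages.
         */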
3136         for (i = 0; i < args->num_binds; ++i) {
3137                 struct drm_gem_object *gem_obj;
3138                 u64 range = bind_ops[i].range;
3139                 u64 addr = bind_ops[i].addr;
3140                 u32 obj = bind_ops[i].obj;
3141                 u64 obj_offset = bind_ops[i].obj_offset;
3142
3143                 if (!obj)
3144                         continue;
3145
3146                 gem_obj = drm_gem_object_lookup(file, obj);
3147                 if (XE_IOCTL_ERR(xe, !gem_obj)) {
3148                         err = -ENOENT;
3149                         goto put_obj;
3150                 }
3151                 bos[i] = gem_to_xe_bo(gem_obj);
3152
3153                 if (XE_IOCTL_ERR(xe, range > bos[i]->size) ||
3154                     XE_IOCTL_ERR(xe, obj_offset >
3155                                  bos[i]->size - range)) {
3156                         err = -EINVAL;
3157                         goto put_obj;
3158                 }
3159
3160                 if (bos[i]->flags & XE_BO_INTERNAL_64K) {
3161                         if (XE_IOCTL_ERR(xe, obj_offset &
3162                                          XE_64K_PAGE_MASK) ||
3163                             XE_IOCTL_ERR(xe, addr & XE_64K_PAGE_MASK) ||
3164                             XE_IOCTL_ERR(xe, range & XE_64K_PAGE_MASK)) {
3165                                 err = -EINVAL;
3166                                 goto put_obj;
3167                         }
3168                 }
3169         }
3170
3171         if (args->num_syncs) {
3172                 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3173                 if (!syncs) {
3174                         err = -ENOMEM;
3175                         goto put_obj;
3176                 }
3177         }
3178
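        /* Copy in and parse the user-supplied sync objects. */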
3179         syncs_user = u64_to_user_ptr(args->syncs);
3180         for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3181                 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3182                                           &syncs_user[num_syncs], false,
3183                                           xe_vm_in_fault_mode(vm));
3184                 if (err)
3185                         goto free_syncs;
3186         }
3187
3188         err = down_write_killable(&vm->lock);
3189         if (err)
3190                 goto free_syncs;
3191
3192         /* Do some error checking first to make the unwind easier */
3193         for (i = 0; i < args->num_binds; ++i) {
3194                 u64 range = bind_ops[i].range;
3195                 u64 addr = bind_ops[i].addr;
3196                 u32 op = bind_ops[i].op;
3197
3198                 err = __vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op);
3199                 if (err)
3200                         goto release_vm_lock;
3201         }
3202
3203         for (i = 0; i < args->num_binds; ++i) {
3204                 u64 range = bind_ops[i].range;
3205                 u64 addr = bind_ops[i].addr;
3206                 u32 op = bind_ops[i].op;
3207                 u64 obj_offset = bind_ops[i].obj_offset;
3208                 u64 tile_mask = bind_ops[i].tile_mask;
3209                 u32 region = bind_ops[i].region;
3210
3211                 vmas[i] = vm_bind_ioctl_lookup_vma(vm, bos[i], obj_offset,
3212                                                    addr, range, op, tile_mask,
3213                                                    region);
3214                 if (IS_ERR(vmas[i])) {
3215                         err = PTR_ERR(vmas[i]);
3216                         vmas[i] = NULL;
3217                         goto destroy_vmas;
3218                 }
3219         }
3220
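        /*
         * Execute the binds. With multiple binds, in-syncs are attached to
         * the first bind and out-syncs to the last; intermediate binds get
         * no syncs.
         */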
3221         for (j = 0; j < args->num_binds; ++j) {
3222                 struct xe_sync_entry *__syncs;
3223                 u32 __num_syncs = 0;
3224                 bool first_or_last = j == 0 || j == args->num_binds - 1;
3225
3226                 if (args->num_binds == 1) {
3227                         __num_syncs = num_syncs;
3228                         __syncs = syncs;
3229                 } else if (first_or_last && num_syncs) {
3230                         bool first = j == 0;
3231
3232                         __syncs = kmalloc(sizeof(*__syncs) * num_syncs,
3233                                           GFP_KERNEL);
3234                         if (!__syncs) {
3235                                 err = -ENOMEM;
3236                                 break;
3237                         }
3238
3239                         /* in-syncs on first bind, out-syncs on last bind */
3240                         for (i = 0; i < num_syncs; ++i) {
3241                                 bool signal = syncs[i].flags &
3242                                         DRM_XE_SYNC_SIGNAL;
3243
3244                                 if ((first && !signal) || (!first && signal))
3245                                         __syncs[__num_syncs++] = syncs[i];
3246                         }
3247                 } else {
3248                         __num_syncs = 0;
3249                         __syncs = NULL;
3250                 }
3251
3252                 if (async) {
3253                         bool last = j == args->num_binds - 1;
3254
3255                         /*
3256                          * Each async worker pass drops a ref; take an extra ref here
3257                          * for all but the last bind, which uses the refs taken above.
3258                          */
3259                         if (!last) {
3260                                 if (e)
3261                                         xe_engine_get(e);
3262                                 xe_vm_get(vm);
3263                         }
3264
3265                         err = vm_bind_ioctl_async(vm, vmas[j], e, bos[j],
3266                                                   bind_ops + j, __syncs,
3267                                                   __num_syncs);
3268                         if (err && !last) {
3269                                 if (e)
3270                                         xe_engine_put(e);
3271                                 xe_vm_put(vm);
3272                         }
3273                         if (err)
3274                                 break;
3275                 } else {
3276                         XE_BUG_ON(j != 0);      /* Not supported */
3277                         err = vm_bind_ioctl(vm, vmas[j], e, bos[j],
3278                                             bind_ops + j, __syncs,
3279                                             __num_syncs, NULL);
3280                         break;  /* Needed so cleanup loops work */
3281                 }
3282         }
3283
3284         /* Most of the cleanup is owned by the async bind worker */
3285         if (async && !err) {
3286                 up_write(&vm->lock);
3287                 if (args->num_binds > 1)
3288                         kfree(syncs);
3289                 goto free_objs;
3290         }
3291
3292 destroy_vmas:
3293         for (i = j; err && i < args->num_binds; ++i) {
3294                 u32 op = bind_ops[i].op;
3295                 struct xe_vma *vma, *next;
3296
3297                 if (!vmas[i])
3298                         break;
3299
3300                 list_for_each_entry_safe(vma, next, &vmas[i]->unbind_link,
3301                                          unbind_link) {
3302                         list_del_init(&vma->unbind_link);
3303                         if (!vma->destroyed) {
3304                                 prep_vma_destroy(vm, vma);
3305                                 xe_vma_destroy_unlocked(vma);
3306                         }
3307                 }
3308
3309                 switch (VM_BIND_OP(op)) {
3310                 case XE_VM_BIND_OP_MAP:
3311                         prep_vma_destroy(vm, vmas[i]);
3312                         xe_vma_destroy_unlocked(vmas[i]);
3313                         break;
3314                 case XE_VM_BIND_OP_MAP_USERPTR:
3315                         prep_vma_destroy(vm, vmas[i]);
3316                         xe_vma_destroy_unlocked(vmas[i]);
3317                         break;
3318                 }
3319         }
3320 release_vm_lock:
3321         up_write(&vm->lock);
3322 free_syncs:
3323         while (num_syncs--) {
3324                 if (async && j &&
3325                     !(syncs[num_syncs].flags & DRM_XE_SYNC_SIGNAL))
3326                         continue;       /* Still in async worker */
3327                 xe_sync_entry_cleanup(&syncs[num_syncs]);
3328         }
3329
3330         kfree(syncs);
3331 put_obj:
3332         for (i = j; i < args->num_binds; ++i)
3333                 xe_bo_put(bos[i]);
3334 put_engine:
3335         if (e)
3336                 xe_engine_put(e);
3337 put_vm:
3338         xe_vm_put(vm);
3339 free_objs:
3340         kfree(bos);
3341         kfree(vmas);
3342         if (args->num_binds > 1)
3343                 kfree(bind_ops);
3344         return err;
3345 }
3346
3347 /*
3348  * XXX: Using the TTM wrappers for now, likely can call into dma-resv code
3349  * directly to optimize. Also this likely should be an inline function.
3350  */
3351 int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
3352                int num_resv, bool intr)
3353 {
3354         struct ttm_validate_buffer tv_vm;
3355         LIST_HEAD(objs);
3356         LIST_HEAD(dups);
3357
3358         XE_BUG_ON(!ww);
3359
3360         tv_vm.num_shared = num_resv;
3361         tv_vm.bo = xe_vm_ttm_bo(vm);
3362         list_add_tail(&tv_vm.head, &objs);
3363
3364         return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
3365 }
3366
3367 void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww)
3368 {
3369         dma_resv_unlock(&vm->resv);
3370         ww_acquire_fini(ww);
3371 }
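/*
 * Illustrative usage sketch (not an actual caller in this file): take the
 * VM's reservation lock with room for one shared fence, operate on the
 * resv, then unlock:
 *
 *	struct ww_acquire_ctx ww;
 *	int err;
 *
 *	err = xe_vm_lock(vm, &ww, 1, true);
 *	if (err)
 *		return err;
 *	...operate on vm->resv...
 *	xe_vm_unlock(vm, &ww);
 */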
3372
3373 /**
3374  * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3375  * @vma: VMA to invalidate
3376  *
3377  * Walks the leaf page tables, zeroing the entries owned by this VMA,
3378  * invalidates the TLBs, and blocks until the TLB invalidation has
3379  * completed.
3380  *
3381  * Return: 0 on success, negative error code otherwise.
3382  */
3383 int xe_vm_invalidate_vma(struct xe_vma *vma)
3384 {
3385         struct xe_device *xe = vma->vm->xe;
3386         struct xe_tile *tile;
3387         u32 tile_needs_invalidate = 0;
3388         int seqno[XE_MAX_TILES_PER_DEVICE];
3389         u8 id;
3390         int ret;
3391
3392         XE_BUG_ON(!xe_vm_in_fault_mode(vma->vm));
3393         trace_xe_vma_usm_invalidate(vma);
3394
3395         /* Check that we don't race with page-table updates */
3396         if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3397                 if (xe_vma_is_userptr(vma)) {
3398                         WARN_ON_ONCE(!mmu_interval_check_retry
3399                                      (&vma->userptr.notifier,
3400                                       vma->userptr.notifier_seq));
3401                         WARN_ON_ONCE(!dma_resv_test_signaled(&vma->vm->resv,
3402                                                              DMA_RESV_USAGE_BOOKKEEP));
3403
3404                 } else {
3405                         xe_bo_assert_held(vma->bo);
3406                 }
3407         }
3408
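        /*
         * Zap this VMA's PTEs on each tile and issue a TLB invalidation
         * wherever live entries were found.
         */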
3409         for_each_tile(tile, xe, id) {
3410                 if (xe_pt_zap_ptes(tile, vma)) {
3411                         tile_needs_invalidate |= BIT(id);
3412                         xe_device_wmb(xe);
3413                         /*
3414                          * FIXME: We potentially need to invalidate multiple
3415                          * GTs within the tile
3416                          */
3417                         seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
3418                         if (seqno[id] < 0)
3419                                 return seqno[id];
3420                 }
3421         }
3422
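        /* Wait for every issued TLB invalidation to complete. */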
3423         for_each_tile(tile, xe, id) {
3424                 if (tile_needs_invalidate & BIT(id)) {
3425                         ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
3426                         if (ret < 0)
3427                                 return ret;
3428                 }
3429         }
3430
3431         vma->usm.tile_invalidated = vma->tile_mask;
3432
3433         return 0;
3434 }
3435
3436 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
3437 {
3438         struct rb_node *node;
3439         bool is_vram;
3440         u64 addr;
3441
3442         if (!down_read_trylock(&vm->lock)) {
3443                 drm_printf(p, " Failed to acquire VM lock to dump capture\n");
3444                 return 0;
3445         }
3446         if (vm->pt_root[gt_id]) {
3447                 addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE,
3448                                   &is_vram);
3449                 drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS");
3450         }
3451
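        /* Walk the VMA tree, printing each mapping's range, size and backing address. */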
3452         for (node = rb_first(&vm->vmas); node; node = rb_next(node)) {
3453                 struct xe_vma *vma = to_xe_vma(node);
3454                 bool is_userptr = xe_vma_is_userptr(vma);
3455
3456                 if (is_userptr) {
3457                         struct xe_res_cursor cur;
3458
3459                         if (vma->userptr.sg) {
3460                                 xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE,
3461                                                 &cur);
3462                                 addr = xe_res_dma(&cur);
3463                         } else {
3464                                 addr = 0;
3465                         }
3466                 } else {
3467                         addr = __xe_bo_addr(vma->bo, 0, XE_PAGE_SIZE, &is_vram);
3468                 }
3469                 drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
3470                            vma->start, vma->end, vma->end - vma->start + 1ull,
3471                            addr, is_userptr ? "USR" : is_vram ? "VRAM" : "SYS");
3472         }
3473         up_read(&vm->lock);
3474
3475         return 0;
3476 }