drm/xe/vm: tidy up xe_runtime_pm usage
drivers/gpu/drm/xe/xe_vm.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_vm.h"

#include <linux/dma-fence-array.h>

#include <drm/drm_print.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/ttm/ttm_tt.h>
#include <drm/xe_drm.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_engine.h"
#include "xe_gt.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pt.h"
#include "xe_res_cursor.h"
#include "xe_sync.h"
#include "xe_trace.h"

#define TEST_VM_ASYNC_OPS_ERROR

/**
 * xe_vma_userptr_check_repin() - Advisory check for repin needed
 * @vma: The userptr vma
 *
 * Check if the userptr vma has been invalidated since last successful
 * repin. The check is advisory only and the function can be called
 * without the vm->userptr.notifier_lock held. There is no guarantee that the
 * vma userptr will remain valid after a lockless check, so typically
 * the call needs to be followed by a proper check under the notifier_lock.
 *
 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
 */
int xe_vma_userptr_check_repin(struct xe_vma *vma)
{
        return mmu_interval_check_retry(&vma->userptr.notifier,
                                        vma->userptr.notifier_seq) ?
                -EAGAIN : 0;
}

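/*
 * xe_vma_userptr_pin_pages() - Refresh the pages backing a userptr vma.
 * @vma: The userptr vma.
 *
 * Looks up the current CPU pages for the userptr range with
 * get_user_pages_fast(), builds an sg table for them and DMA-maps it.
 * The page references are dropped again before returning; validity is
 * tracked through the vma's mmu interval notifier sequence number rather
 * than by holding page pins. Must be called with vm->lock held.
 *
 * Return: 0 on success, negative error code on error.
 */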
int xe_vma_userptr_pin_pages(struct xe_vma *vma)
{
        struct xe_vm *vm = xe_vma_vm(vma);
        struct xe_device *xe = vm->xe;
        const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
        struct page **pages;
        bool in_kthread = !current->mm;
        unsigned long notifier_seq;
        int pinned, ret, i;
        bool read_only = xe_vma_read_only(vma);

        lockdep_assert_held(&vm->lock);
        XE_BUG_ON(!xe_vma_is_userptr(vma));
retry:
        if (vma->gpuva.flags & XE_VMA_DESTROYED)
                return 0;

        notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
        if (notifier_seq == vma->userptr.notifier_seq)
                return 0;

        pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
        if (!pages)
                return -ENOMEM;

        if (vma->userptr.sg) {
                dma_unmap_sgtable(xe->drm.dev,
                                  vma->userptr.sg,
                                  read_only ? DMA_TO_DEVICE :
                                  DMA_BIDIRECTIONAL, 0);
                sg_free_table(vma->userptr.sg);
                vma->userptr.sg = NULL;
        }

        pinned = ret = 0;
        if (in_kthread) {
                if (!mmget_not_zero(vma->userptr.notifier.mm)) {
                        ret = -EFAULT;
                        goto mm_closed;
                }
                kthread_use_mm(vma->userptr.notifier.mm);
        }

        while (pinned < num_pages) {
                ret = get_user_pages_fast(xe_vma_userptr(vma) +
                                          pinned * PAGE_SIZE,
                                          num_pages - pinned,
                                          read_only ? 0 : FOLL_WRITE,
                                          &pages[pinned]);
                if (ret < 0) {
                        if (in_kthread)
                                ret = 0;
                        break;
                }

                pinned += ret;
                ret = 0;
        }

        if (in_kthread) {
                kthread_unuse_mm(vma->userptr.notifier.mm);
                mmput(vma->userptr.notifier.mm);
        }
mm_closed:
        if (ret)
                goto out;

        ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages,
                                                pinned, 0,
                                                (u64)pinned << PAGE_SHIFT,
                                                xe_sg_segment_size(xe->drm.dev),
                                                GFP_KERNEL);
        if (ret) {
                vma->userptr.sg = NULL;
                goto out;
        }
        vma->userptr.sg = &vma->userptr.sgt;

        ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg,
                              read_only ? DMA_TO_DEVICE :
                              DMA_BIDIRECTIONAL,
                              DMA_ATTR_SKIP_CPU_SYNC |
                              DMA_ATTR_NO_KERNEL_MAPPING);
        if (ret) {
                sg_free_table(vma->userptr.sg);
                vma->userptr.sg = NULL;
                goto out;
        }

        for (i = 0; i < pinned; ++i) {
                if (!read_only) {
                        lock_page(pages[i]);
                        set_page_dirty(pages[i]);
                        unlock_page(pages[i]);
                }

                mark_page_accessed(pages[i]);
        }

out:
        release_pages(pages, pinned);
        kvfree(pages);

        if (!(ret < 0)) {
                vma->userptr.notifier_seq = notifier_seq;
                if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
                        goto retry;
        }

        return ret < 0 ? ret : 0;
}

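/*
 * Return true if any compute engine on the VM either has no preempt fence
 * installed or has one whose software signaling has already been enabled,
 * i.e. the preempt state needs to be re-synced before resuming.
 */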
static bool preempt_fences_waiting(struct xe_vm *vm)
{
        struct xe_engine *e;

        lockdep_assert_held(&vm->lock);
        xe_vm_assert_held(vm);

        list_for_each_entry(e, &vm->preempt.engines, compute.link) {
                if (!e->compute.pfence || (e->compute.pfence &&
                    test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
                             &e->compute.pfence->flags))) {
                        return true;
                }
        }

        return false;
}

static void free_preempt_fences(struct list_head *list)
{
        struct list_head *link, *next;

        list_for_each_safe(link, next, list)
                xe_preempt_fence_free(to_preempt_fence_from_link(link));
}

static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
                                unsigned int *count)
{
        lockdep_assert_held(&vm->lock);
        xe_vm_assert_held(vm);

        if (*count >= vm->preempt.num_engines)
                return 0;

        for (; *count < vm->preempt.num_engines; ++(*count)) {
                struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();

                if (IS_ERR(pfence))
                        return PTR_ERR(pfence);

                list_move_tail(xe_preempt_fence_link(pfence), list);
        }

        return 0;
}

static int wait_for_existing_preempt_fences(struct xe_vm *vm)
{
        struct xe_engine *e;

        xe_vm_assert_held(vm);

        list_for_each_entry(e, &vm->preempt.engines, compute.link) {
                if (e->compute.pfence) {
                        long timeout = dma_fence_wait(e->compute.pfence, false);

                        if (timeout < 0)
                                return -ETIME;
                        dma_fence_put(e->compute.pfence);
                        e->compute.pfence = NULL;
                }
        }

        return 0;
}

static bool xe_vm_is_idle(struct xe_vm *vm)
{
        struct xe_engine *e;

        xe_vm_assert_held(vm);
        list_for_each_entry(e, &vm->preempt.engines, compute.link) {
                if (!xe_engine_is_idle(e))
                        return false;
        }

        return true;
}

static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
{
        struct list_head *link;
        struct xe_engine *e;

        list_for_each_entry(e, &vm->preempt.engines, compute.link) {
                struct dma_fence *fence;

                link = list->next;
                XE_BUG_ON(link == list);

                fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
                                             e, e->compute.context,
                                             ++e->compute.seqno);
                dma_fence_put(e->compute.pfence);
                e->compute.pfence = fence;
        }
}

static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
{
        struct xe_engine *e;
        struct ww_acquire_ctx ww;
        int err;

        err = xe_bo_lock(bo, &ww, vm->preempt.num_engines, true);
        if (err)
                return err;

        list_for_each_entry(e, &vm->preempt.engines, compute.link)
                if (e->compute.pfence) {
                        dma_resv_add_fence(bo->ttm.base.resv,
                                           e->compute.pfence,
                                           DMA_RESV_USAGE_BOOKKEEP);
                }

        xe_bo_unlock(bo, &ww);
        return 0;
}

/**
 * xe_vm_fence_all_extobjs() - Add a fence to vm's external objects' resv
 * @vm: The vm.
 * @fence: The fence to add.
 * @usage: The resv usage for the fence.
 *
 * Loops over all of the vm's external object bindings and adds a @fence
 * with the given @usage to all of the external object's reservation
 * objects.
 */
void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
                             enum dma_resv_usage usage)
{
        struct xe_vma *vma;

        list_for_each_entry(vma, &vm->extobj.list, extobj.link)
                dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, usage);
}

static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
{
        struct xe_engine *e;

        lockdep_assert_held(&vm->lock);
        xe_vm_assert_held(vm);

        list_for_each_entry(e, &vm->preempt.engines, compute.link) {
                e->ops->resume(e);

                dma_resv_add_fence(xe_vm_resv(vm), e->compute.pfence,
                                   DMA_RESV_USAGE_BOOKKEEP);
                xe_vm_fence_all_extobjs(vm, e->compute.pfence,
                                        DMA_RESV_USAGE_BOOKKEEP);
        }
}

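/**
 * xe_vm_add_compute_engine() - Add an engine to the VM's compute engine list
 * @vm: The VM.
 * @e: The engine to add.
 *
 * Creates a preempt fence for the engine and installs it in the VM's
 * reservation object as well as in all external BOs bound to the VM, so
 * that the rebind worker can suspend and resume the engine around
 * evictions and userptr invalidations.
 *
 * Return: 0 on success, negative error code on error.
 */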
int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e)
{
        struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
        struct ttm_validate_buffer *tv;
        struct ww_acquire_ctx ww;
        struct list_head objs;
        struct dma_fence *pfence;
        int err;
        bool wait;

        XE_BUG_ON(!xe_vm_in_compute_mode(vm));

        down_write(&vm->lock);

        err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1);
        if (err)
                goto out_unlock_outer;

        pfence = xe_preempt_fence_create(e, e->compute.context,
                                         ++e->compute.seqno);
        if (!pfence) {
                err = -ENOMEM;
                goto out_unlock;
        }

        list_add(&e->compute.link, &vm->preempt.engines);
        ++vm->preempt.num_engines;
        e->compute.pfence = pfence;

        down_read(&vm->userptr.notifier_lock);

        dma_resv_add_fence(xe_vm_resv(vm), pfence,
                           DMA_RESV_USAGE_BOOKKEEP);

        xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP);

        /*
         * Check whether a preemption on the VM or a userptr invalidation is
         * in flight; if so, trigger this preempt fence to sync state with
         * the other preempt fences on the VM.
         */
        wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
        if (wait)
                dma_fence_enable_sw_signaling(pfence);

        up_read(&vm->userptr.notifier_lock);

out_unlock:
        xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
out_unlock_outer:
        up_write(&vm->lock);

        return err;
}

/**
 * __xe_vm_userptr_needs_repin() - Check whether the VM has userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function checks for whether the VM has userptrs that need repinning,
 * and provides a release-type barrier on the userptr.notifier_lock after
 * checking.
 *
 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
 */
int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
{
        lockdep_assert_held_read(&vm->userptr.notifier_lock);

        return (list_empty(&vm->userptr.repin_list) &&
                list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

/**
 * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv
 * objects of the vm's external buffer objects.
 * @vm: The vm.
 * @ww: Pointer to a struct ww_acquire_ctx locking context.
 * @tv_onstack: Array size XE_ONSTACK_TV of storage for the struct
 * ttm_validate_buffers used for locking.
 * @tv: Pointer to a pointer that on output contains the actual storage used.
 * @objs: List head for the buffer objects locked.
 * @intr: Whether to lock interruptible.
 * @num_shared: Number of dma-fence slots to reserve in the locked objects.
 *
 * Locks the vm dma-resv objects and all the dma-resv objects of the
 * buffer objects on the vm external object list. The TTM utilities require
 * a list of struct ttm_validate_buffers pointing to the actual buffer
 * objects to lock. Storage for those struct ttm_validate_buffers should
 * be provided in @tv_onstack, and is typically reserved on the stack
 * of the caller. If the size of @tv_onstack isn't sufficient, then
 * storage will be allocated internally using kvmalloc().
 *
 * The function performs deadlock handling internally, and after a
 * successful return the ww locking transaction should be considered
 * sealed.
 *
 * Return: 0 on success, Negative error code on error. In particular if
 * @intr is set to true, -EINTR or -ERESTARTSYS may be returned. In case
 * of error, any locking performed has been reverted.
 */
int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
                        struct ttm_validate_buffer *tv_onstack,
                        struct ttm_validate_buffer **tv,
                        struct list_head *objs,
                        bool intr,
                        unsigned int num_shared)
{
        struct ttm_validate_buffer *tv_vm, *tv_bo;
        struct xe_vma *vma, *next;
        LIST_HEAD(dups);
        int err;

        lockdep_assert_held(&vm->lock);

        if (vm->extobj.entries < XE_ONSTACK_TV) {
                tv_vm = tv_onstack;
        } else {
                tv_vm = kvmalloc_array(vm->extobj.entries + 1, sizeof(*tv_vm),
                                       GFP_KERNEL);
                if (!tv_vm)
                        return -ENOMEM;
        }
        tv_bo = tv_vm + 1;

        INIT_LIST_HEAD(objs);
        list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
                tv_bo->num_shared = num_shared;
                tv_bo->bo = &xe_vma_bo(vma)->ttm;

                list_add_tail(&tv_bo->head, objs);
                tv_bo++;
        }
        tv_vm->num_shared = num_shared;
        tv_vm->bo = xe_vm_ttm_bo(vm);
        list_add_tail(&tv_vm->head, objs);
        err = ttm_eu_reserve_buffers(ww, objs, intr, &dups);
        if (err)
                goto out_err;

        spin_lock(&vm->notifier.list_lock);
        list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list,
                                 notifier.rebind_link) {
                xe_bo_assert_held(xe_vma_bo(vma));

                list_del_init(&vma->notifier.rebind_link);
                if (vma->tile_present && !(vma->gpuva.flags & XE_VMA_DESTROYED))
                        list_move_tail(&vma->rebind_link, &vm->rebind_list);
        }
        spin_unlock(&vm->notifier.list_lock);

        *tv = tv_vm;
        return 0;

out_err:
        if (tv_vm != tv_onstack)
                kvfree(tv_vm);

        return err;
}

/**
 * xe_vm_unlock_dma_resv() - Unlock reservation objects locked by
 * xe_vm_lock_dma_resv()
 * @vm: The vm.
 * @tv_onstack: The @tv_onstack array given to xe_vm_lock_dma_resv().
 * @tv: The value of *@tv given by xe_vm_lock_dma_resv().
 * @ww: The ww_acquire_context used for locking.
 * @objs: The list returned from xe_vm_lock_dma_resv().
 *
 * Unlocks the reservation objects and frees any memory allocated by
 * xe_vm_lock_dma_resv().
 */
void xe_vm_unlock_dma_resv(struct xe_vm *vm,
                           struct ttm_validate_buffer *tv_onstack,
                           struct ttm_validate_buffer *tv,
                           struct ww_acquire_ctx *ww,
                           struct list_head *objs)
{
        /*
         * Nothing should've been able to enter the list while we were
         * locked: we've held the dma-resvs of all the vm's external objects,
         * holding the dma_resv of an object is required for list addition,
         * and we shouldn't add ourselves.
         */
        XE_WARN_ON(!list_empty(&vm->notifier.rebind_list));

        ttm_eu_backoff_reservation(ww, objs);
        if (tv && tv != tv_onstack)
                kvfree(tv);
}

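/*
 * Illustrative usage sketch (not part of the driver) for the
 * xe_vm_lock_dma_resv() / xe_vm_unlock_dma_resv() pair, assuming the
 * caller already holds vm->lock, mirroring xe_vm_add_compute_engine():
 *
 *	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
 *	struct ttm_validate_buffer *tv;
 *	struct ww_acquire_ctx ww;
 *	struct list_head objs;
 *	int err;
 *
 *	err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1);
 *	if (err)
 *		return err;
 *
 *	... operate on the vm and its external BOs under the resv locks ...
 *
 *	xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
 */
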
#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

static void xe_vm_kill(struct xe_vm *vm)
{
        struct ww_acquire_ctx ww;
        struct xe_engine *e;

        lockdep_assert_held(&vm->lock);

        xe_vm_lock(vm, &ww, 0, false);
        vm->flags |= XE_VM_FLAG_BANNED;
        trace_xe_vm_kill(vm);

        list_for_each_entry(e, &vm->preempt.engines, compute.link)
                e->ops->kill(e);
        xe_vm_unlock(vm, &ww);

        /* TODO: Inform user the VM is banned */
}

static void preempt_rebind_work_func(struct work_struct *w)
{
        struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
        struct xe_vma *vma;
        struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
        struct ttm_validate_buffer *tv;
        struct ww_acquire_ctx ww;
        struct list_head objs;
        struct dma_fence *rebind_fence;
        unsigned int fence_count = 0;
        LIST_HEAD(preempt_fences);
        ktime_t end = 0;
        int err;
        long wait;
        int __maybe_unused tries = 0;

        XE_BUG_ON(!xe_vm_in_compute_mode(vm));
        trace_xe_vm_rebind_worker_enter(vm);

        down_write(&vm->lock);

        if (xe_vm_is_closed_or_banned(vm)) {
                up_write(&vm->lock);
                trace_xe_vm_rebind_worker_exit(vm);
                return;
        }

retry:
        if (vm->async_ops.error)
                goto out_unlock_outer;

        /*
         * Extreme corner where we exit a VM error state with a munmap style VM
         * unbind in flight which requires a rebind. In this case the rebind
         * needs to install some fences into the dma-resv slots. The worker to
         * do this is already queued, so let that worker make progress by
         * dropping vm->lock and trying this again.
         */
        if (vm->async_ops.munmap_rebind_inflight) {
                up_write(&vm->lock);
                flush_work(&vm->async_ops.work);
                goto retry;
        }

        if (xe_vm_userptr_check_repin(vm)) {
                err = xe_vm_userptr_pin(vm);
                if (err)
                        goto out_unlock_outer;
        }

        err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs,
                                  false, vm->preempt.num_engines);
        if (err)
                goto out_unlock_outer;

        if (xe_vm_is_idle(vm)) {
                vm->preempt.rebind_deactivated = true;
                goto out_unlock;
        }

        /* Fresh preempt fences already installed. Everything is running. */
        if (!preempt_fences_waiting(vm))
                goto out_unlock;

        /*
         * This makes sure vm is completely suspended and also balances
         * xe_engine suspend- and resume; we resume *all* vm engines below.
         */
        err = wait_for_existing_preempt_fences(vm);
        if (err)
                goto out_unlock;

        err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
        if (err)
                goto out_unlock;

        list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
                if (xe_vma_has_no_bo(vma) ||
                    vma->gpuva.flags & XE_VMA_DESTROYED)
                        continue;

                err = xe_bo_validate(xe_vma_bo(vma), vm, false);
                if (err)
                        goto out_unlock;
        }

        rebind_fence = xe_vm_rebind(vm, true);
        if (IS_ERR(rebind_fence)) {
                err = PTR_ERR(rebind_fence);
                goto out_unlock;
        }

        if (rebind_fence) {
                dma_fence_wait(rebind_fence, false);
                dma_fence_put(rebind_fence);
        }

        /* Wait on munmap style VM unbinds */
        wait = dma_resv_wait_timeout(xe_vm_resv(vm),
                                     DMA_RESV_USAGE_KERNEL,
                                     false, MAX_SCHEDULE_TIMEOUT);
        if (wait <= 0) {
                err = -ETIME;
                goto out_unlock;
        }

#define retry_required(__tries, __vm) \
        (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
        (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
        __xe_vm_userptr_needs_repin(__vm))

        down_read(&vm->userptr.notifier_lock);
        if (retry_required(tries, vm)) {
                up_read(&vm->userptr.notifier_lock);
                err = -EAGAIN;
                goto out_unlock;
        }

#undef retry_required

        spin_lock(&vm->xe->ttm.lru_lock);
        ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
        spin_unlock(&vm->xe->ttm.lru_lock);

        /* Point of no return. */
        arm_preempt_fences(vm, &preempt_fences);
        resume_and_reinstall_preempt_fences(vm);
        up_read(&vm->userptr.notifier_lock);

out_unlock:
        xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
out_unlock_outer:
        if (err == -EAGAIN) {
                trace_xe_vm_rebind_worker_retry(vm);
                goto retry;
        }

        /*
         * With multiple active VMs, under memory pressure, it is possible that
         * ttm_bo_validate() runs into -EDEADLK and in such a case returns
         * -ENOMEM. Until ttm properly handles locking in such scenarios, the
         * best thing the driver can do is retry with a timeout. Killing the VM
         * or putting it in an error state after a timeout or other error
         * scenarios is still TBD.
         */
        if (err == -ENOMEM) {
                ktime_t cur = ktime_get();

                end = end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
                if (ktime_before(cur, end)) {
                        msleep(20);
                        trace_xe_vm_rebind_worker_retry(vm);
                        goto retry;
                }
        }
        if (err) {
                drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
                xe_vm_kill(vm);
        }
        up_write(&vm->lock);

        free_preempt_fences(&preempt_fences);

        trace_xe_vm_rebind_worker_exit(vm);
}

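/*
 * MMU interval notifier callback for userptr vmas: bumps the notifier
 * sequence number, queues the vma on the VM's invalidated list so the
 * exec/rebind paths repin it, waits for in-flight GPU work on the VM's
 * reservation object and, in fault mode, zaps the GPU page tables for
 * the range immediately.
 */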
static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
                                   const struct mmu_notifier_range *range,
                                   unsigned long cur_seq)
{
        struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
        struct xe_vm *vm = xe_vma_vm(vma);
        struct dma_resv_iter cursor;
        struct dma_fence *fence;
        long err;

        XE_BUG_ON(!xe_vma_is_userptr(vma));
        trace_xe_vma_userptr_invalidate(vma);

        if (!mmu_notifier_range_blockable(range))
                return false;

        down_write(&vm->userptr.notifier_lock);
        mmu_interval_set_seq(mni, cur_seq);

        /* No need to stop gpu access if the userptr is not yet bound. */
        if (!vma->userptr.initial_bind) {
                up_write(&vm->userptr.notifier_lock);
                return true;
        }

        /*
         * Tell exec and rebind worker they need to repin and rebind this
         * userptr.
         */
        if (!xe_vm_in_fault_mode(vm) &&
            !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
                spin_lock(&vm->userptr.invalidated_lock);
                list_move_tail(&vma->userptr.invalidate_link,
                               &vm->userptr.invalidated);
                spin_unlock(&vm->userptr.invalidated_lock);
        }

        up_write(&vm->userptr.notifier_lock);

        /*
         * Preempt fences turn into schedule disables, pipeline these.
         * Note that even in fault mode, we need to wait for binds and
         * unbinds to complete, and those are attached as BOOKKEEP fences
         * to the vm.
         */
        dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
                            DMA_RESV_USAGE_BOOKKEEP);
        dma_resv_for_each_fence_unlocked(&cursor, fence)
                dma_fence_enable_sw_signaling(fence);
        dma_resv_iter_end(&cursor);

        err = dma_resv_wait_timeout(xe_vm_resv(vm),
                                    DMA_RESV_USAGE_BOOKKEEP,
                                    false, MAX_SCHEDULE_TIMEOUT);
        XE_WARN_ON(err <= 0);

        if (xe_vm_in_fault_mode(vm)) {
                err = xe_vm_invalidate_vma(vma);
                XE_WARN_ON(err);
        }

        trace_xe_vma_userptr_invalidate_complete(vma);

        return true;
}

static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
        .invalidate = vma_userptr_invalidate,
};

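/*
 * xe_vm_userptr_pin() - Repin all invalidated userptr vmas on a VM.
 * @vm: The VM.
 *
 * Collects the vmas on the VM's userptr.invalidated list, repins their
 * backing pages and moves them to the rebind list so the next exec or
 * rebind worker pass rebinds them. Called with vm->lock held for write.
 *
 * Return: 0 on success, negative error code on error.
 */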
int xe_vm_userptr_pin(struct xe_vm *vm)
{
        struct xe_vma *vma, *next;
        int err = 0;
        LIST_HEAD(tmp_evict);

        lockdep_assert_held_write(&vm->lock);

        /* Collect invalidated userptrs */
        spin_lock(&vm->userptr.invalidated_lock);
        list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
                                 userptr.invalidate_link) {
                list_del_init(&vma->userptr.invalidate_link);
                list_move_tail(&vma->userptr_link, &vm->userptr.repin_list);
        }
        spin_unlock(&vm->userptr.invalidated_lock);

        /* Pin and move to temporary list */
        list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, userptr_link) {
                err = xe_vma_userptr_pin_pages(vma);
                if (err < 0)
                        goto out_err;

                list_move_tail(&vma->userptr_link, &tmp_evict);
        }

        /* Take lock and move to rebind_list for rebinding. */
        err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
        if (err)
                goto out_err;

        list_for_each_entry_safe(vma, next, &tmp_evict, userptr_link) {
                list_del_init(&vma->userptr_link);
                list_move_tail(&vma->rebind_link, &vm->rebind_list);
        }

        dma_resv_unlock(xe_vm_resv(vm));

        return 0;

out_err:
        list_splice_tail(&tmp_evict, &vm->userptr.repin_list);

        return err;
}

/**
 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function does an advisory check for whether the VM has userptrs that
 * need repinning.
 *
 * Return: 0 if there are no indications of userptrs needing repinning,
 * -EAGAIN if there are.
 */
int xe_vm_userptr_check_repin(struct xe_vm *vm)
{
        return (list_empty_careful(&vm->userptr.repin_list) &&
                list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}
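
/*
 * Illustrative sketch (not part of the driver) of the advisory check
 * pattern used by the exec and rebind paths, assuming vm->lock is held:
 *
 *	if (xe_vm_userptr_check_repin(vm)) {
 *		err = xe_vm_userptr_pin(vm);
 *		if (err)
 *			return err;
 *	}
 *
 * followed by a final __xe_vm_userptr_needs_repin() check under
 * vm->userptr.notifier_lock before committing the operation.
 */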

static struct dma_fence *
xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
               struct xe_sync_entry *syncs, u32 num_syncs,
               bool first_op, bool last_op);

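/*
 * xe_vm_rebind() - Rebind all vmas on the VM's rebind list.
 * @vm: The VM.
 * @rebind_worker: True when called from the preempt rebind worker.
 *
 * Rebinds every vma queued on vm->rebind_list. For VMs that don't use
 * dma-fences this is left to the rebind worker. Returns the fence of the
 * last rebind issued, NULL if there was nothing to do, or an ERR_PTR on
 * failure.
 */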
struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
{
        struct dma_fence *fence = NULL;
        struct xe_vma *vma, *next;

        lockdep_assert_held(&vm->lock);
        if (xe_vm_no_dma_fences(vm) && !rebind_worker)
                return NULL;

        xe_vm_assert_held(vm);
        list_for_each_entry_safe(vma, next, &vm->rebind_list, rebind_link) {
                XE_WARN_ON(!vma->tile_present);

                list_del_init(&vma->rebind_link);
                dma_fence_put(fence);
                if (rebind_worker)
                        trace_xe_vma_rebind_worker(vma);
                else
                        trace_xe_vma_rebind_exec(vma);
                fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
                if (IS_ERR(fence))
                        return fence;
        }

        return fence;
}

static struct xe_vma *xe_vma_create(struct xe_vm *vm,
                                    struct xe_bo *bo,
                                    u64 bo_offset_or_userptr,
                                    u64 start, u64 end,
                                    bool read_only,
                                    bool is_null,
                                    u64 tile_mask)
{
        struct xe_vma *vma;
        struct xe_tile *tile;
        u8 id;

        XE_BUG_ON(start >= end);
        XE_BUG_ON(end >= vm->size);

        vma = kzalloc(sizeof(*vma), GFP_KERNEL);
        if (!vma) {
                vma = ERR_PTR(-ENOMEM);
                return vma;
        }

        INIT_LIST_HEAD(&vma->rebind_link);
        INIT_LIST_HEAD(&vma->unbind_link);
        INIT_LIST_HEAD(&vma->userptr_link);
        INIT_LIST_HEAD(&vma->userptr.invalidate_link);
        INIT_LIST_HEAD(&vma->notifier.rebind_link);
        INIT_LIST_HEAD(&vma->extobj.link);

        INIT_LIST_HEAD(&vma->gpuva.gem.entry);
        vma->gpuva.vm = &vm->gpuvm;
        vma->gpuva.va.addr = start;
        vma->gpuva.va.range = end - start + 1;
        if (read_only)
                vma->gpuva.flags |= XE_VMA_READ_ONLY;
        if (is_null)
                vma->gpuva.flags |= DRM_GPUVA_SPARSE;

        if (tile_mask) {
                vma->tile_mask = tile_mask;
        } else {
                for_each_tile(tile, vm->xe, id)
                        vma->tile_mask |= 0x1 << id;
        }

        if (vm->xe->info.platform == XE_PVC)
                vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;

        if (bo) {
                struct drm_gpuvm_bo *vm_bo;

                xe_bo_assert_held(bo);

                vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
                if (IS_ERR(vm_bo)) {
                        kfree(vma);
                        return ERR_CAST(vm_bo);
                }

                drm_gem_object_get(&bo->ttm.base);
                vma->gpuva.gem.obj = &bo->ttm.base;
                vma->gpuva.gem.offset = bo_offset_or_userptr;
                drm_gpuva_link(&vma->gpuva, vm_bo);
                drm_gpuvm_bo_put(vm_bo);
        } else /* userptr or null */ {
                if (!is_null) {
                        u64 size = end - start + 1;
                        int err;

                        vma->gpuva.gem.offset = bo_offset_or_userptr;

                        err = mmu_interval_notifier_insert(&vma->userptr.notifier,
                                                           current->mm,
                                                           xe_vma_userptr(vma), size,
                                                           &vma_userptr_notifier_ops);
                        if (err) {
                                kfree(vma);
                                vma = ERR_PTR(err);
                                return vma;
                        }

                        vma->userptr.notifier_seq = LONG_MAX;
                }

                xe_vm_get(vm);
        }

        return vma;
}

static bool vm_remove_extobj(struct xe_vma *vma)
{
        if (!list_empty(&vma->extobj.link)) {
                xe_vma_vm(vma)->extobj.entries--;
                list_del_init(&vma->extobj.link);
                return true;
        }
        return false;
}

static void xe_vma_destroy_late(struct xe_vma *vma)
{
        struct xe_vm *vm = xe_vma_vm(vma);
        struct xe_device *xe = vm->xe;
        bool read_only = xe_vma_read_only(vma);

        if (xe_vma_is_userptr(vma)) {
                if (vma->userptr.sg) {
                        dma_unmap_sgtable(xe->drm.dev,
                                          vma->userptr.sg,
                                          read_only ? DMA_TO_DEVICE :
                                          DMA_BIDIRECTIONAL, 0);
                        sg_free_table(vma->userptr.sg);
                        vma->userptr.sg = NULL;
                }

                /*
                 * Since userptr pages are not pinned, we can't remove
                 * the notifier until we're sure the GPU is not accessing
                 * them anymore.
                 */
                mmu_interval_notifier_remove(&vma->userptr.notifier);
                xe_vm_put(vm);
        } else if (xe_vma_is_null(vma)) {
                xe_vm_put(vm);
        } else {
                xe_bo_put(xe_vma_bo(vma));
        }

        kfree(vma);
}

static void vma_destroy_work_func(struct work_struct *w)
{
        struct xe_vma *vma =
                container_of(w, struct xe_vma, destroy_work);

        xe_vma_destroy_late(vma);
}

static struct xe_vma *
bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
                            struct xe_vma *ignore)
{
        struct drm_gpuvm_bo *vm_bo;
        struct drm_gpuva *va;
        struct drm_gem_object *obj = &bo->ttm.base;

        xe_bo_assert_held(bo);

        drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
                drm_gpuvm_bo_for_each_va(va, vm_bo) {
                        struct xe_vma *vma = gpuva_to_vma(va);

                        if (vma != ignore && xe_vma_vm(vma) == vm)
                                return vma;
                }
        }

        return NULL;
}

static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm,
                                 struct xe_vma *ignore)
{
        struct ww_acquire_ctx ww;
        bool ret;

        xe_bo_lock(bo, &ww, 0, false);
        ret = !!bo_has_vm_references_locked(bo, vm, ignore);
        xe_bo_unlock(bo, &ww);

        return ret;
}

static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
{
        lockdep_assert_held_write(&vm->lock);

        list_add(&vma->extobj.link, &vm->extobj.list);
        vm->extobj.entries++;
}

static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
{
        struct xe_bo *bo = xe_vma_bo(vma);

        lockdep_assert_held_write(&vm->lock);

        if (bo_has_vm_references(bo, vm, vma))
                return;

        __vm_insert_extobj(vm, vma);
}

static void vma_destroy_cb(struct dma_fence *fence,
                           struct dma_fence_cb *cb)
{
        struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);

        INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
        queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
        struct xe_vm *vm = xe_vma_vm(vma);

        lockdep_assert_held_write(&vm->lock);
        XE_BUG_ON(!list_empty(&vma->unbind_link));

        if (xe_vma_is_userptr(vma)) {
                XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED));

                spin_lock(&vm->userptr.invalidated_lock);
                list_del_init(&vma->userptr.invalidate_link);
                spin_unlock(&vm->userptr.invalidated_lock);
                list_del(&vma->userptr_link);
        } else if (!xe_vma_is_null(vma)) {
                xe_bo_assert_held(xe_vma_bo(vma));

                spin_lock(&vm->notifier.list_lock);
                list_del(&vma->notifier.rebind_link);
                spin_unlock(&vm->notifier.list_lock);

                drm_gpuva_unlink(&vma->gpuva);

                if (!xe_vma_bo(vma)->vm && vm_remove_extobj(vma)) {
                        struct xe_vma *other;

                        other = bo_has_vm_references_locked(xe_vma_bo(vma), vm, NULL);

                        if (other)
                                __vm_insert_extobj(vm, other);
                }
        }

        xe_vm_assert_held(vm);
        if (!list_empty(&vma->rebind_link))
                list_del(&vma->rebind_link);

        if (fence) {
                int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
                                                 vma_destroy_cb);

                if (ret) {
                        XE_WARN_ON(ret != -ENOENT);
                        xe_vma_destroy_late(vma);
                }
        } else {
                xe_vma_destroy_late(vma);
        }
}

static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
        struct ttm_validate_buffer tv[2];
        struct ww_acquire_ctx ww;
        struct xe_bo *bo = xe_vma_bo(vma);
        LIST_HEAD(objs);
        LIST_HEAD(dups);
        int err;

        memset(tv, 0, sizeof(tv));
        tv[0].bo = xe_vm_ttm_bo(xe_vma_vm(vma));
        list_add(&tv[0].head, &objs);

        if (bo) {
                tv[1].bo = &xe_bo_get(bo)->ttm;
                list_add(&tv[1].head, &objs);
        }
        err = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
        XE_WARN_ON(err);

        xe_vma_destroy(vma, NULL);

        ttm_eu_backoff_reservation(&ww, &objs);
        if (bo)
                xe_bo_put(bo);
}

struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
{
        struct drm_gpuva *gpuva;

        lockdep_assert_held(&vm->lock);

        if (xe_vm_is_closed_or_banned(vm))
                return NULL;

        XE_BUG_ON(start + range > vm->size);

        gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);

        return gpuva ? gpuva_to_vma(gpuva) : NULL;
}

static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
{
        int err;

        XE_BUG_ON(xe_vma_vm(vma) != vm);
        lockdep_assert_held(&vm->lock);

        err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
        XE_WARN_ON(err);        /* Shouldn't be possible */

        return err;
}

static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
{
        XE_BUG_ON(xe_vma_vm(vma) != vm);
        lockdep_assert_held(&vm->lock);

        drm_gpuva_remove(&vma->gpuva);
        if (vm->usm.last_fault_vma == vma)
                vm->usm.last_fault_vma = NULL;
}

static struct drm_gpuva_op *xe_vm_op_alloc(void)
{
        struct xe_vma_op *op;

        op = kzalloc(sizeof(*op), GFP_KERNEL);

        if (unlikely(!op))
                return NULL;

        return &op->base;
}

static void xe_vm_free(struct drm_gpuvm *gpuvm);

static struct drm_gpuvm_ops gpuvm_ops = {
        .op_alloc = xe_vm_op_alloc,
        .vm_free = xe_vm_free,
};

static void xe_vma_op_work_func(struct work_struct *w);
static void vm_destroy_work_func(struct work_struct *w);

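/**
 * xe_vm_create() - Create and initialize a VM.
 * @xe: The xe device.
 * @flags: XE_VM_FLAG_* creation flags.
 *
 * Allocates the VM structure, sets up its lists, locks and GPUVM state,
 * creates the per-tile page-table roots (and scratch tables when
 * XE_VM_FLAG_SCRATCH_PAGE is set), and for non-migration VMs creates a
 * per-tile copy engine used for bind operations.
 *
 * Return: Pointer to the new VM on success, ERR_PTR on failure.
 */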
struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
{
        struct drm_gem_object *vm_resv_obj;
        struct xe_vm *vm;
        int err, number_tiles = 0;
        struct xe_tile *tile;
        u8 id;

        vm = kzalloc(sizeof(*vm), GFP_KERNEL);
        if (!vm)
                return ERR_PTR(-ENOMEM);

        vm->xe = xe;

        vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1);

        vm->flags = flags;

        init_rwsem(&vm->lock);

        INIT_LIST_HEAD(&vm->rebind_list);

        INIT_LIST_HEAD(&vm->userptr.repin_list);
        INIT_LIST_HEAD(&vm->userptr.invalidated);
        init_rwsem(&vm->userptr.notifier_lock);
        spin_lock_init(&vm->userptr.invalidated_lock);

        INIT_LIST_HEAD(&vm->notifier.rebind_list);
        spin_lock_init(&vm->notifier.list_lock);

        INIT_LIST_HEAD(&vm->async_ops.pending);
        INIT_WORK(&vm->async_ops.work, xe_vma_op_work_func);
        spin_lock_init(&vm->async_ops.lock);

        INIT_WORK(&vm->destroy_work, vm_destroy_work_func);

        INIT_LIST_HEAD(&vm->preempt.engines);
        vm->preempt.min_run_period_ms = 10;     /* FIXME: Wire up to uAPI */

        INIT_LIST_HEAD(&vm->extobj.list);

        if (!(flags & XE_VM_FLAG_MIGRATION))
                xe_device_mem_access_get(xe);

        vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
        if (!vm_resv_obj) {
                err = -ENOMEM;
                goto err_no_resv;
        }

        drm_gpuvm_init(&vm->gpuvm, "Xe VM", 0, &xe->drm, vm_resv_obj,
                       0, vm->size, 0, 0, &gpuvm_ops);

        drm_gem_object_put(vm_resv_obj);

        err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
        if (err)
                goto err_close;

        if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
                vm->flags |= XE_VM_FLAGS_64K;

        for_each_tile(tile, xe, id) {
                if (flags & XE_VM_FLAG_MIGRATION &&
                    tile->id != XE_VM_FLAG_GT_ID(flags))
                        continue;

                vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
                if (IS_ERR(vm->pt_root[id])) {
                        err = PTR_ERR(vm->pt_root[id]);
                        vm->pt_root[id] = NULL;
                        goto err_unlock_close;
                }
        }

        if (flags & XE_VM_FLAG_SCRATCH_PAGE) {
                for_each_tile(tile, xe, id) {
                        if (!vm->pt_root[id])
                                continue;

                        err = xe_pt_create_scratch(xe, tile, vm);
                        if (err)
                                goto err_unlock_close;
                }
                vm->batch_invalidate_tlb = true;
        }

        if (flags & XE_VM_FLAG_COMPUTE_MODE) {
                INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
                vm->flags |= XE_VM_FLAG_COMPUTE_MODE;
                vm->batch_invalidate_tlb = false;
        }

        if (flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
                vm->async_ops.fence.context = dma_fence_context_alloc(1);
                vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
        }

        /* Fill pt_root after allocating scratch tables */
        for_each_tile(tile, xe, id) {
                if (!vm->pt_root[id])
                        continue;

                xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
        }
        dma_resv_unlock(xe_vm_resv(vm));

        /* Kernel migration VM shouldn't have a circular loop.. */
        if (!(flags & XE_VM_FLAG_MIGRATION)) {
                for_each_tile(tile, xe, id) {
                        struct xe_gt *gt = tile->primary_gt;
                        struct xe_vm *migrate_vm;
                        struct xe_engine *eng;

                        if (!vm->pt_root[id])
                                continue;

                        migrate_vm = xe_migrate_get_vm(tile->migrate);
                        eng = xe_engine_create_class(xe, gt, migrate_vm,
                                                     XE_ENGINE_CLASS_COPY,
                                                     ENGINE_FLAG_VM);
                        xe_vm_put(migrate_vm);
                        if (IS_ERR(eng)) {
                                err = PTR_ERR(eng);
                                goto err_close;
                        }
                        vm->eng[id] = eng;
                        number_tiles++;
                }
        }

        if (number_tiles > 1)
                vm->composite_fence_ctx = dma_fence_context_alloc(1);

        mutex_lock(&xe->usm.lock);
        if (flags & XE_VM_FLAG_FAULT_MODE)
                xe->usm.num_vm_in_fault_mode++;
        else if (!(flags & XE_VM_FLAG_MIGRATION))
                xe->usm.num_vm_in_non_fault_mode++;
        mutex_unlock(&xe->usm.lock);

        trace_xe_vm_create(vm);

        return vm;

err_unlock_close:
        dma_resv_unlock(xe_vm_resv(vm));
err_close:
        xe_vm_close_and_put(vm);
        return ERR_PTR(err);

err_no_resv:
        kfree(vm);
        if (!(flags & XE_VM_FLAG_MIGRATION))
                xe_device_mem_access_put(xe);
        return ERR_PTR(err);
}

static void flush_async_ops(struct xe_vm *vm)
{
        queue_work(system_unbound_wq, &vm->async_ops.work);
        flush_work(&vm->async_ops.work);
}

static void vm_error_capture(struct xe_vm *vm, int err,
                             u32 op, u64 addr, u64 size)
{
        struct drm_xe_vm_bind_op_error_capture capture;
        u64 __user *address =
                u64_to_user_ptr(vm->async_ops.error_capture.addr);
        bool in_kthread = !current->mm;

        capture.error = err;
        capture.op = op;
        capture.addr = addr;
        capture.size = size;

        if (in_kthread) {
                if (!mmget_not_zero(vm->async_ops.error_capture.mm))
                        goto mm_closed;
                kthread_use_mm(vm->async_ops.error_capture.mm);
        }

        if (copy_to_user(address, &capture, sizeof(capture)))
                XE_WARN_ON("Copy to user failed");

        if (in_kthread) {
                kthread_unuse_mm(vm->async_ops.error_capture.mm);
                mmput(vm->async_ops.error_capture.mm);
        }

mm_closed:
        wake_up_all(&vm->async_ops.error_capture.wq);
}

static void xe_vm_close(struct xe_vm *vm)
{
        down_write(&vm->lock);
        vm->size = 0;
        up_write(&vm->lock);
}

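/**
 * xe_vm_close_and_put() - Close a VM and drop the creation reference.
 * @vm: The VM.
 *
 * Marks the VM closed, flushes pending async bind operations and the
 * rebind worker, kills and releases the per-tile bind engines, destroys
 * all vmas and page tables, and finally drops the VM reference taken at
 * creation time.
 */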
void xe_vm_close_and_put(struct xe_vm *vm)
{
        LIST_HEAD(contested);
        struct ww_acquire_ctx ww;
        struct xe_device *xe = vm->xe;
        struct xe_tile *tile;
        struct xe_vma *vma, *next_vma;
        struct drm_gpuva *gpuva, *next;
        u8 id;

        XE_BUG_ON(vm->preempt.num_engines);

        xe_vm_close(vm);
        flush_async_ops(vm);
        if (xe_vm_in_compute_mode(vm))
                flush_work(&vm->preempt.rebind_work);

        for_each_tile(tile, xe, id) {
                if (vm->eng[id]) {
                        xe_engine_kill(vm->eng[id]);
                        xe_engine_put(vm->eng[id]);
                        vm->eng[id] = NULL;
                }
        }

        down_write(&vm->lock);
        xe_vm_lock(vm, &ww, 0, false);
        drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
                vma = gpuva_to_vma(gpuva);

                if (xe_vma_has_no_bo(vma)) {
                        down_read(&vm->userptr.notifier_lock);
                        vma->gpuva.flags |= XE_VMA_DESTROYED;
                        up_read(&vm->userptr.notifier_lock);
                }

                xe_vm_remove_vma(vm, vma);

                /* easy case, remove from VMA? */
                if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
                        xe_vma_destroy(vma, NULL);
                        continue;
                }

                list_add_tail(&vma->unbind_link, &contested);
        }

        /*
         * All vm operations will add shared fences to resv.
         * The only exception is eviction for a shared object,
         * but even so, the unbind when evicted would still
         * install a fence to resv. Hence it's safe to
         * destroy the pagetables immediately.
         */
        for_each_tile(tile, xe, id) {
                if (vm->scratch_bo[id]) {
                        u32 i;

                        xe_bo_unpin(vm->scratch_bo[id]);
                        xe_bo_put(vm->scratch_bo[id]);
                        for (i = 0; i < vm->pt_root[id]->level; i++)
                                xe_pt_destroy(vm->scratch_pt[id][i], vm->flags,
                                              NULL);
                }
                if (vm->pt_root[id]) {
                        xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
                        vm->pt_root[id] = NULL;
                }
        }
        xe_vm_unlock(vm, &ww);

        /*
         * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
         * Since we hold a refcount to the bo, we can remove and free
         * the members safely without locking.
         */
        list_for_each_entry_safe(vma, next_vma, &contested, unbind_link) {
                list_del_init(&vma->unbind_link);
                xe_vma_destroy_unlocked(vma);
        }

        if (vm->async_ops.error_capture.addr)
                wake_up_all(&vm->async_ops.error_capture.wq);

        XE_WARN_ON(!list_empty(&vm->extobj.list));
        up_write(&vm->lock);

        mutex_lock(&xe->usm.lock);
        if (vm->flags & XE_VM_FLAG_FAULT_MODE)
                xe->usm.num_vm_in_fault_mode--;
        else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
                xe->usm.num_vm_in_non_fault_mode--;
        mutex_unlock(&xe->usm.lock);

        xe_vm_put(vm);
}

static void vm_destroy_work_func(struct work_struct *w)
{
        struct xe_vm *vm =
                container_of(w, struct xe_vm, destroy_work);
        struct xe_device *xe = vm->xe;
        struct xe_tile *tile;
        u8 id;
        void *lookup;

        /* xe_vm_close_and_put was not called? */
        XE_WARN_ON(vm->size);

        if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
                xe_device_mem_access_put(xe);

                if (xe->info.has_asid) {
                        mutex_lock(&xe->usm.lock);
                        lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
                        XE_WARN_ON(lookup != vm);
                        mutex_unlock(&xe->usm.lock);
                }
        }

        for_each_tile(tile, xe, id)
                XE_WARN_ON(vm->pt_root[id]);

        trace_xe_vm_free(vm);
        dma_fence_put(vm->rebind_fence);
        kfree(vm);
}

static void xe_vm_free(struct drm_gpuvm *gpuvm)
{
        struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);

        /* To destroy the VM we need to be able to sleep */
        queue_work(system_unbound_wq, &vm->destroy_work);
}

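/**
 * xe_vm_lookup() - Look up a VM by handle.
 * @xef: The file from which the VM was created.
 * @id: The VM handle.
 *
 * Return: The VM with an extra reference taken, or NULL if no VM with
 * that handle exists for the file.
 */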
struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
{
        struct xe_vm *vm;

        mutex_lock(&xef->vm.lock);
        vm = xa_load(&xef->vm.xa, id);
        if (vm)
                xe_vm_get(vm);
        mutex_unlock(&xef->vm.lock);

        return vm;
}

u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
{
        return xe_pde_encode(vm->pt_root[tile->id]->bo, 0,
                             XE_CACHE_WB);
}

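/*
 * Unbind a vma from every tile on which it is currently bound. When the
 * vma is present on multiple tiles the per-tile unbind fences are wrapped
 * in a dma_fence_array; on the last operation the provided sync entries
 * are signalled with the resulting fence.
 */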
static struct dma_fence *
xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
                 struct xe_sync_entry *syncs, u32 num_syncs,
                 bool first_op, bool last_op)
{
        struct xe_tile *tile;
        struct dma_fence *fence = NULL;
        struct dma_fence **fences = NULL;
        struct dma_fence_array *cf = NULL;
        struct xe_vm *vm = xe_vma_vm(vma);
        int cur_fence = 0, i;
        int number_tiles = hweight_long(vma->tile_present);
        int err;
        u8 id;

        trace_xe_vma_unbind(vma);

        if (number_tiles > 1) {
                fences = kmalloc_array(number_tiles, sizeof(*fences),
                                       GFP_KERNEL);
                if (!fences)
                        return ERR_PTR(-ENOMEM);
        }

        for_each_tile(tile, vm->xe, id) {
                if (!(vma->tile_present & BIT(id)))
                        goto next;

                fence = __xe_pt_unbind_vma(tile, vma, e, first_op ? syncs : NULL,
                                           first_op ? num_syncs : 0);
                if (IS_ERR(fence)) {
                        err = PTR_ERR(fence);
                        goto err_fences;
                }

                if (fences)
                        fences[cur_fence++] = fence;

next:
                if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
                        e = list_next_entry(e, multi_gt_list);
        }

        if (fences) {
                cf = dma_fence_array_create(number_tiles, fences,
                                            vm->composite_fence_ctx,
                                            vm->composite_fence_seqno++,
                                            false);
                if (!cf) {
                        --vm->composite_fence_seqno;
                        err = -ENOMEM;
                        goto err_fences;
                }
        }

        if (last_op) {
                for (i = 0; i < num_syncs; i++)
                        xe_sync_entry_signal(&syncs[i], NULL,
                                             cf ? &cf->base : fence);
        }

        return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;

err_fences:
        if (fences) {
                while (cur_fence) {
                        /* FIXME: Rewind the previous binds? */
                        dma_fence_put(fences[--cur_fence]);
                }
                kfree(fences);
        }

        return ERR_PTR(err);
}

static struct dma_fence *
xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
               struct xe_sync_entry *syncs, u32 num_syncs,
               bool first_op, bool last_op)
{
        struct xe_tile *tile;
        struct dma_fence *fence;
        struct dma_fence **fences = NULL;
        struct dma_fence_array *cf = NULL;
        struct xe_vm *vm = xe_vma_vm(vma);
        int cur_fence = 0, i;
        int number_tiles = hweight_long(vma->tile_mask);
        int err;
        u8 id;

        trace_xe_vma_bind(vma);

        if (number_tiles > 1) {
                fences = kmalloc_array(number_tiles, sizeof(*fences),
                                       GFP_KERNEL);
                if (!fences)
                        return ERR_PTR(-ENOMEM);
        }

        for_each_tile(tile, vm->xe, id) {
                if (!(vma->tile_mask & BIT(id)))
                        goto next;

                fence = __xe_pt_bind_vma(tile, vma, e, first_op ? syncs : NULL,
                                         first_op ? num_syncs : 0,
                                         vma->tile_present & BIT(id));
1670                 if (IS_ERR(fence)) {
1671                         err = PTR_ERR(fence);
1672                         goto err_fences;
1673                 }
1674
1675                 if (fences)
1676                         fences[cur_fence++] = fence;
1677
1678 next:
1679                 if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
1680                         e = list_next_entry(e, multi_gt_list);
1681         }
1682
1683         if (fences) {
1684                 cf = dma_fence_array_create(number_tiles, fences,
1685                                             vm->composite_fence_ctx,
1686                                             vm->composite_fence_seqno++,
1687                                             false);
1688                 if (!cf) {
1689                         --vm->composite_fence_seqno;
1690                         err = -ENOMEM;
1691                         goto err_fences;
1692                 }
1693         }
1694
1695         if (last_op) {
1696                 for (i = 0; i < num_syncs; i++)
1697                         xe_sync_entry_signal(&syncs[i], NULL,
1698                                              cf ? &cf->base : fence);
1699         }
1700
1701         return cf ? &cf->base : fence;
1702
1703 err_fences:
1704         if (fences) {
1705                 while (cur_fence) {
1706                         /* FIXME: Rewind the previous binds? */
1707                         dma_fence_put(fences[--cur_fence]);
1708                 }
1709                 kfree(fences);
1710         }
1711
1712         return ERR_PTR(err);
1713 }
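
     /*
      * Illustrative sketch (not part of the driver): xe_vm_bind_vma() and
      * xe_vm_unbind_vma() always hand back a single fence. When more than
      * one tile is touched, the per-tile fences are wrapped in a
      * dma_fence_array on vm->composite_fence_ctx, so a caller can treat
      * the result uniformly:
      *
      *      struct dma_fence *fence;
      *
      *      fence = xe_vm_bind_vma(vma, e, syncs, num_syncs, true, true);
      *      if (IS_ERR(fence))
      *              return PTR_ERR(fence);
      *      dma_fence_wait(fence, false);
      *      dma_fence_put(fence);
      */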
1714
1715 struct async_op_fence {
1716         struct dma_fence fence;
1717         struct dma_fence *wait_fence;
1718         struct dma_fence_cb cb;
1719         struct xe_vm *vm;
1720         wait_queue_head_t wq;
1721         bool started;
1722 };
1723
1724 static const char *async_op_fence_get_driver_name(struct dma_fence *dma_fence)
1725 {
1726         return "xe";
1727 }
1728
1729 static const char *
1730 async_op_fence_get_timeline_name(struct dma_fence *dma_fence)
1731 {
1732         return "async_op_fence";
1733 }
1734
1735 static const struct dma_fence_ops async_op_fence_ops = {
1736         .get_driver_name = async_op_fence_get_driver_name,
1737         .get_timeline_name = async_op_fence_get_timeline_name,
1738 };
1739
1740 static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
1741 {
1742         struct async_op_fence *afence =
1743                 container_of(cb, struct async_op_fence, cb);
1744
1745         afence->fence.error = afence->wait_fence->error;
1746         dma_fence_signal(&afence->fence);
1747         xe_vm_put(afence->vm);
1748         dma_fence_put(afence->wait_fence);
1749         dma_fence_put(&afence->fence);
1750 }
1751
1752 static void add_async_op_fence_cb(struct xe_vm *vm,
1753                                   struct dma_fence *fence,
1754                                   struct async_op_fence *afence)
1755 {
1756         int ret;
1757
1758         if (!xe_vm_no_dma_fences(vm)) {
1759                 afence->started = true;
1760                 smp_wmb();
1761                 wake_up_all(&afence->wq);
1762         }
1763
1764         afence->wait_fence = dma_fence_get(fence);
1765         afence->vm = xe_vm_get(vm);
1766         dma_fence_get(&afence->fence);
1767         ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb);
1768         if (ret == -ENOENT) {
1769                 afence->fence.error = afence->wait_fence->error;
1770                 dma_fence_signal(&afence->fence);
1771         }
1772         if (ret) {
1773                 xe_vm_put(vm);
1774                 dma_fence_put(afence->wait_fence);
1775                 dma_fence_put(&afence->fence);
1776         }
1777         XE_WARN_ON(ret && ret != -ENOENT);
1778 }
1779
1780 int xe_vm_async_fence_wait_start(struct dma_fence *fence)
1781 {
1782         if (fence->ops == &async_op_fence_ops) {
1783                 struct async_op_fence *afence =
1784                         container_of(fence, struct async_op_fence, fence);
1785
1786                 XE_BUG_ON(xe_vm_no_dma_fences(afence->vm));
1787
1788                 smp_rmb();
1789                 return wait_event_interruptible(afence->wq, afence->started);
1790         }
1791
1792         return 0;
1793 }
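
     /*
      * Note (added for clarity, not in the original source): for VMs that use
      * DMA fences, add_async_op_fence_cb() publishes afence->started (with an
      * smp_wmb() before the wake-up) and xe_vm_async_fence_wait_start() blocks
      * until that flag is observed, so a waiter does not start waiting on an
      * async bind fence before the underlying bind/unbind has been issued.
      */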
1794
1795 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
1796                         struct xe_engine *e, struct xe_sync_entry *syncs,
1797                         u32 num_syncs, struct async_op_fence *afence,
1798                         bool immediate, bool first_op, bool last_op)
1799 {
1800         struct dma_fence *fence;
1801
1802         xe_vm_assert_held(vm);
1803
1804         if (immediate) {
1805                 fence = xe_vm_bind_vma(vma, e, syncs, num_syncs, first_op,
1806                                        last_op);
1807                 if (IS_ERR(fence))
1808                         return PTR_ERR(fence);
1809         } else {
1810                 int i;
1811
1812                 XE_BUG_ON(!xe_vm_in_fault_mode(vm));
1813
1814                 fence = dma_fence_get_stub();
1815                 if (last_op) {
1816                         for (i = 0; i < num_syncs; i++)
1817                                 xe_sync_entry_signal(&syncs[i], NULL, fence);
1818                 }
1819         }
1820         if (afence)
1821                 add_async_op_fence_cb(vm, fence, afence);
1822
1823         dma_fence_put(fence);
1824         return 0;
1825 }
1826
1827 static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e,
1828                       struct xe_bo *bo, struct xe_sync_entry *syncs,
1829                       u32 num_syncs, struct async_op_fence *afence,
1830                       bool immediate, bool first_op, bool last_op)
1831 {
1832         int err;
1833
1834         xe_vm_assert_held(vm);
1835         xe_bo_assert_held(bo);
1836
1837         if (bo && immediate) {
1838                 err = xe_bo_validate(bo, vm, true);
1839                 if (err)
1840                         return err;
1841         }
1842
1843         return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence, immediate,
1844                             first_op, last_op);
1845 }
1846
1847 static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
1848                         struct xe_engine *e, struct xe_sync_entry *syncs,
1849                         u32 num_syncs, struct async_op_fence *afence,
1850                         bool first_op, bool last_op)
1851 {
1852         struct dma_fence *fence;
1853
1854         xe_vm_assert_held(vm);
1855         xe_bo_assert_held(xe_vma_bo(vma));
1856
1857         fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs, first_op, last_op);
1858         if (IS_ERR(fence))
1859                 return PTR_ERR(fence);
1860         if (afence)
1861                 add_async_op_fence_cb(vm, fence, afence);
1862
1863         xe_vma_destroy(vma, fence);
1864         dma_fence_put(fence);
1865
1866         return 0;
1867 }
1868
1869 static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm,
1870                                         u64 value)
1871 {
1872         if (XE_IOCTL_DBG(xe, !value))
1873                 return -EINVAL;
1874
1875         if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
1876                 return -EOPNOTSUPP;
1877
1878         if (XE_IOCTL_DBG(xe, vm->async_ops.error_capture.addr))
1879                 return -EOPNOTSUPP;
1880
1881         vm->async_ops.error_capture.mm = current->mm;
1882         vm->async_ops.error_capture.addr = value;
1883         init_waitqueue_head(&vm->async_ops.error_capture.wq);
1884
1885         return 0;
1886 }
1887
1888 typedef int (*xe_vm_set_property_fn)(struct xe_device *xe, struct xe_vm *vm,
1889                                      u64 value);
1890
1891 static const xe_vm_set_property_fn vm_set_property_funcs[] = {
1892         [XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS] =
1893                 vm_set_error_capture_address,
1894 };
1895
1896 static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm,
1897                                     u64 extension)
1898 {
1899         u64 __user *address = u64_to_user_ptr(extension);
1900         struct drm_xe_ext_vm_set_property ext;
1901         int err;
1902
1903         err = __copy_from_user(&ext, address, sizeof(ext));
1904         if (XE_IOCTL_DBG(xe, err))
1905                 return -EFAULT;
1906
1907         if (XE_IOCTL_DBG(xe, ext.property >=
1908                          ARRAY_SIZE(vm_set_property_funcs)) ||
1909             XE_IOCTL_DBG(xe, ext.pad) ||
1910             XE_IOCTL_DBG(xe, ext.reserved[0] || ext.reserved[1]))
1911                 return -EINVAL;
1912
1913         return vm_set_property_funcs[ext.property](xe, vm, ext.value);
1914 }
1915
1916 typedef int (*xe_vm_user_extension_fn)(struct xe_device *xe, struct xe_vm *vm,
1917                                        u64 extension);
1918
1919 static const xe_vm_set_property_fn vm_user_extension_funcs[] = {
1920         [XE_VM_EXTENSION_SET_PROPERTY] = vm_user_ext_set_property,
1921 };
1922
1923 #define MAX_USER_EXTENSIONS     16
1924 static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm,
1925                               u64 extensions, int ext_number)
1926 {
1927         u64 __user *address = u64_to_user_ptr(extensions);
1928         struct xe_user_extension ext;
1929         int err;
1930
1931         if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
1932                 return -E2BIG;
1933
1934         err = __copy_from_user(&ext, address, sizeof(ext));
1935         if (XE_IOCTL_DBG(xe, err))
1936                 return -EFAULT;
1937
1938         if (XE_IOCTL_DBG(xe, ext.pad) ||
1939             XE_IOCTL_DBG(xe, ext.name >=
1940                          ARRAY_SIZE(vm_user_extension_funcs)))
1941                 return -EINVAL;
1942
1943         err = vm_user_extension_funcs[ext.name](xe, vm, extensions);
1944         if (XE_IOCTL_DBG(xe, err))
1945                 return err;
1946
1947         if (ext.next_extension)
1948                 return vm_user_extensions(xe, vm, ext.next_extension,
1949                                           ++ext_number);
1950
1951         return 0;
1952 }
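
     /*
      * Illustrative userspace sketch (not part of the driver): extensions form
      * a chain of structures that each begin with a struct xe_user_extension
      * header and end the chain with next_extension == 0. The header member
      * name "base", the variable "create" (a struct drm_xe_vm_create being
      * filled in) and "capture_buf" are illustrative assumptions; only the
      * fields checked above are confirmed by this file.
      *
      *      struct drm_xe_ext_vm_set_property ext = {
      *              .base.name = XE_VM_EXTENSION_SET_PROPERTY,
      *              .base.next_extension = 0,       // end of chain
      *              .property = XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS,
      *              .value = (uintptr_t)capture_buf,
      *      };
      *
      *      create.extensions = (uintptr_t)&ext;
      */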
1953
1954 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \
1955                                     DRM_XE_VM_CREATE_COMPUTE_MODE | \
1956                                     DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \
1957                                     DRM_XE_VM_CREATE_FAULT_MODE)
1958
1959 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1960                        struct drm_file *file)
1961 {
1962         struct xe_device *xe = to_xe_device(dev);
1963         struct xe_file *xef = to_xe_file(file);
1964         struct drm_xe_vm_create *args = data;
1965         struct xe_vm *vm;
1966         u32 id, asid;
1967         int err;
1968         u32 flags = 0;
1969
1970         if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1971                 return -EINVAL;
1972
1973         if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1974                 return -EINVAL;
1975
1976         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE &&
1977                          args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
1978                 return -EINVAL;
1979
1980         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE &&
1981                          args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
1982                 return -EINVAL;
1983
1984         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
1985                          xe_device_in_non_fault_mode(xe)))
1986                 return -EINVAL;
1987
1988         if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) &&
1989                          xe_device_in_fault_mode(xe)))
1990                 return -EINVAL;
1991
1992         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
1993                          !xe->info.supports_usm))
1994                 return -EINVAL;
1995
1996         if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE)
1997                 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1998         if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE)
1999                 flags |= XE_VM_FLAG_COMPUTE_MODE;
2000         if (args->flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS)
2001                 flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
2002         if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE)
2003                 flags |= XE_VM_FLAG_FAULT_MODE;
2004
2005         vm = xe_vm_create(xe, flags);
2006         if (IS_ERR(vm))
2007                 return PTR_ERR(vm);
2008
2009         if (args->extensions) {
2010                 err = vm_user_extensions(xe, vm, args->extensions, 0);
2011                 if (XE_IOCTL_DBG(xe, err)) {
2012                         xe_vm_close_and_put(vm);
2013                         return err;
2014                 }
2015         }
2016
2017         mutex_lock(&xef->vm.lock);
2018         err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
2019         mutex_unlock(&xef->vm.lock);
2020         if (err) {
2021                 xe_vm_close_and_put(vm);
2022                 return err;
2023         }
2024
2025         if (xe->info.has_asid) {
2026                 mutex_lock(&xe->usm.lock);
2027                 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
2028                                       XA_LIMIT(0, XE_MAX_ASID - 1),
2029                                       &xe->usm.next_asid, GFP_KERNEL);
2030                 mutex_unlock(&xe->usm.lock);
2031                 if (err) {
2032                         xe_vm_close_and_put(vm);
2033                         return err;
2034                 }
2035                 vm->usm.asid = asid;
2036         }
2037
2038         args->vm_id = id;
2039
2040 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
2041         /* Warning: Security issue - never enable by default */
2042         args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
2043 #endif
2044
2045         return 0;
2046 }
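
     /*
      * Illustrative userspace sketch (not part of the driver): creating a VM
      * with scratch pages and asynchronous binds. The DRM_IOCTL_XE_VM_CREATE
      * macro name is assumed from the usual uAPI naming convention; the flag
      * names and the vm_id output are taken from the checks above.
      *
      *      struct drm_xe_vm_create create = {
      *              .flags = DRM_XE_VM_CREATE_SCRATCH_PAGE |
      *                       DRM_XE_VM_CREATE_ASYNC_BIND_OPS,
      *      };
      *
      *      if (ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create))
      *              return -errno;
      *      vm_id = create.vm_id;
      */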
2047
2048 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
2049                         struct drm_file *file)
2050 {
2051         struct xe_device *xe = to_xe_device(dev);
2052         struct xe_file *xef = to_xe_file(file);
2053         struct drm_xe_vm_destroy *args = data;
2054         struct xe_vm *vm;
2055         int err = 0;
2056
2057         if (XE_IOCTL_DBG(xe, args->pad) ||
2058             XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2059                 return -EINVAL;
2060
2061         mutex_lock(&xef->vm.lock);
2062         vm = xa_load(&xef->vm.xa, args->vm_id);
2063         if (XE_IOCTL_DBG(xe, !vm))
2064                 err = -ENOENT;
2065         else if (XE_IOCTL_DBG(xe, vm->preempt.num_engines))
2066                 err = -EBUSY;
2067         else
2068                 xa_erase(&xef->vm.xa, args->vm_id);
2069         mutex_unlock(&xef->vm.lock);
2070
2071         if (!err)
2072                 xe_vm_close_and_put(vm);
2073
2074         return err;
2075 }
2076
2077 static const u32 region_to_mem_type[] = {
2078         XE_PL_TT,
2079         XE_PL_VRAM0,
2080         XE_PL_VRAM1,
2081 };
2082
2083 static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
2084                           struct xe_engine *e, u32 region,
2085                           struct xe_sync_entry *syncs, u32 num_syncs,
2086                           struct async_op_fence *afence, bool first_op,
2087                           bool last_op)
2088 {
2089         int err;
2090
2091         XE_BUG_ON(region >= ARRAY_SIZE(region_to_mem_type));
2092
2093         if (!xe_vma_has_no_bo(vma)) {
2094                 err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
2095                 if (err)
2096                         return err;
2097         }
2098
2099         if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
2100                 return xe_vm_bind(vm, vma, e, xe_vma_bo(vma), syncs, num_syncs,
2101                                   afence, true, first_op, last_op);
2102         } else {
2103                 int i;
2104
2105                 /* Nothing to do, signal fences now */
2106                 if (last_op) {
2107                         for (i = 0; i < num_syncs; i++)
2108                                 xe_sync_entry_signal(&syncs[i], NULL,
2109                                                      dma_fence_get_stub());
2110                 }
2111                 if (afence)
2112                         dma_fence_signal(&afence->fence);
2113                 return 0;
2114         }
2115 }
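
     /*
      * Note (added for clarity, not in the original source): the prefetch
      * region indexes region_to_mem_type[] above, i.e. 0 selects system
      * memory (XE_PL_TT) and 1/2 select VRAM0/VRAM1. A rebind is only issued
      * when some tile in tile_mask lacks a valid, non-invalidated binding;
      * otherwise the syncs and the optional async fence are signalled
      * immediately.
      */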
2116
2117 #define VM_BIND_OP(op)  (op & 0xffff)
2118
2119 struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm)
2120 {
2121         int idx = vm->flags & XE_VM_FLAG_MIGRATION ?
2122                 XE_VM_FLAG_GT_ID(vm->flags) : 0;
2123
2124         /* Safe to use index 0 as all BOs in the VM share a single dma-resv lock */
2125         return &vm->pt_root[idx]->bo->ttm;
2126 }
2127
2128 static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv)
2129 {
2130         tv->num_shared = 1;
2131         tv->bo = xe_vm_ttm_bo(vm);
2132 }
2133
2134 static void vm_set_async_error(struct xe_vm *vm, int err)
2135 {
2136         lockdep_assert_held(&vm->lock);
2137         vm->async_ops.error = err;
2138 }
2139
2140 static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
2141                                     u64 addr, u64 range, u32 op)
2142 {
2143         struct xe_device *xe = vm->xe;
2144         struct xe_vma *vma;
2145         bool async = !!(op & XE_VM_BIND_FLAG_ASYNC);
2146
2147         lockdep_assert_held(&vm->lock);
2148
2149         switch (VM_BIND_OP(op)) {
2150         case XE_VM_BIND_OP_MAP:
2151         case XE_VM_BIND_OP_MAP_USERPTR:
2152                 vma = xe_vm_find_overlapping_vma(vm, addr, range);
2153                 if (XE_IOCTL_DBG(xe, vma && !async))
2154                         return -EBUSY;
2155                 break;
2156         case XE_VM_BIND_OP_UNMAP:
2157         case XE_VM_BIND_OP_PREFETCH:
2158                 vma = xe_vm_find_overlapping_vma(vm, addr, range);
2159                 if (XE_IOCTL_DBG(xe, !vma))
2160                         /* Not an actual error, the IOCTL cleans up and returns 0 */
2161                         return -ENODATA;
2162                 if (XE_IOCTL_DBG(xe, (xe_vma_start(vma) != addr ||
2163                                       xe_vma_end(vma) != addr + range) && !async))
2164                         return -EINVAL;
2165                 break;
2166         case XE_VM_BIND_OP_UNMAP_ALL:
2167                 if (XE_IOCTL_DBG(xe, list_empty(&bo->ttm.base.gpuva.list)))
2168                         /* Not an actual error, the IOCTL cleans up and returns 0 */
2169                         return -ENODATA;
2170                 break;
2171         default:
2172                 XE_BUG_ON("NOT POSSIBLE");
2173                 return -EINVAL;
2174         }
2175
2176         return 0;
2177 }
2178
2179 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2180                              bool post_commit)
2181 {
2182         down_read(&vm->userptr.notifier_lock);
2183         vma->gpuva.flags |= XE_VMA_DESTROYED;
2184         up_read(&vm->userptr.notifier_lock);
2185         if (post_commit)
2186                 xe_vm_remove_vma(vm, vma);
2187 }
2188
2189 #undef ULL
2190 #define ULL     unsigned long long
2191
2192 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
2193 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2194 {
2195         struct xe_vma *vma;
2196
2197         switch (op->op) {
2198         case DRM_GPUVA_OP_MAP:
2199                 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2200                        (ULL)op->map.va.addr, (ULL)op->map.va.range);
2201                 break;
2202         case DRM_GPUVA_OP_REMAP:
2203                 vma = gpuva_to_vma(op->remap.unmap->va);
2204                 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2205                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2206                        op->unmap.keep ? 1 : 0);
2207                 if (op->remap.prev)
2208                         vm_dbg(&xe->drm,
2209                                "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2210                                (ULL)op->remap.prev->va.addr,
2211                                (ULL)op->remap.prev->va.range);
2212                 if (op->remap.next)
2213                         vm_dbg(&xe->drm,
2214                                "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2215                                (ULL)op->remap.next->va.addr,
2216                                (ULL)op->remap.next->va.range);
2217                 break;
2218         case DRM_GPUVA_OP_UNMAP:
2219                 vma = gpuva_to_vma(op->unmap.va);
2220                 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2221                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2222                        op->unmap.keep ? 1 : 0);
2223                 break;
2224         case DRM_GPUVA_OP_PREFETCH:
2225                 vma = gpuva_to_vma(op->prefetch.va);
2226                 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2227                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2228                 break;
2229         default:
2230                 XE_BUG_ON("NOT POSSIBLE");
2231         }
2232 }
2233 #else
2234 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2235 {
2236 }
2237 #endif
2238
2239 /*
2240  * Create the operations list from the IOCTL arguments and set up op fields so
2241  * the parse and commit steps are decoupled from those arguments. This step can fail.
2242  */
2243 static struct drm_gpuva_ops *
2244 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
2245                          u64 bo_offset_or_userptr, u64 addr, u64 range,
2246                          u32 operation, u64 tile_mask, u32 region)
2247 {
2248         struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2249         struct ww_acquire_ctx ww;
2250         struct drm_gpuva_ops *ops;
2251         struct drm_gpuva_op *__op;
2252         struct xe_vma_op *op;
2253         struct drm_gpuvm_bo *vm_bo;
2254         int err;
2255
2256         lockdep_assert_held_write(&vm->lock);
2257
2258         vm_dbg(&vm->xe->drm,
2259                "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2260                VM_BIND_OP(operation), (ULL)addr, (ULL)range,
2261                (ULL)bo_offset_or_userptr);
2262
2263         switch (VM_BIND_OP(operation)) {
2264         case XE_VM_BIND_OP_MAP:
2265         case XE_VM_BIND_OP_MAP_USERPTR:
2266                 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
2267                                                   obj, bo_offset_or_userptr);
2268                 if (IS_ERR(ops))
2269                         return ops;
2270
2271                 drm_gpuva_for_each_op(__op, ops) {
2272                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2273
2274                         op->tile_mask = tile_mask;
2275                         op->map.immediate =
2276                                 operation & XE_VM_BIND_FLAG_IMMEDIATE;
2277                         op->map.read_only =
2278                                 operation & XE_VM_BIND_FLAG_READONLY;
2279                         op->map.is_null = operation & XE_VM_BIND_FLAG_NULL;
2280                 }
2281                 break;
2282         case XE_VM_BIND_OP_UNMAP:
2283                 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2284                 if (IS_ERR(ops))
2285                         return ops;
2286
2287                 drm_gpuva_for_each_op(__op, ops) {
2288                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2289
2290                         op->tile_mask = tile_mask;
2291                 }
2292                 break;
2293         case XE_VM_BIND_OP_PREFETCH:
2294                 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2295                 if (IS_ERR(ops))
2296                         return ops;
2297
2298                 drm_gpuva_for_each_op(__op, ops) {
2299                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2300
2301                         op->tile_mask = tile_mask;
2302                         op->prefetch.region = region;
2303                 }
2304                 break;
2305         case XE_VM_BIND_OP_UNMAP_ALL:
2306                 XE_BUG_ON(!bo);
2307
2308                 err = xe_bo_lock(bo, &ww, 0, true);
2309                 if (err)
2310                         return ERR_PTR(err);
2311
2312                 vm_bo = drm_gpuvm_bo_find(&vm->gpuvm, obj);
2313                 if (!vm_bo) {
                             xe_bo_unlock(bo, &ww);
                             /* No mappings for this BO, so nothing to unmap */
                             return ERR_PTR(-ENODATA);
2314                 }
2315
2316                 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2317                 drm_gpuvm_bo_put(vm_bo);
2318                 xe_bo_unlock(bo, &ww);
2319                 if (IS_ERR(ops))
2320                         return ops;
2321
2322                 drm_gpuva_for_each_op(__op, ops) {
2323                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2324
2325                         op->tile_mask = tile_mask;
2326                 }
2327                 break;
2328         default:
2329                 XE_BUG_ON("NOT POSSIBLE");
2330                 ops = ERR_PTR(-EINVAL);
2331         }
2332
2333 #ifdef TEST_VM_ASYNC_OPS_ERROR
2334         if (operation & FORCE_ASYNC_OP_ERROR) {
2335                 op = list_first_entry_or_null(&ops->list, struct xe_vma_op,
2336                                               base.entry);
2337                 if (op)
2338                         op->inject_error = true;
2339         }
2340 #endif
2341
2342         if (!IS_ERR(ops))
2343                 drm_gpuva_for_each_op(__op, ops)
2344                         print_op(vm->xe, __op);
2345
2346         return ops;
2347 }
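
     /*
      * Note (added for clarity, not in the original source): a VM bind IOCTL
      * is processed in three stages: vm_bind_ioctl_ops_create() above turns
      * the user arguments into drm_gpuva_ops, vm_bind_ioctl_ops_parse()
      * allocates the VMAs and other resources each op needs, and
      * vm_bind_ioctl_ops_commit() inserts the ops and either executes them
      * synchronously or queues them on vm->async_ops. Failures in the first
      * two stages are rolled back with vm_bind_ioctl_ops_unwind().
      */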
2348
2349 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2350                               u64 tile_mask, bool read_only, bool is_null)
2351 {
2352         struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2353         struct xe_vma *vma;
2354         struct ww_acquire_ctx ww;
2355         int err;
2356
2357         lockdep_assert_held_write(&vm->lock);
2358
2359         if (bo) {
2360                 err = xe_bo_lock(bo, &ww, 0, true);
2361                 if (err)
2362                         return ERR_PTR(err);
2363         }
2364         vma = xe_vma_create(vm, bo, op->gem.offset,
2365                             op->va.addr, op->va.addr +
2366                             op->va.range - 1, read_only, is_null,
2367                             tile_mask);
2368         if (bo)
2369                 xe_bo_unlock(bo, &ww);
2370
2371         if (xe_vma_is_userptr(vma)) {
2372                 err = xe_vma_userptr_pin_pages(vma);
2373                 if (err) {
2374                         prep_vma_destroy(vm, vma, false);
2375                         xe_vma_destroy_unlocked(vma);
2376                         return ERR_PTR(err);
2377                 }
2378         } else if (!xe_vma_has_no_bo(vma) && !bo->vm) {
2379                 vm_insert_extobj(vm, vma);
2380                 err = add_preempt_fences(vm, bo);
2381                 if (err) {
2382                         prep_vma_destroy(vm, vma, false);
2383                         xe_vma_destroy_unlocked(vma);
2384                         return ERR_PTR(err);
2385                 }
2386         }
2387
2388         return vma;
2389 }
2390
2391 /*
2392  * Parse the operations list and create any resources needed for the
2393  * operations before fully committing to them. This setup can fail.
2394  */
2395 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
2396                                    struct drm_gpuva_ops **ops, int num_ops_list,
2397                                    struct xe_sync_entry *syncs, u32 num_syncs,
2398                                    struct list_head *ops_list, bool async)
2399 {
2400         struct xe_vma_op *last_op = NULL;
2401         struct list_head *async_list = NULL;
2402         struct async_op_fence *fence = NULL;
2403         int err, i;
2404
2405         lockdep_assert_held_write(&vm->lock);
2406         XE_BUG_ON(num_ops_list > 1 && !async);
2407
2408         if (num_syncs && async) {
2409                 u64 seqno;
2410
2411                 fence = kmalloc(sizeof(*fence), GFP_KERNEL);
2412                 if (!fence)
2413                         return -ENOMEM;
2414
2415                 seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno;
2416                 dma_fence_init(&fence->fence, &async_op_fence_ops,
2417                                &vm->async_ops.lock, e ? e->bind.fence_ctx :
2418                                vm->async_ops.fence.context, seqno);
2419
2420                 if (!xe_vm_no_dma_fences(vm)) {
2421                         fence->vm = vm;
2422                         fence->started = false;
2423                         init_waitqueue_head(&fence->wq);
2424                 }
2425         }
2426
2427         for (i = 0; i < num_ops_list; ++i) {
2428                 struct drm_gpuva_ops *__ops = ops[i];
2429                 struct drm_gpuva_op *__op;
2430
2431                 drm_gpuva_for_each_op(__op, __ops) {
2432                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2433                         bool first = !async_list;
2434
2435                         XE_BUG_ON(!first && !async);
2436
2437                         INIT_LIST_HEAD(&op->link);
2438                         if (first)
2439                                 async_list = ops_list;
2440                         list_add_tail(&op->link, async_list);
2441
2442                         if (first) {
2443                                 op->flags |= XE_VMA_OP_FIRST;
2444                                 op->num_syncs = num_syncs;
2445                                 op->syncs = syncs;
2446                         }
2447
2448                         op->engine = e;
2449
2450                         switch (op->base.op) {
2451                         case DRM_GPUVA_OP_MAP:
2452                         {
2453                                 struct xe_vma *vma;
2454
2455                                 vma = new_vma(vm, &op->base.map,
2456                                               op->tile_mask, op->map.read_only,
2457                                               op->map.is_null);
2458                                 if (IS_ERR(vma)) {
2459                                         err = PTR_ERR(vma);
2460                                         goto free_fence;
2461                                 }
2462
2463                                 op->map.vma = vma;
2464                                 break;
2465                         }
2466                         case DRM_GPUVA_OP_REMAP:
2467                                 if (op->base.remap.prev) {
2468                                         struct xe_vma *vma;
2469                                         bool read_only =
2470                                                 op->base.remap.unmap->va->flags &
2471                                                 XE_VMA_READ_ONLY;
2472                                         bool is_null =
2473                                                 op->base.remap.unmap->va->flags &
2474                                                 DRM_GPUVA_SPARSE;
2475
2476                                         vma = new_vma(vm, op->base.remap.prev,
2477                                                       op->tile_mask, read_only,
2478                                                       is_null);
2479                                         if (IS_ERR(vma)) {
2480                                                 err = PTR_ERR(vma);
2481                                                 goto free_fence;
2482                                         }
2483
2484                                         op->remap.prev = vma;
2485                                 }
2486
2487                                 if (op->base.remap.next) {
2488                                         struct xe_vma *vma;
2489                                         bool read_only =
2490                                                 op->base.remap.unmap->va->flags &
2491                                                 XE_VMA_READ_ONLY;
2492
2493                                         bool is_null =
2494                                                 op->base.remap.unmap->va->flags &
2495                                                 DRM_GPUVA_SPARSE;
2496
2497                                         vma = new_vma(vm, op->base.remap.next,
2498                                                       op->tile_mask, read_only,
2499                                                       is_null);
2500                                         if (IS_ERR(vma)) {
2501                                                 err = PTR_ERR(vma);
2502                                                 goto free_fence;
2503                                         }
2504
2505                                         op->remap.next = vma;
2506                                 }
2507
2508                                 /* XXX: Support not doing remaps */
2509                                 op->remap.start =
2510                                         xe_vma_start(gpuva_to_vma(op->base.remap.unmap->va));
2511                                 op->remap.range =
2512                                         xe_vma_size(gpuva_to_vma(op->base.remap.unmap->va));
2513                                 break;
2514                         case DRM_GPUVA_OP_UNMAP:
2515                                 op->unmap.start =
2516                                         xe_vma_start(gpuva_to_vma(op->base.unmap.va));
2517                                 op->unmap.range =
2518                                         xe_vma_size(gpuva_to_vma(op->base.unmap.va));
2519                                 break;
2520                         case DRM_GPUVA_OP_PREFETCH:
2521                                 /* Nothing to do */
2522                                 break;
2523                         default:
2524                                 XE_BUG_ON("NOT POSSIBLE");
2525                         }
2526
2527                         last_op = op;
2528                 }
2529
2530                 last_op->ops = __ops;
2531         }
2532
2533         if (!last_op)
2534                 return -ENODATA;
2535
2536         last_op->flags |= XE_VMA_OP_LAST;
2537         last_op->num_syncs = num_syncs;
2538         last_op->syncs = syncs;
2539         last_op->fence = fence;
2540
2541         return 0;
2542
2543 free_fence:
2544         kfree(fence);
2545         return err;
2546 }
2547
2548 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2549 {
2550         int err = 0;
2551
2552         lockdep_assert_held_write(&vm->lock);
2553
2554         switch (op->base.op) {
2555         case DRM_GPUVA_OP_MAP:
2556                 err |= xe_vm_insert_vma(vm, op->map.vma);
2557                 break;
2558         case DRM_GPUVA_OP_REMAP:
2559                 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2560                                  true);
2561                 if (op->remap.prev)
2562                         err |= xe_vm_insert_vma(vm, op->remap.prev);
2563                 if (op->remap.next)
2564                         err |= xe_vm_insert_vma(vm, op->remap.next);
2565                 break;
2566         case DRM_GPUVA_OP_UNMAP:
2567                 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2568                 break;
2569         case DRM_GPUVA_OP_PREFETCH:
2570                 /* Nothing to do */
2571                 break;
2572         default:
2573                 XE_BUG_ON("NOT POSSIBLE");
2574         }
2575
2576         op->flags |= XE_VMA_OP_COMMITTED;
2577         return err;
2578 }
2579
2580 static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
2581                                struct xe_vma_op *op)
2582 {
2583         LIST_HEAD(objs);
2584         LIST_HEAD(dups);
2585         struct ttm_validate_buffer tv_bo, tv_vm;
2586         struct ww_acquire_ctx ww;
2587         struct xe_bo *vbo;
2588         int err;
2589
2590         lockdep_assert_held_write(&vm->lock);
2591
2592         xe_vm_tv_populate(vm, &tv_vm);
2593         list_add_tail(&tv_vm.head, &objs);
2594         vbo = xe_vma_bo(vma);
2595         if (vbo) {
2596                 /*
2597                  * An unbind can drop the last reference to the BO and
2598                  * the BO is needed for ttm_eu_backoff_reservation so
2599                  * take a reference here.
2600                  */
2601                 xe_bo_get(vbo);
2602
2603                 if (!vbo->vm) {
2604                         tv_bo.bo = &vbo->ttm;
2605                         tv_bo.num_shared = 1;
2606                         list_add(&tv_bo.head, &objs);
2607                 }
2608         }
2609
2610 again:
2611         err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups);
2612         if (err) {
2613                 xe_bo_put(vbo);
2614                 return err;
2615         }
2616
2617         xe_vm_assert_held(vm);
2618         xe_bo_assert_held(xe_vma_bo(vma));
2619
2620         switch (op->base.op) {
2621         case DRM_GPUVA_OP_MAP:
2622                 err = xe_vm_bind(vm, vma, op->engine, xe_vma_bo(vma),
2623                                  op->syncs, op->num_syncs, op->fence,
2624                                  op->map.immediate || !xe_vm_in_fault_mode(vm),
2625                                  op->flags & XE_VMA_OP_FIRST,
2626                                  op->flags & XE_VMA_OP_LAST);
2627                 break;
2628         case DRM_GPUVA_OP_REMAP:
2629         {
2630                 bool prev = !!op->remap.prev;
2631                 bool next = !!op->remap.next;
2632
2633                 if (!op->remap.unmap_done) {
2634                         vm->async_ops.munmap_rebind_inflight = true;
2635                         if (prev || next)
2636                                 vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
2637                         err = xe_vm_unbind(vm, vma, op->engine, op->syncs,
2638                                            op->num_syncs,
2639                                            !prev && !next ? op->fence : NULL,
2640                                            op->flags & XE_VMA_OP_FIRST,
2641                                            op->flags & XE_VMA_OP_LAST && !prev &&
2642                                            !next);
2643                         if (err)
2644                                 break;
2645                         op->remap.unmap_done = true;
2646                 }
2647
2648                 if (prev) {
2649                         op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
2650                         err = xe_vm_bind(vm, op->remap.prev, op->engine,
2651                                          xe_vma_bo(op->remap.prev), op->syncs,
2652                                          op->num_syncs,
2653                                          !next ? op->fence : NULL, true, false,
2654                                          op->flags & XE_VMA_OP_LAST && !next);
2655                         op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2656                         if (err)
2657                                 break;
2658                         op->remap.prev = NULL;
2659                 }
2660
2661                 if (next) {
2662                         op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
2663                         err = xe_vm_bind(vm, op->remap.next, op->engine,
2664                                          xe_vma_bo(op->remap.next),
2665                                          op->syncs, op->num_syncs,
2666                                          op->fence, true, false,
2667                                          op->flags & XE_VMA_OP_LAST);
2668                         op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2669                         if (err)
2670                                 break;
2671                         op->remap.next = NULL;
2672                 }
2673                 vm->async_ops.munmap_rebind_inflight = false;
2674
2675                 break;
2676         }
2677         case DRM_GPUVA_OP_UNMAP:
2678                 err = xe_vm_unbind(vm, vma, op->engine, op->syncs,
2679                                    op->num_syncs, op->fence,
2680                                    op->flags & XE_VMA_OP_FIRST,
2681                                    op->flags & XE_VMA_OP_LAST);
2682                 break;
2683         case DRM_GPUVA_OP_PREFETCH:
2684                 err = xe_vm_prefetch(vm, vma, op->engine, op->prefetch.region,
2685                                      op->syncs, op->num_syncs, op->fence,
2686                                      op->flags & XE_VMA_OP_FIRST,
2687                                      op->flags & XE_VMA_OP_LAST);
2688                 break;
2689         default:
2690                 XE_BUG_ON("NOT POSSIBLE");
2691         }
2692
2693         ttm_eu_backoff_reservation(&ww, &objs);
2694         if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
2695                 lockdep_assert_held_write(&vm->lock);
2696                 err = xe_vma_userptr_pin_pages(vma);
2697                 if (!err)
2698                         goto again;
2699         }
2700         xe_bo_put(vbo);
2701
2702         if (err)
2703                 trace_xe_vma_fail(vma);
2704
2705         return err;
2706 }
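
     /*
      * Note (added for clarity, not in the original source): when the
      * operation on a userptr VMA fails with -EAGAIN above, its pages were
      * invalidated by the MMU notifier since the last pin. The reservation
      * is backed off first, the pages are re-pinned with
      * xe_vma_userptr_pin_pages() under vm->lock only, and then the whole
      * reserve/execute sequence is retried via the "again" label.
      */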
2707
2708 static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
2709 {
2710         int ret = 0;
2711
2712         lockdep_assert_held_write(&vm->lock);
2713
2714 #ifdef TEST_VM_ASYNC_OPS_ERROR
2715         if (op->inject_error) {
2716                 op->inject_error = false;
2717                 return -ENOMEM;
2718         }
2719 #endif
2720
2721         switch (op->base.op) {
2722         case DRM_GPUVA_OP_MAP:
2723                 ret = __xe_vma_op_execute(vm, op->map.vma, op);
2724                 break;
2725         case DRM_GPUVA_OP_REMAP:
2726         {
2727                 struct xe_vma *vma;
2728
2729                 if (!op->remap.unmap_done)
2730                         vma = gpuva_to_vma(op->base.remap.unmap->va);
2731                 else if (op->remap.prev)
2732                         vma = op->remap.prev;
2733                 else
2734                         vma = op->remap.next;
2735
2736                 ret = __xe_vma_op_execute(vm, vma, op);
2737                 break;
2738         }
2739         case DRM_GPUVA_OP_UNMAP:
2740                 ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
2741                                           op);
2742                 break;
2743         case DRM_GPUVA_OP_PREFETCH:
2744                 ret = __xe_vma_op_execute(vm,
2745                                           gpuva_to_vma(op->base.prefetch.va),
2746                                           op);
2747                 break;
2748         default:
2749                 XE_BUG_ON("NOT POSSIBLE");
2750         }
2751
2752         return ret;
2753 }
2754
2755 static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
2756 {
2757         bool last = op->flags & XE_VMA_OP_LAST;
2758
2759         if (last) {
2760                 while (op->num_syncs--)
2761                         xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
2762                 kfree(op->syncs);
2763                 if (op->engine)
2764                         xe_engine_put(op->engine);
2765                 if (op->fence)
2766                         dma_fence_put(&op->fence->fence);
2767         }
2768         if (!list_empty(&op->link)) {
2769                 spin_lock_irq(&vm->async_ops.lock);
2770                 list_del(&op->link);
2771                 spin_unlock_irq(&vm->async_ops.lock);
2772         }
2773         if (op->ops)
2774                 drm_gpuva_ops_free(&vm->gpuvm, op->ops);
2775         if (last)
2776                 xe_vm_put(vm);
2777 }
2778
2779 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2780                              bool post_commit)
2781 {
2782         lockdep_assert_held_write(&vm->lock);
2783
2784         switch (op->base.op) {
2785         case DRM_GPUVA_OP_MAP:
2786                 if (op->map.vma) {
2787                         prep_vma_destroy(vm, op->map.vma, post_commit);
2788                         xe_vma_destroy_unlocked(op->map.vma);
2789                 }
2790                 break;
2791         case DRM_GPUVA_OP_UNMAP:
2792         {
2793                 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2794
2795                 down_read(&vm->userptr.notifier_lock);
2796                 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2797                 up_read(&vm->userptr.notifier_lock);
2798                 if (post_commit)
2799                         xe_vm_insert_vma(vm, vma);
2800                 break;
2801         }
2802         case DRM_GPUVA_OP_REMAP:
2803         {
2804                 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2805
2806                 if (op->remap.prev) {
2807                         prep_vma_destroy(vm, op->remap.prev, post_commit);
2808                         xe_vma_destroy_unlocked(op->remap.prev);
2809                 }
2810                 if (op->remap.next) {
2811                         prep_vma_destroy(vm, op->remap.next, post_commit);
2812                         xe_vma_destroy_unlocked(op->remap.next);
2813                 }
2814                 down_read(&vm->userptr.notifier_lock);
2815                 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2816                 up_read(&vm->userptr.notifier_lock);
2817                 if (post_commit)
2818                         xe_vm_insert_vma(vm, vma);
2819                 break;
2820         }
2821         case DRM_GPUVA_OP_PREFETCH:
2822                 /* Nothing to do */
2823                 break;
2824         default:
2825                 XE_BUG_ON("NOT POSSIBLE");
2826         }
2827 }
2828
2829 static struct xe_vma_op *next_vma_op(struct xe_vm *vm)
2830 {
2831         return list_first_entry_or_null(&vm->async_ops.pending,
2832                                         struct xe_vma_op, link);
2833 }
2834
2835 static void xe_vma_op_work_func(struct work_struct *w)
2836 {
2837         struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work);
2838
2839         for (;;) {
2840                 struct xe_vma_op *op;
2841                 int err;
2842
2843                 if (vm->async_ops.error && !xe_vm_is_closed(vm))
2844                         break;
2845
2846                 spin_lock_irq(&vm->async_ops.lock);
2847                 op = next_vma_op(vm);
2848                 spin_unlock_irq(&vm->async_ops.lock);
2849
2850                 if (!op)
2851                         break;
2852
2853                 if (!xe_vm_is_closed(vm)) {
2854                         down_write(&vm->lock);
2855                         err = xe_vma_op_execute(vm, op);
2856                         if (err) {
2857                                 drm_warn(&vm->xe->drm,
2858                                          "Async VM op(%d) failed with %d",
2859                                          op->base.op, err);
2860                                 vm_set_async_error(vm, err);
2861                                 up_write(&vm->lock);
2862
2863                                 if (vm->async_ops.error_capture.addr)
2864                                         vm_error_capture(vm, err, 0, 0, 0);
2865                                 break;
2866                         }
2867                         up_write(&vm->lock);
2868                 } else {
2869                         struct xe_vma *vma;
2870
2871                         switch (op->base.op) {
2872                         case DRM_GPUVA_OP_REMAP:
2873                                 vma = gpuva_to_vma(op->base.remap.unmap->va);
2874                                 trace_xe_vma_flush(vma);
2875
2876                                 down_write(&vm->lock);
2877                                 xe_vma_destroy_unlocked(vma);
2878                                 up_write(&vm->lock);
2879                                 break;
2880                         case DRM_GPUVA_OP_UNMAP:
2881                                 vma = gpuva_to_vma(op->base.unmap.va);
2882                                 trace_xe_vma_flush(vma);
2883
2884                                 down_write(&vm->lock);
2885                                 xe_vma_destroy_unlocked(vma);
2886                                 up_write(&vm->lock);
2887                                 break;
2888                         default:
2889                                 /* Nothing to do */
2890                                 break;
2891                         }
2892
2893                         if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
2894                                                    &op->fence->fence.flags)) {
2895                                 if (!xe_vm_no_dma_fences(vm)) {
2896                                         op->fence->started = true;
2897                                         wake_up_all(&op->fence->wq);
2898                                 }
2899                                 dma_fence_signal(&op->fence->fence);
2900                         }
2901                 }
2902
2903                 xe_vma_op_cleanup(vm, op);
2904         }
2905 }
2906
2907 static int vm_bind_ioctl_ops_commit(struct xe_vm *vm,
2908                                     struct list_head *ops_list, bool async)
2909 {
2910         struct xe_vma_op *op, *last_op, *next;
2911         int err;
2912
2913         lockdep_assert_held_write(&vm->lock);
2914
2915         list_for_each_entry(op, ops_list, link) {
2916                 last_op = op;
2917                 err = xe_vma_op_commit(vm, op);
2918                 if (err)
2919                         goto unwind;
2920         }
2921
2922         if (!async) {
2923                 err = xe_vma_op_execute(vm, last_op);
2924                 if (err)
2925                         goto unwind;
2926                 xe_vma_op_cleanup(vm, last_op);
2927         } else {
2928                 int i;
2929                 bool installed = false;
2930
2931                 for (i = 0; i < last_op->num_syncs; i++)
2932                         installed |= xe_sync_entry_signal(&last_op->syncs[i],
2933                                                           NULL,
2934                                                           &last_op->fence->fence);
2935                 if (!installed && last_op->fence)
2936                         dma_fence_signal(&last_op->fence->fence);
2937
2938                 spin_lock_irq(&vm->async_ops.lock);
2939                 list_splice_tail(ops_list, &vm->async_ops.pending);
2940                 spin_unlock_irq(&vm->async_ops.lock);
2941
2942                 if (!vm->async_ops.error)
2943                         queue_work(system_unbound_wq, &vm->async_ops.work);
2944         }
2945
2946         return 0;
2947
2948 unwind:
2949         list_for_each_entry_reverse(op, ops_list, link)
2950                 xe_vma_op_unwind(vm, op, op->flags & XE_VMA_OP_COMMITTED);
2951         list_for_each_entry_safe(op, next, ops_list, link)
2952                 xe_vma_op_cleanup(vm, op);
2953
2954         return err;
2955 }
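
     /*
      * Note (added for clarity, not in the original source): in the
      * synchronous case the earlier IOCTL checks guarantee the list boils
      * down to a single op, so executing and cleaning up last_op is
      * sufficient. In the asynchronous case the async fence is installed in
      * the out-syncs (or signalled immediately if none take it) and the ops
      * are handed to xe_vma_op_work_func() via vm->async_ops.pending.
      */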
2956
2957 /*
2958  * Unwind operations list, called after a failure of vm_bind_ioctl_ops_create or
2959  * vm_bind_ioctl_ops_parse.
2960  */
2961 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2962                                      struct drm_gpuva_ops **ops,
2963                                      int num_ops_list)
2964 {
2965         int i;
2966
2967         for (i = 0; i < num_ops_list; ++i) {
2968                 struct drm_gpuva_ops *__ops = ops[i];
2969                 struct drm_gpuva_op *__op;
2970
2971                 if (!__ops)
2972                         continue;
2973
2974                 drm_gpuva_for_each_op(__op, __ops) {
2975                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2976
2977                         xe_vma_op_unwind(vm, op, false);
2978                 }
2979         }
2980 }
2981
2982 #ifdef TEST_VM_ASYNC_OPS_ERROR
2983 #define SUPPORTED_FLAGS \
2984         (FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \
2985          XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | \
2986          XE_VM_BIND_FLAG_NULL | 0xffff)
2987 #else
2988 #define SUPPORTED_FLAGS \
2989         (XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \
2990          XE_VM_BIND_FLAG_IMMEDIATE | XE_VM_BIND_FLAG_NULL | 0xffff)
2991 #endif
2992 #define XE_64K_PAGE_MASK 0xffffull
2993
2994 #define MAX_BINDS       512     /* FIXME: Picking random upper limit */
2995
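/*
 * Copy in and sanity check the array of bind ops from userspace: opcode and
 * flag combinations, object/offset consistency, alignment and the async
 * requirements are all validated here before any VM state is touched.
 */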
2996 static int vm_bind_ioctl_check_args(struct xe_device *xe,
2997                                     struct drm_xe_vm_bind *args,
2998                                     struct drm_xe_vm_bind_op **bind_ops,
2999                                     bool *async)
3000 {
3001         int err;
3002         int i;
3003
3004         if (XE_IOCTL_DBG(xe, args->extensions) ||
3005             XE_IOCTL_DBG(xe, !args->num_binds) ||
3006             XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS))
3007                 return -EINVAL;
3008
3009         if (args->num_binds > 1) {
3010                 u64 __user *bind_user =
3011                         u64_to_user_ptr(args->vector_of_binds);
3012
3013                 *bind_ops = kmalloc_array(args->num_binds,
3014                                           sizeof(struct drm_xe_vm_bind_op), GFP_KERNEL);
3015                 if (!*bind_ops)
3016                         return -ENOMEM;
3017
3018                 err = copy_from_user(*bind_ops, bind_user,
3019                                      sizeof(struct drm_xe_vm_bind_op) *
3020                                      args->num_binds);
3021                 if (XE_IOCTL_DBG(xe, err)) {
3022                         err = -EFAULT;
3023                         goto free_bind_ops;
3024                 }
3025         } else {
3026                 *bind_ops = &args->bind;
3027         }
3028
3029         for (i = 0; i < args->num_binds; ++i) {
3030                 u64 range = (*bind_ops)[i].range;
3031                 u64 addr = (*bind_ops)[i].addr;
3032                 u32 op = (*bind_ops)[i].op;
3033                 u32 obj = (*bind_ops)[i].obj;
3034                 u64 obj_offset = (*bind_ops)[i].obj_offset;
3035                 u32 region = (*bind_ops)[i].region;
3036                 bool is_null = op & XE_VM_BIND_FLAG_NULL;
3037
3038                 if (i == 0) {
3039                         *async = !!(op & XE_VM_BIND_FLAG_ASYNC);
3040                 } else if (XE_IOCTL_DBG(xe, !*async) ||
3041                            XE_IOCTL_DBG(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) ||
3042                            XE_IOCTL_DBG(xe, VM_BIND_OP(op) ==
3043                                         XE_VM_BIND_OP_RESTART)) {
3044                         err = -EINVAL;
3045                         goto free_bind_ops;
3046                 }
3047
3048                 if (XE_IOCTL_DBG(xe, !*async &&
3049                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) {
3050                         err = -EINVAL;
3051                         goto free_bind_ops;
3052                 }
3053
3054                 if (XE_IOCTL_DBG(xe, !*async &&
3055                                  VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) {
3056                         err = -EINVAL;
3057                         goto free_bind_ops;
3058                 }
3059
3060                 if (XE_IOCTL_DBG(xe, VM_BIND_OP(op) >
3061                                  XE_VM_BIND_OP_PREFETCH) ||
3062                     XE_IOCTL_DBG(xe, op & ~SUPPORTED_FLAGS) ||
3063                     XE_IOCTL_DBG(xe, obj && is_null) ||
3064                     XE_IOCTL_DBG(xe, obj_offset && is_null) ||
3065                     XE_IOCTL_DBG(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_MAP &&
3066                                  is_null) ||
3067                     XE_IOCTL_DBG(xe, !obj &&
3068                                  VM_BIND_OP(op) == XE_VM_BIND_OP_MAP &&
3069                                  !is_null) ||
3070                     XE_IOCTL_DBG(xe, !obj &&
3071                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
3072                     XE_IOCTL_DBG(xe, addr &&
3073                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
3074                     XE_IOCTL_DBG(xe, range &&
3075                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
3076                     XE_IOCTL_DBG(xe, obj &&
3077                                  VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR) ||
3078                     XE_IOCTL_DBG(xe, obj &&
3079                                  VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH) ||
3080                     XE_IOCTL_DBG(xe, region &&
3081                                  VM_BIND_OP(op) != XE_VM_BIND_OP_PREFETCH) ||
3082                     XE_IOCTL_DBG(xe, !(BIT(region) &
3083                                        xe->info.mem_region_mask)) ||
3084                     XE_IOCTL_DBG(xe, obj &&
3085                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP)) {
3086                         err = -EINVAL;
3087                         goto free_bind_ops;
3088                 }
3089
3090                 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3091                     XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3092                     XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3093                     XE_IOCTL_DBG(xe, !range && VM_BIND_OP(op) !=
3094                                  XE_VM_BIND_OP_RESTART &&
3095                                  VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL)) {
3096                         err = -EINVAL;
3097                         goto free_bind_ops;
3098                 }
3099         }
3100
3101         return 0;
3102
3103 free_bind_ops:
3104         if (args->num_binds > 1)
3105                 kfree(*bind_ops);
3106         return err;
3107 }
3108
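/**
 * xe_vm_bind_ioctl() - Handler for the VM bind ioctl
 * @dev: DRM device
 * @data: Pointer to struct drm_xe_vm_bind
 * @file: DRM file private
 *
 * Validates the bind arguments, looks up the VM, engine, GEM objects and sync
 * entries involved, builds GPU VA operations for each bind and either executes
 * them synchronously or queues them for the VM's async bind worker.
 *
 * Return: 0 on success, negative error code on failure.
 */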
3109 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3110 {
3111         struct xe_device *xe = to_xe_device(dev);
3112         struct xe_file *xef = to_xe_file(file);
3113         struct drm_xe_vm_bind *args = data;
3114         struct drm_xe_sync __user *syncs_user;
3115         struct xe_bo **bos = NULL;
3116         struct drm_gpuva_ops **ops = NULL;
3117         struct xe_vm *vm;
3118         struct xe_engine *e = NULL;
3119         u32 num_syncs;
3120         struct xe_sync_entry *syncs = NULL;
3121         struct drm_xe_vm_bind_op *bind_ops;
3122         LIST_HEAD(ops_list);
3123         bool async;
3124         int err;
3125         int i;
3126
3127         err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async);
3128         if (err)
3129                 return err;
3130
3131         if (args->engine_id) {
3132                 e = xe_engine_lookup(xef, args->engine_id);
3133                 if (XE_IOCTL_DBG(xe, !e)) {
3134                         err = -ENOENT;
3135                         goto free_objs;
3136                 }
3137
3138                 if (XE_IOCTL_DBG(xe, !(e->flags & ENGINE_FLAG_VM))) {
3139                         err = -EINVAL;
3140                         goto put_engine;
3141                 }
3142         }
3143
3144         vm = xe_vm_lookup(xef, args->vm_id);
3145         if (XE_IOCTL_DBG(xe, !vm)) {
3146                 err = -EINVAL;
3147                 goto put_engine;
3148         }
3149
3150         err = down_write_killable(&vm->lock);
3151         if (err)
3152                 goto put_vm;
3153
3154         if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3155                 err = -ENOENT;
3156                 goto release_vm_lock;
3157         }
3158
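        /*
         * A RESTART op resumes async bind processing after a previous bind
         * failed: it is only valid on VMs created with async binds, takes no
         * syncs and requires an async error to actually be pending.
         */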
3159         if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) {
3160                 if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
3161                         err = -EOPNOTSUPP;
3162                 if (XE_IOCTL_DBG(xe, !err && args->num_syncs))
3163                         err = -EINVAL;
3164                 if (XE_IOCTL_DBG(xe, !err && !vm->async_ops.error))
3165                         err = -EPROTO;
3166
3167                 if (!err) {
3168                         trace_xe_vm_restart(vm);
3169                         vm_set_async_error(vm, 0);
3170
3171                         queue_work(system_unbound_wq, &vm->async_ops.work);
3172
3173                         /* Rebinds may have been blocked, give worker a kick */
3174                         if (xe_vm_in_compute_mode(vm))
3175                                 xe_vm_queue_rebind_worker(vm);
3176                 }
3177
3178                 goto release_vm_lock;
3179         }
3180
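        /*
         * Unless an async error is pending, the ASYNC flag on the bind must
         * match how the VM was created.
         */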
3181         if (XE_IOCTL_DBG(xe, !vm->async_ops.error &&
3182                          async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) {
3183                 err = -EOPNOTSUPP;
3184                 goto release_vm_lock;
3185         }
3186
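        /* Bind ranges must lie within the VM and only name existing tiles. */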
3187         for (i = 0; i < args->num_binds; ++i) {
3188                 u64 range = bind_ops[i].range;
3189                 u64 addr = bind_ops[i].addr;
3190
3191                 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3192                     XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3193                         err = -EINVAL;
3194                         goto release_vm_lock;
3195                 }
3196
3197                 if (bind_ops[i].tile_mask) {
3198                         u64 valid_tiles = BIT(xe->info.tile_count) - 1;
3199
3200                         if (XE_IOCTL_DBG(xe, bind_ops[i].tile_mask &
3201                                          ~valid_tiles)) {
3202                                 err = -EINVAL;
3203                                 goto release_vm_lock;
3204                         }
3205                 }
3206         }
3207
3208         bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL);
3209         if (!bos) {
3210                 err = -ENOMEM;
3211                 goto release_vm_lock;
3212         }
3213
3214         ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL);
3215         if (!ops) {
3216                 err = -ENOMEM;
3217                 goto release_vm_lock;
3218         }
3219
3220         for (i = 0; i < args->num_binds; ++i) {
3221                 struct drm_gem_object *gem_obj;
3222                 u64 range = bind_ops[i].range;
3223                 u64 addr = bind_ops[i].addr;
3224                 u32 obj = bind_ops[i].obj;
3225                 u64 obj_offset = bind_ops[i].obj_offset;
3226
3227                 if (!obj)
3228                         continue;
3229
3230                 gem_obj = drm_gem_object_lookup(file, obj);
3231                 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3232                         err = -ENOENT;
3233                         goto put_obj;
3234                 }
3235                 bos[i] = gem_to_xe_bo(gem_obj);
3236
3237                 if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
3238                     XE_IOCTL_DBG(xe, obj_offset >
3239                                  bos[i]->size - range)) {
3240                         err = -EINVAL;
3241                         goto put_obj;
3242                 }
3243
3244                 if (bos[i]->flags & XE_BO_INTERNAL_64K) {
3245                         if (XE_IOCTL_DBG(xe, obj_offset &
3246                                          XE_64K_PAGE_MASK) ||
3247                             XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3248                             XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3249                                 err = -EINVAL;
3250                                 goto put_obj;
3251                         }
3252                 }
3253         }
3254
3255         if (args->num_syncs) {
3256                 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3257                 if (!syncs) {
3258                         err = -ENOMEM;
3259                         goto put_obj;
3260                 }
3261         }
3262
3263         syncs_user = u64_to_user_ptr(args->syncs);
3264         for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3265                 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3266                                           &syncs_user[num_syncs], false,
3267                                           xe_vm_no_dma_fences(vm));
3268                 if (err)
3269                         goto free_syncs;
3270         }
3271
3272         /* Do some error checking first to make the unwind easier */
3273         for (i = 0; i < args->num_binds; ++i) {
3274                 u64 range = bind_ops[i].range;
3275                 u64 addr = bind_ops[i].addr;
3276                 u32 op = bind_ops[i].op;
3277
3278                 err = vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op);
3279                 if (err)
3280                         goto free_syncs;
3281         }
3282
3283         for (i = 0; i < args->num_binds; ++i) {
3284                 u64 range = bind_ops[i].range;
3285                 u64 addr = bind_ops[i].addr;
3286                 u32 op = bind_ops[i].op;
3287                 u64 obj_offset = bind_ops[i].obj_offset;
3288                 u64 tile_mask = bind_ops[i].tile_mask;
3289                 u32 region = bind_ops[i].region;
3290
3291                 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
3292                                                   addr, range, op, tile_mask,
3293                                                   region);
3294                 if (IS_ERR(ops[i])) {
3295                         err = PTR_ERR(ops[i]);
3296                         ops[i] = NULL;
3297                         goto unwind_ops;
3298                 }
3299         }
3300
3301         err = vm_bind_ioctl_ops_parse(vm, e, ops, args->num_binds,
3302                                       syncs, num_syncs, &ops_list, async);
3303         if (err)
3304                 goto unwind_ops;
3305
3306         err = vm_bind_ioctl_ops_commit(vm, &ops_list, async);
3307         up_write(&vm->lock);
3308
3309         for (i = 0; i < args->num_binds; ++i)
3310                 xe_bo_put(bos[i]);
3311
3312         kfree(bos);
3313         kfree(ops);
3314         if (args->num_binds > 1)
3315                 kfree(bind_ops);
3316
3317         return err;
3318
3319 unwind_ops:
3320         vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3321 free_syncs:
3322         while (num_syncs--)
3323                 xe_sync_entry_cleanup(&syncs[num_syncs]);
3324
3325         kfree(syncs);
3326 put_obj:
3327         for (i = 0; i < args->num_binds; ++i)
3328                 xe_bo_put(bos[i]);
3329 release_vm_lock:
3330         up_write(&vm->lock);
3331 put_vm:
3332         xe_vm_put(vm);
3333 put_engine:
3334         if (e)
3335                 xe_engine_put(e);
3336 free_objs:
3337         kfree(bos);
3338         kfree(ops);
3339         if (args->num_binds > 1)
3340                 kfree(bind_ops);
3341         return err == -ENODATA ? 0 : err;
3342 }
3343
3344 /*
3345  * XXX: Using the TTM wrappers for now, likely can call into dma-resv code
3346  * directly to optimize. Also this likely should be an inline function.
3347  */
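/**
 * xe_vm_lock() - Lock the VM's dma-resv object
 * @vm: The VM
 * @ww: Acquire context to use for the reservation
 * @num_resv: Number of shared fence slots to reserve
 * @intr: Whether to wait interruptibly
 *
 * Return: 0 on success, negative error code on failure.
 */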
3348 int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
3349                int num_resv, bool intr)
3350 {
3351         struct ttm_validate_buffer tv_vm;
3352         LIST_HEAD(objs);
3353         LIST_HEAD(dups);
3354
3355         XE_BUG_ON(!ww);
3356
3357         tv_vm.num_shared = num_resv;
3358         tv_vm.bo = xe_vm_ttm_bo(vm);
3359         list_add_tail(&tv_vm.head, &objs);
3360
3361         return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
3362 }
3363
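/**
 * xe_vm_unlock() - Unlock the VM's dma-resv object
 * @vm: The VM
 * @ww: Acquire context used for the matching xe_vm_lock()
 */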
3364 void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww)
3365 {
3366         dma_resv_unlock(xe_vm_resv(vm));
3367         ww_acquire_fini(ww);
3368 }
3369
3370 /**
3371  * xe_vm_invalidate_vma() - invalidate GPU mappings for VMA without a lock
3372  * @vma: VMA to invalidate
3373  *
3374  * Walks the list of page table leaves, zeroing the entries owned by this
3375  * VMA, invalidates the TLBs, and blocks until the TLB invalidation is
3376  * complete.
3377  *
3378  * Return: 0 on success, negative error code otherwise.
3379  */
3380 int xe_vm_invalidate_vma(struct xe_vma *vma)
3381 {
3382         struct xe_device *xe = xe_vma_vm(vma)->xe;
3383         struct xe_tile *tile;
3384         u32 tile_needs_invalidate = 0;
3385         int seqno[XE_MAX_TILES_PER_DEVICE];
3386         u8 id;
3387         int ret;
3388
3389         XE_BUG_ON(!xe_vm_in_fault_mode(xe_vma_vm(vma)));
3390         XE_WARN_ON(xe_vma_is_null(vma));
3391         trace_xe_vma_usm_invalidate(vma);
3392
3393         /* Check that we don't race with page-table updates */
3394         if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3395                 if (xe_vma_is_userptr(vma)) {
3396                         WARN_ON_ONCE(!mmu_interval_check_retry
3397                                      (&vma->userptr.notifier,
3398                                       vma->userptr.notifier_seq));
3399                         WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3400                                                              DMA_RESV_USAGE_BOOKKEEP));
3401
3402                 } else {
3403                         xe_bo_assert_held(xe_vma_bo(vma));
3404                 }
3405         }
3406
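        /*
         * First pass: zap the PTEs covering this VMA on each tile and kick
         * off a TLB invalidation; the second pass below waits for all issued
         * invalidations to complete.
         */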
3407         for_each_tile(tile, xe, id) {
3408                 if (xe_pt_zap_ptes(tile, vma)) {
3409                         tile_needs_invalidate |= BIT(id);
3410                         xe_device_wmb(xe);
3411                         /*
3412                          * FIXME: We potentially need to invalidate multiple
3413                          * GTs within the tile
3414                          */
3415                         seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
3416                         if (seqno[id] < 0)
3417                                 return seqno[id];
3418                 }
3419         }
3420
3421         for_each_tile(tile, xe, id) {
3422                 if (tile_needs_invalidate & BIT(id)) {
3423                         ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
3424                         if (ret < 0)
3425                                 return ret;
3426                 }
3427         }
3428
3429         vma->usm.tile_invalidated = vma->tile_mask;
3430
3431         return 0;
3432 }
3433
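/**
 * xe_analyze_vm() - Dump the VM's mappings to a drm_printer
 * @p: The drm_printer
 * @vm: The VM
 * @gt_id: GT id selecting which page-table root to report
 *
 * Return: Always 0, also when the VM lock could not be taken for the dump.
 */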
3434 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
3435 {
3436         struct drm_gpuva *gpuva;
3437         bool is_vram;
3438         u64 addr;
3439
3440         if (!down_read_trylock(&vm->lock)) {
3441                 drm_printf(p, " Failed to acquire VM lock to dump capture\n");
3442                 return 0;
3443         }
3444         if (vm->pt_root[gt_id]) {
3445                 addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE,
3446                                   &is_vram);
3447                 drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS");
3448         }
3449
3450         drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3451                 struct xe_vma *vma = gpuva_to_vma(gpuva);
3452                 bool is_userptr = xe_vma_is_userptr(vma);
3453                 bool is_null = xe_vma_is_null(vma);
3454
3455                 if (is_null) {
3456                         addr = 0;
3457                 } else if (is_userptr) {
3458                         struct xe_res_cursor cur;
3459
3460                         if (vma->userptr.sg) {
3461                                 xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE,
3462                                                 &cur);
3463                                 addr = xe_res_dma(&cur);
3464                         } else {
3465                                 addr = 0;
3466                         }
3467                 } else {
3468                         addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE, &is_vram);
3469                 }
3470                 drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
3471                            xe_vma_start(vma), xe_vma_end(vma) - 1,
3472                            xe_vma_size(vma),
3473                            addr, is_null ? "NULL" : is_userptr ? "USR" :
3474                            is_vram ? "VRAM" : "SYS");
3475         }
3476         up_read(&vm->lock);
3477
3478         return 0;
3479 }