drm/xe: Convert remaining instances of ttm_eu_reserve_buffers to drm_exec
drivers/gpu/drm/xe/xe_vm.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9
10 #include <drm/drm_exec.h>
11 #include <drm/drm_print.h>
12 #include <drm/ttm/ttm_execbuf_util.h>
13 #include <drm/ttm/ttm_tt.h>
14 #include <drm/xe_drm.h>
15 #include <linux/delay.h>
16 #include <linux/kthread.h>
17 #include <linux/mm.h>
18 #include <linux/swap.h>
19
20 #include "xe_bo.h"
21 #include "xe_device.h"
22 #include "xe_exec_queue.h"
23 #include "xe_gt.h"
24 #include "xe_gt_pagefault.h"
25 #include "xe_gt_tlb_invalidation.h"
26 #include "xe_migrate.h"
27 #include "xe_pm.h"
28 #include "xe_preempt_fence.h"
29 #include "xe_pt.h"
30 #include "xe_res_cursor.h"
31 #include "xe_sync.h"
32 #include "xe_trace.h"
33 #include "generated/xe_wa_oob.h"
34 #include "xe_wa.h"
35
36 #define TEST_VM_ASYNC_OPS_ERROR
37
38 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
39 {
40         return vm->gpuvm.r_obj;
41 }
42
43 /**
44  * xe_vma_userptr_check_repin() - Advisory check for repin needed
45  * @vma: The userptr vma
46  *
47  * Check if the userptr vma has been invalidated since last successful
48  * repin. The check is advisory only and the function can be called
49  * without the vm->userptr.notifier_lock held. There is no guarantee that the
50  * vma userptr will remain valid after a lockless check, so typically
51  * the call needs to be followed by a proper check under the notifier_lock.
52  *
53  * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
54  */
55 int xe_vma_userptr_check_repin(struct xe_vma *vma)
56 {
57         return mmu_interval_check_retry(&vma->userptr.notifier,
58                                         vma->userptr.notifier_seq) ?
59                 -EAGAIN : 0;
60 }
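
/*
 * Illustrative sketch (not part of the driver): the lockless check is
 * typically followed by a repin and, before fences are installed, by an
 * authoritative check under vm->userptr.notifier_lock. For example, at the
 * end of xe_vma_userptr_pin_pages() below:
 *
 *	if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
 *		goto retry;
 */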
61
62 int xe_vma_userptr_pin_pages(struct xe_vma *vma)
63 {
64         struct xe_vm *vm = xe_vma_vm(vma);
65         struct xe_device *xe = vm->xe;
66         const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
67         struct page **pages;
68         bool in_kthread = !current->mm;
69         unsigned long notifier_seq;
70         int pinned, ret, i;
71         bool read_only = xe_vma_read_only(vma);
72
73         lockdep_assert_held(&vm->lock);
74         XE_WARN_ON(!xe_vma_is_userptr(vma));
75 retry:
76         if (vma->gpuva.flags & XE_VMA_DESTROYED)
77                 return 0;
78
79         notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
80         if (notifier_seq == vma->userptr.notifier_seq)
81                 return 0;
82
83         pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
84         if (!pages)
85                 return -ENOMEM;
86
87         if (vma->userptr.sg) {
88                 dma_unmap_sgtable(xe->drm.dev,
89                                   vma->userptr.sg,
90                                   read_only ? DMA_TO_DEVICE :
91                                   DMA_BIDIRECTIONAL, 0);
92                 sg_free_table(vma->userptr.sg);
93                 vma->userptr.sg = NULL;
94         }
95
96         pinned = ret = 0;
97         if (in_kthread) {
98                 if (!mmget_not_zero(vma->userptr.notifier.mm)) {
99                         ret = -EFAULT;
100                         goto mm_closed;
101                 }
102                 kthread_use_mm(vma->userptr.notifier.mm);
103         }
104
105         while (pinned < num_pages) {
106                 ret = get_user_pages_fast(xe_vma_userptr(vma) +
107                                           pinned * PAGE_SIZE,
108                                           num_pages - pinned,
109                                           read_only ? 0 : FOLL_WRITE,
110                                           &pages[pinned]);
111                 if (ret < 0) {
112                         if (in_kthread)
113                                 ret = 0;
114                         break;
115                 }
116
117                 pinned += ret;
118                 ret = 0;
119         }
120
121         if (in_kthread) {
122                 kthread_unuse_mm(vma->userptr.notifier.mm);
123                 mmput(vma->userptr.notifier.mm);
124         }
125 mm_closed:
126         if (ret)
127                 goto out;
128
129         ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages,
130                                                 pinned, 0,
131                                                 (u64)pinned << PAGE_SHIFT,
132                                                 xe_sg_segment_size(xe->drm.dev),
133                                                 GFP_KERNEL);
134         if (ret) {
135                 vma->userptr.sg = NULL;
136                 goto out;
137         }
138         vma->userptr.sg = &vma->userptr.sgt;
139
140         ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg,
141                               read_only ? DMA_TO_DEVICE :
142                               DMA_BIDIRECTIONAL,
143                               DMA_ATTR_SKIP_CPU_SYNC |
144                               DMA_ATTR_NO_KERNEL_MAPPING);
145         if (ret) {
146                 sg_free_table(vma->userptr.sg);
147                 vma->userptr.sg = NULL;
148                 goto out;
149         }
150
151         for (i = 0; i < pinned; ++i) {
152                 if (!read_only) {
153                         lock_page(pages[i]);
154                         set_page_dirty(pages[i]);
155                         unlock_page(pages[i]);
156                 }
157
158                 mark_page_accessed(pages[i]);
159         }
160
161 out:
162         release_pages(pages, pinned);
163         kvfree(pages);
164
165         if (!(ret < 0)) {
166                 vma->userptr.notifier_seq = notifier_seq;
167                 if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
168                         goto retry;
169         }
170
171         return ret < 0 ? ret : 0;
172 }
173
174 static bool preempt_fences_waiting(struct xe_vm *vm)
175 {
176         struct xe_exec_queue *q;
177
178         lockdep_assert_held(&vm->lock);
179         xe_vm_assert_held(vm);
180
181         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
182                 if (!q->compute.pfence ||
183                     (q->compute.pfence && test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
184                                                    &q->compute.pfence->flags))) {
185                         return true;
186                 }
187         }
188
189         return false;
190 }
191
192 static void free_preempt_fences(struct list_head *list)
193 {
194         struct list_head *link, *next;
195
196         list_for_each_safe(link, next, list)
197                 xe_preempt_fence_free(to_preempt_fence_from_link(link));
198 }
199
200 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
201                                 unsigned int *count)
202 {
203         lockdep_assert_held(&vm->lock);
204         xe_vm_assert_held(vm);
205
206         if (*count >= vm->preempt.num_exec_queues)
207                 return 0;
208
209         for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
210                 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
211
212                 if (IS_ERR(pfence))
213                         return PTR_ERR(pfence);
214
215                 list_move_tail(xe_preempt_fence_link(pfence), list);
216         }
217
218         return 0;
219 }
220
221 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
222 {
223         struct xe_exec_queue *q;
224
225         xe_vm_assert_held(vm);
226
227         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
228                 if (q->compute.pfence) {
229                         long timeout = dma_fence_wait(q->compute.pfence, false);
230
231                         if (timeout < 0)
232                                 return -ETIME;
233                         dma_fence_put(q->compute.pfence);
234                         q->compute.pfence = NULL;
235                 }
236         }
237
238         return 0;
239 }
240
241 static bool xe_vm_is_idle(struct xe_vm *vm)
242 {
243         struct xe_exec_queue *q;
244
245         xe_vm_assert_held(vm);
246         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
247                 if (!xe_exec_queue_is_idle(q))
248                         return false;
249         }
250
251         return true;
252 }
253
254 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
255 {
256         struct list_head *link;
257         struct xe_exec_queue *q;
258
259         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
260                 struct dma_fence *fence;
261
262                 link = list->next;
263                 XE_WARN_ON(link == list);
264
265                 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
266                                              q, q->compute.context,
267                                              ++q->compute.seqno);
268                 dma_fence_put(q->compute.pfence);
269                 q->compute.pfence = fence;
270         }
271 }
272
273 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
274 {
275         struct xe_exec_queue *q;
276         int err;
277
278         err = xe_bo_lock(bo, true);
279         if (err)
280                 return err;
281
282         err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
283         if (err)
284                 goto out_unlock;
285
286         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
287                 if (q->compute.pfence) {
288                         dma_resv_add_fence(bo->ttm.base.resv,
289                                            q->compute.pfence,
290                                            DMA_RESV_USAGE_BOOKKEEP);
291                 }
292
293 out_unlock:
294         xe_bo_unlock(bo);
295         return err;
296 }
297
298 /**
299  * xe_vm_fence_all_extobjs() - Add a fence to vm's external objects' resv
300  * @vm: The vm.
301  * @fence: The fence to add.
302  * @usage: The resv usage for the fence.
303  *
304  * Loops over all of the vm's external object bindings and adds a @fence
305  * with the given @usage to all of the external object's reservation
306  * with the given @usage to all of the external objects' reservation
307  */
308 void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
309                              enum dma_resv_usage usage)
310 {
311         struct xe_vma *vma;
312
313         list_for_each_entry(vma, &vm->extobj.list, extobj.link)
314                 dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, usage);
315 }
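
/*
 * Illustrative sketch (not part of the driver): a preempt fence is normally
 * mirrored to both the vm's resv and all external BOs, e.g.:
 *
 *	dma_resv_add_fence(xe_vm_resv(vm), pfence, DMA_RESV_USAGE_BOOKKEEP);
 *	xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP);
 *
 * as done in xe_vm_add_compute_exec_queue() and
 * resume_and_reinstall_preempt_fences() below.
 */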
316
317 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
318 {
319         struct xe_exec_queue *q;
320
321         lockdep_assert_held(&vm->lock);
322         xe_vm_assert_held(vm);
323
324         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
325                 q->ops->resume(q);
326
327                 dma_resv_add_fence(xe_vm_resv(vm), q->compute.pfence,
328                                    DMA_RESV_USAGE_BOOKKEEP);
329                 xe_vm_fence_all_extobjs(vm, q->compute.pfence,
330                                         DMA_RESV_USAGE_BOOKKEEP);
331         }
332 }
333
334 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
335 {
336         struct drm_exec exec;
337         struct dma_fence *pfence;
338         int err;
339         bool wait;
340
341         XE_WARN_ON(!xe_vm_in_compute_mode(vm));
342
343         down_write(&vm->lock);
344         drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
345         drm_exec_until_all_locked(&exec) {
346                 err = xe_vm_lock_dma_resv(vm, &exec, 1, true);
347                 drm_exec_retry_on_contention(&exec);
348                 if (err)
349                         goto out_unlock;
350         }
351
352         pfence = xe_preempt_fence_create(q, q->compute.context,
353                                          ++q->compute.seqno);
354         if (!pfence) {
355                 err = -ENOMEM;
356                 goto out_unlock;
357         }
358
359         list_add(&q->compute.link, &vm->preempt.exec_queues);
360         ++vm->preempt.num_exec_queues;
361         q->compute.pfence = pfence;
362
363         down_read(&vm->userptr.notifier_lock);
364
365         dma_resv_add_fence(xe_vm_resv(vm), pfence,
366                            DMA_RESV_USAGE_BOOKKEEP);
367
368         xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP);
369
370         /*
371          * Check whether a VM preemption or a userptr invalidation is in
372          * flight; if so, trigger this preempt fence to sync state with the
373          * other preempt fences on the VM.
374          */
375         wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
376         if (wait)
377                 dma_fence_enable_sw_signaling(pfence);
378
379         up_read(&vm->userptr.notifier_lock);
380
381 out_unlock:
382         drm_exec_fini(&exec);
383         up_write(&vm->lock);
384
385         return err;
386 }
387
388 /**
389  * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
390  * that need repinning.
391  * @vm: The VM.
392  *
393  * This function checks whether the VM has userptrs that need repinning,
394  * and provides a release-type barrier on the userptr.notifier_lock after
395  * checking.
396  *
397  * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
398  */
399 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
400 {
401         lockdep_assert_held_read(&vm->userptr.notifier_lock);
402
403         return (list_empty(&vm->userptr.repin_list) &&
404                 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
405 }
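
/*
 * Illustrative sketch (not part of the driver): the locked check is used as
 * the final gate before the point of no return, e.g.:
 *
 *	down_read(&vm->userptr.notifier_lock);
 *	if (__xe_vm_userptr_needs_repin(vm)) {
 *		up_read(&vm->userptr.notifier_lock);
 *		goto retry;
 *	}
 *	...install/arm fences...
 *	up_read(&vm->userptr.notifier_lock);
 *
 * as in preempt_rebind_work_func() below.
 */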
406
407 /**
408  * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv
409  * objects of the vm's external buffer objects.
410  * @vm: The vm.
411  * @exec: Pointer to a struct drm_exec locking context.
412  * @num_shared: Number of dma-fence slots to reserve in the locked objects.
413  * @lock_vm: Lock also the vm's dma_resv.
414  *
415  * Locks the vm's dma-resv object (if @lock_vm is true) and the dma-resv
416  * objects of all buffer objects on the vm's external object list.
417  *
418  * Return: 0 on success, negative error code on error. In particular,
419  * -EINTR or -ERESTARTSYS may be returned if a wait is interrupted by a signal.
420  */
421 int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec,
422                         unsigned int num_shared, bool lock_vm)
423 {
424         struct xe_vma *vma, *next;
425         int err = 0;
426
427         lockdep_assert_held(&vm->lock);
428
429         if (lock_vm) {
430                 err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
431                 if (err)
432                         return err;
433         }
434
435         list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
436                 err = drm_exec_prepare_obj(exec, &xe_vma_bo(vma)->ttm.base, num_shared);
437                 if (err)
438                         return err;
439         }
440
441         spin_lock(&vm->notifier.list_lock);
442         list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list,
443                                  notifier.rebind_link) {
444                 xe_bo_assert_held(xe_vma_bo(vma));
445
446                 list_del_init(&vma->notifier.rebind_link);
447                 if (vma->tile_present && !(vma->gpuva.flags & XE_VMA_DESTROYED))
448                         list_move_tail(&vma->combined_links.rebind,
449                                        &vm->rebind_list);
450         }
451         spin_unlock(&vm->notifier.list_lock);
452
453         return 0;
454 }
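
/*
 * Illustrative sketch (not part of the driver): this helper is intended to
 * run inside a drm_exec loop so that WW-mutex contention can be retried, e.g.:
 *
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
 *	drm_exec_until_all_locked(&exec) {
 *		err = xe_vm_lock_dma_resv(vm, &exec, 1, true);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *	}
 *	...
 *	drm_exec_fini(&exec);
 *
 * as in xe_vm_add_compute_exec_queue() above.
 */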
455
456 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
457
458 static void xe_vm_kill(struct xe_vm *vm)
459 {
460         struct xe_exec_queue *q;
461
462         lockdep_assert_held(&vm->lock);
463
464         xe_vm_lock(vm, false);
465         vm->flags |= XE_VM_FLAG_BANNED;
466         trace_xe_vm_kill(vm);
467
468         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
469                 q->ops->kill(q);
470         xe_vm_unlock(vm);
471
472         /* TODO: Inform user the VM is banned */
473 }
474
475 /**
476  * xe_vm_validate_should_retry() - Whether to retry after a validate error.
477  * @exec: The drm_exec object used for locking before validation.
478  * @err: The error returned from ttm_bo_validate().
479  * @end: A ktime_t cookie that should be set to 0 before first use and
480  * that should be reused on subsequent calls.
481  *
482  * With multiple active VMs, under memory pressure, it is possible that
483  * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
484  * Until ttm properly handles locking in such scenarios, the best thing the
485  * driver can do is retry with a timeout. Check if that is necessary, and
486  * if so unlock the drm_exec's objects while keeping the ticket to prepare
487  * for a rerun.
488  *
489  * Return: true if a retry after drm_exec_init() is recommended;
490  * false otherwise.
491  */
492 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
493 {
494         ktime_t cur;
495
496         if (err != -ENOMEM)
497                 return false;
498
499         cur = ktime_get();
500         *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
501         if (!ktime_before(cur, *end))
502                 return false;
503
504         /*
505          * We would like to keep the ticket here with
506          * drm_exec_unlock_all(), but WW mutex asserts currently
507          * stop us from that. In any case this function could go away
508          * with proper TTM -EDEADLK handling.
509          */
510         drm_exec_fini(exec);
511
512         msleep(20);
513         return true;
514 }
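
/*
 * Illustrative sketch (not part of the driver): callers keep a ktime_t
 * cookie across attempts and restart the whole drm_exec transaction when a
 * retry is recommended (the exec state has already been finalized here):
 *
 *	ktime_t end = 0;
 * retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
 *	drm_exec_until_all_locked(&exec) {
 *		err = ...lock and validate...;
 *		drm_exec_retry_on_contention(&exec);
 *		if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *			goto retry;
 *	}
 *
 * as in preempt_rebind_work_func() below.
 */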
515
516 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
517                                  bool *done)
518 {
519         struct xe_vma *vma;
520         int err;
521
522         err = drm_exec_prepare_obj(exec, xe_vm_obj(vm),
523                                    vm->preempt.num_exec_queues);
524         if (err)
525                 return err;
526
527         if (xe_vm_is_idle(vm)) {
528                 vm->preempt.rebind_deactivated = true;
529                 *done = true;
530                 return 0;
531         }
532
533         if (!preempt_fences_waiting(vm)) {
534                 *done = true;
535                 return 0;
536         }
537
538         err = xe_vm_lock_dma_resv(vm, exec, vm->preempt.num_exec_queues, false);
539         if (err)
540                 return err;
541
542         err = wait_for_existing_preempt_fences(vm);
543         if (err)
544                 return err;
545
546         list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
547                 if (xe_vma_has_no_bo(vma) ||
548                     vma->gpuva.flags & XE_VMA_DESTROYED)
549                         continue;
550
551                 err = xe_bo_validate(xe_vma_bo(vma), vm, false);
552                 if (err)
553                         break;
554         }
555
556         return err;
557 }
558
559 static void preempt_rebind_work_func(struct work_struct *w)
560 {
561         struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
562         struct drm_exec exec;
563         struct dma_fence *rebind_fence;
564         unsigned int fence_count = 0;
565         LIST_HEAD(preempt_fences);
566         ktime_t end = 0;
567         int err;
568         long wait;
569         int __maybe_unused tries = 0;
570
571         XE_WARN_ON(!xe_vm_in_compute_mode(vm));
572         trace_xe_vm_rebind_worker_enter(vm);
573
574         down_write(&vm->lock);
575
576         if (xe_vm_is_closed_or_banned(vm)) {
577                 up_write(&vm->lock);
578                 trace_xe_vm_rebind_worker_exit(vm);
579                 return;
580         }
581
582 retry:
583         if (vm->async_ops.error)
584                 goto out_unlock_outer;
585
586         /*
587          * Extreme corner where we exit a VM error state with a munmap style VM
588          * unbind in flight which requires a rebind. In this case the rebind
589          * needs to install some fences into the dma-resv slots. The worker that
590          * does this is already queued; let it make progress by dropping vm->lock
591          * and trying this again.
592          */
593         if (vm->async_ops.munmap_rebind_inflight) {
594                 up_write(&vm->lock);
595                 flush_work(&vm->async_ops.work);
596                 goto retry;
597         }
598
599         if (xe_vm_userptr_check_repin(vm)) {
600                 err = xe_vm_userptr_pin(vm);
601                 if (err)
602                         goto out_unlock_outer;
603         }
604
605         drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
606
607         drm_exec_until_all_locked(&exec) {
608                 bool done = false;
609
610                 err = xe_preempt_work_begin(&exec, vm, &done);
611                 drm_exec_retry_on_contention(&exec);
612                 if (err && xe_vm_validate_should_retry(&exec, err, &end)) {
613                         err = -EAGAIN;
614                         goto out_unlock_outer;
615                 }
616                 if (err || done)
617                         goto out_unlock;
618         }
619
620         err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
621         if (err)
622                 goto out_unlock;
623
624         rebind_fence = xe_vm_rebind(vm, true);
625         if (IS_ERR(rebind_fence)) {
626                 err = PTR_ERR(rebind_fence);
627                 goto out_unlock;
628         }
629
630         if (rebind_fence) {
631                 dma_fence_wait(rebind_fence, false);
632                 dma_fence_put(rebind_fence);
633         }
634
635         /* Wait on munmap style VM unbinds */
636         wait = dma_resv_wait_timeout(xe_vm_resv(vm),
637                                      DMA_RESV_USAGE_KERNEL,
638                                      false, MAX_SCHEDULE_TIMEOUT);
639         if (wait <= 0) {
640                 err = -ETIME;
641                 goto out_unlock;
642         }
643
644 #define retry_required(__tries, __vm) \
645         (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
646         (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
647         __xe_vm_userptr_needs_repin(__vm))
648
649         down_read(&vm->userptr.notifier_lock);
650         if (retry_required(tries, vm)) {
651                 up_read(&vm->userptr.notifier_lock);
652                 err = -EAGAIN;
653                 goto out_unlock;
654         }
655
656 #undef retry_required
657
658         spin_lock(&vm->xe->ttm.lru_lock);
659         ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
660         spin_unlock(&vm->xe->ttm.lru_lock);
661
662         /* Point of no return. */
663         arm_preempt_fences(vm, &preempt_fences);
664         resume_and_reinstall_preempt_fences(vm);
665         up_read(&vm->userptr.notifier_lock);
666
667 out_unlock:
668         drm_exec_fini(&exec);
669 out_unlock_outer:
670         if (err == -EAGAIN) {
671                 trace_xe_vm_rebind_worker_retry(vm);
672                 goto retry;
673         }
674
675         if (err) {
676                 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
677                 xe_vm_kill(vm);
678         }
679         up_write(&vm->lock);
680
681         free_preempt_fences(&preempt_fences);
682
683         trace_xe_vm_rebind_worker_exit(vm);
684 }
685
686 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
687                                    const struct mmu_notifier_range *range,
688                                    unsigned long cur_seq)
689 {
690         struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
691         struct xe_vm *vm = xe_vma_vm(vma);
692         struct dma_resv_iter cursor;
693         struct dma_fence *fence;
694         long err;
695
696         XE_WARN_ON(!xe_vma_is_userptr(vma));
697         trace_xe_vma_userptr_invalidate(vma);
698
699         if (!mmu_notifier_range_blockable(range))
700                 return false;
701
702         down_write(&vm->userptr.notifier_lock);
703         mmu_interval_set_seq(mni, cur_seq);
704
705         /* No need to stop gpu access if the userptr is not yet bound. */
706         if (!vma->userptr.initial_bind) {
707                 up_write(&vm->userptr.notifier_lock);
708                 return true;
709         }
710
711         /*
712          * Tell exec and rebind worker they need to repin and rebind this
713          * userptr.
714          */
715         if (!xe_vm_in_fault_mode(vm) &&
716             !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
717                 spin_lock(&vm->userptr.invalidated_lock);
718                 list_move_tail(&vma->userptr.invalidate_link,
719                                &vm->userptr.invalidated);
720                 spin_unlock(&vm->userptr.invalidated_lock);
721         }
722
723         up_write(&vm->userptr.notifier_lock);
724
725         /*
726          * Preempt fences turn into schedule disables, pipeline these.
727          * Note that even in fault mode, we need to wait for binds and
728          * unbinds to complete, and those are attached as BOOKKEEP fences
729          * to the vm.
730          */
731         dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
732                             DMA_RESV_USAGE_BOOKKEEP);
733         dma_resv_for_each_fence_unlocked(&cursor, fence)
734                 dma_fence_enable_sw_signaling(fence);
735         dma_resv_iter_end(&cursor);
736
737         err = dma_resv_wait_timeout(xe_vm_resv(vm),
738                                     DMA_RESV_USAGE_BOOKKEEP,
739                                     false, MAX_SCHEDULE_TIMEOUT);
740         XE_WARN_ON(err <= 0);
741
742         if (xe_vm_in_fault_mode(vm)) {
743                 err = xe_vm_invalidate_vma(vma);
744                 XE_WARN_ON(err);
745         }
746
747         trace_xe_vma_userptr_invalidate_complete(vma);
748
749         return true;
750 }
751
752 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
753         .invalidate = vma_userptr_invalidate,
754 };
755
756 int xe_vm_userptr_pin(struct xe_vm *vm)
757 {
758         struct xe_vma *vma, *next;
759         int err = 0;
760         LIST_HEAD(tmp_evict);
761
762         lockdep_assert_held_write(&vm->lock);
763
764         /* Collect invalidated userptrs */
765         spin_lock(&vm->userptr.invalidated_lock);
766         list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
767                                  userptr.invalidate_link) {
768                 list_del_init(&vma->userptr.invalidate_link);
769                 if (list_empty(&vma->combined_links.userptr))
770                         list_move_tail(&vma->combined_links.userptr,
771                                        &vm->userptr.repin_list);
772         }
773         spin_unlock(&vm->userptr.invalidated_lock);
774
775         /* Pin and move to temporary list */
776         list_for_each_entry_safe(vma, next, &vm->userptr.repin_list,
777                                  combined_links.userptr) {
778                 err = xe_vma_userptr_pin_pages(vma);
779                 if (err < 0)
780                         goto out_err;
781
782                 list_move_tail(&vma->combined_links.userptr, &tmp_evict);
783         }
784
785         /* Take lock and move to rebind_list for rebinding. */
786         err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
787         if (err)
788                 goto out_err;
789
790         list_for_each_entry_safe(vma, next, &tmp_evict, combined_links.userptr)
791                 list_move_tail(&vma->combined_links.rebind, &vm->rebind_list);
792
793         dma_resv_unlock(xe_vm_resv(vm));
794
795         return 0;
796
797 out_err:
798         list_splice_tail(&tmp_evict, &vm->userptr.repin_list);
799
800         return err;
801 }
802
803 /**
804  * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
805  * that need repinning.
806  * @vm: The VM.
807  *
808  * This function does an advisory check for whether the VM has userptrs that
809  * need repinning.
810  *
811  * Return: 0 if there are no indications of userptrs needing repinning,
812  * -EAGAIN if there are.
813  */
814 int xe_vm_userptr_check_repin(struct xe_vm *vm)
815 {
816         return (list_empty_careful(&vm->userptr.repin_list) &&
817                 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
818 }
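
/*
 * Illustrative sketch (not part of the driver): the advisory vm-level check
 * gates the more expensive pinning pass, e.g.:
 *
 *	if (xe_vm_userptr_check_repin(vm)) {
 *		err = xe_vm_userptr_pin(vm);
 *		if (err)
 *			...bail or retry...
 *	}
 *
 * as in preempt_rebind_work_func() above.
 */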
819
820 static struct dma_fence *
821 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
822                struct xe_sync_entry *syncs, u32 num_syncs,
823                bool first_op, bool last_op);
824
825 struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
826 {
827         struct dma_fence *fence = NULL;
828         struct xe_vma *vma, *next;
829
830         lockdep_assert_held(&vm->lock);
831         if (xe_vm_no_dma_fences(vm) && !rebind_worker)
832                 return NULL;
833
834         xe_vm_assert_held(vm);
835         list_for_each_entry_safe(vma, next, &vm->rebind_list,
836                                  combined_links.rebind) {
837                 XE_WARN_ON(!vma->tile_present);
838
839                 list_del_init(&vma->combined_links.rebind);
840                 dma_fence_put(fence);
841                 if (rebind_worker)
842                         trace_xe_vma_rebind_worker(vma);
843                 else
844                         trace_xe_vma_rebind_exec(vma);
845                 fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
846                 if (IS_ERR(fence))
847                         return fence;
848         }
849
850         return fence;
851 }
852
853 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
854                                     struct xe_bo *bo,
855                                     u64 bo_offset_or_userptr,
856                                     u64 start, u64 end,
857                                     bool read_only,
858                                     bool is_null,
859                                     u8 tile_mask)
860 {
861         struct xe_vma *vma;
862         struct xe_tile *tile;
863         u8 id;
864
865         XE_WARN_ON(start >= end);
866         XE_WARN_ON(end >= vm->size);
867
868         if (!bo && !is_null)    /* userptr */
869                 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
870         else
871                 vma = kzalloc(sizeof(*vma) - sizeof(struct xe_userptr),
872                               GFP_KERNEL);
873         if (!vma) {
874                 vma = ERR_PTR(-ENOMEM);
875                 return vma;
876         }
877
878         INIT_LIST_HEAD(&vma->combined_links.rebind);
879         INIT_LIST_HEAD(&vma->notifier.rebind_link);
880         INIT_LIST_HEAD(&vma->extobj.link);
881
882         INIT_LIST_HEAD(&vma->gpuva.gem.entry);
883         vma->gpuva.vm = &vm->gpuvm;
884         vma->gpuva.va.addr = start;
885         vma->gpuva.va.range = end - start + 1;
886         if (read_only)
887                 vma->gpuva.flags |= XE_VMA_READ_ONLY;
888         if (is_null)
889                 vma->gpuva.flags |= DRM_GPUVA_SPARSE;
890
891         if (tile_mask) {
892                 vma->tile_mask = tile_mask;
893         } else {
894                 for_each_tile(tile, vm->xe, id)
895                         vma->tile_mask |= 0x1 << id;
896         }
897
898         if (vm->xe->info.platform == XE_PVC)
899                 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
900
901         if (bo) {
902                 struct drm_gpuvm_bo *vm_bo;
903
904                 xe_bo_assert_held(bo);
905
906                 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
907                 if (IS_ERR(vm_bo)) {
908                         kfree(vma);
909                         return ERR_CAST(vm_bo);
910                 }
911
912                 drm_gem_object_get(&bo->ttm.base);
913                 vma->gpuva.gem.obj = &bo->ttm.base;
914                 vma->gpuva.gem.offset = bo_offset_or_userptr;
915                 drm_gpuva_link(&vma->gpuva, vm_bo);
916                 drm_gpuvm_bo_put(vm_bo);
917         } else /* userptr or null */ {
918                 if (!is_null) {
919                         u64 size = end - start + 1;
920                         int err;
921
922                         INIT_LIST_HEAD(&vma->userptr.invalidate_link);
923                         vma->gpuva.gem.offset = bo_offset_or_userptr;
924
925                         err = mmu_interval_notifier_insert(&vma->userptr.notifier,
926                                                            current->mm,
927                                                            xe_vma_userptr(vma), size,
928                                                            &vma_userptr_notifier_ops);
929                         if (err) {
930                                 kfree(vma);
931                                 vma = ERR_PTR(err);
932                                 return vma;
933                         }
934
935                         vma->userptr.notifier_seq = LONG_MAX;
936                 }
937
938                 xe_vm_get(vm);
939         }
940
941         return vma;
942 }
943
944 static bool vm_remove_extobj(struct xe_vma *vma)
945 {
946         if (!list_empty(&vma->extobj.link)) {
947                 xe_vma_vm(vma)->extobj.entries--;
948                 list_del_init(&vma->extobj.link);
949                 return true;
950         }
951         return false;
952 }
953
954 static void xe_vma_destroy_late(struct xe_vma *vma)
955 {
956         struct xe_vm *vm = xe_vma_vm(vma);
957         struct xe_device *xe = vm->xe;
958         bool read_only = xe_vma_read_only(vma);
959
960         if (xe_vma_is_userptr(vma)) {
961                 if (vma->userptr.sg) {
962                         dma_unmap_sgtable(xe->drm.dev,
963                                           vma->userptr.sg,
964                                           read_only ? DMA_TO_DEVICE :
965                                           DMA_BIDIRECTIONAL, 0);
966                         sg_free_table(vma->userptr.sg);
967                         vma->userptr.sg = NULL;
968                 }
969
970                 /*
971                  * Since userptr pages are not pinned, we can't remove
972                  * the notifier until we're sure the GPU is not accessing
973                  * them anymore
974                  */
975                 mmu_interval_notifier_remove(&vma->userptr.notifier);
976                 xe_vm_put(vm);
977         } else if (xe_vma_is_null(vma)) {
978                 xe_vm_put(vm);
979         } else {
980                 xe_bo_put(xe_vma_bo(vma));
981         }
982
983         kfree(vma);
984 }
985
986 static void vma_destroy_work_func(struct work_struct *w)
987 {
988         struct xe_vma *vma =
989                 container_of(w, struct xe_vma, destroy_work);
990
991         xe_vma_destroy_late(vma);
992 }
993
994 static struct xe_vma *
995 bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
996                             struct xe_vma *ignore)
997 {
998         struct drm_gpuvm_bo *vm_bo;
999         struct drm_gpuva *va;
1000         struct drm_gem_object *obj = &bo->ttm.base;
1001
1002         xe_bo_assert_held(bo);
1003
1004         drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
1005                 drm_gpuvm_bo_for_each_va(va, vm_bo) {
1006                         struct xe_vma *vma = gpuva_to_vma(va);
1007
1008                         if (vma != ignore && xe_vma_vm(vma) == vm)
1009                                 return vma;
1010                 }
1011         }
1012
1013         return NULL;
1014 }
1015
1016 static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm,
1017                                  struct xe_vma *ignore)
1018 {
1019         bool ret;
1020
1021         xe_bo_lock(bo, false);
1022         ret = !!bo_has_vm_references_locked(bo, vm, ignore);
1023         xe_bo_unlock(bo);
1024
1025         return ret;
1026 }
1027
1028 static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
1029 {
1030         lockdep_assert_held_write(&vm->lock);
1031
1032         list_add(&vma->extobj.link, &vm->extobj.list);
1033         vm->extobj.entries++;
1034 }
1035
1036 static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
1037 {
1038         struct xe_bo *bo = xe_vma_bo(vma);
1039
1040         lockdep_assert_held_write(&vm->lock);
1041
1042         if (bo_has_vm_references(bo, vm, vma))
1043                 return;
1044
1045         __vm_insert_extobj(vm, vma);
1046 }
1047
1048 static void vma_destroy_cb(struct dma_fence *fence,
1049                            struct dma_fence_cb *cb)
1050 {
1051         struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1052
1053         INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1054         queue_work(system_unbound_wq, &vma->destroy_work);
1055 }
1056
1057 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1058 {
1059         struct xe_vm *vm = xe_vma_vm(vma);
1060
1061         lockdep_assert_held_write(&vm->lock);
1062         XE_WARN_ON(!list_empty(&vma->combined_links.destroy));
1063
1064         if (xe_vma_is_userptr(vma)) {
1065                 XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED));
1066
1067                 spin_lock(&vm->userptr.invalidated_lock);
1068                 list_del(&vma->userptr.invalidate_link);
1069                 spin_unlock(&vm->userptr.invalidated_lock);
1070         } else if (!xe_vma_is_null(vma)) {
1071                 xe_bo_assert_held(xe_vma_bo(vma));
1072
1073                 spin_lock(&vm->notifier.list_lock);
1074                 list_del(&vma->notifier.rebind_link);
1075                 spin_unlock(&vm->notifier.list_lock);
1076
1077                 drm_gpuva_unlink(&vma->gpuva);
1078
1079                 if (!xe_vma_bo(vma)->vm && vm_remove_extobj(vma)) {
1080                         struct xe_vma *other;
1081
1082                         other = bo_has_vm_references_locked(xe_vma_bo(vma), vm, NULL);
1083
1084                         if (other)
1085                                 __vm_insert_extobj(vm, other);
1086                 }
1087         }
1088
1089         xe_vm_assert_held(vm);
1090         if (fence) {
1091                 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1092                                                  vma_destroy_cb);
1093
1094                 if (ret) {
1095                         XE_WARN_ON(ret != -ENOENT);
1096                         xe_vma_destroy_late(vma);
1097                 }
1098         } else {
1099                 xe_vma_destroy_late(vma);
1100         }
1101 }
1102
1103 /**
1104  * xe_vm_prepare_vma() - drm_exec utility to lock a vma
1105  * @exec: The drm_exec object we're currently locking for.
1106  * @vma: The vma for which we want to lock the vm resv and any attached
1107  * object's resv.
1108  * @num_shared: The number of dma-fence slots to pre-allocate in the
1109  * objects' reservation objects.
1110  *
1111  * Return: 0 on success, negative error code on error. In particular
1112  * may return -EDEADLK on WW transaction contention and -EINTR if
1113  * an interruptible wait is terminated by a signal.
1114  */
1115 int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
1116                       unsigned int num_shared)
1117 {
1118         struct xe_vm *vm = xe_vma_vm(vma);
1119         struct xe_bo *bo = xe_vma_bo(vma);
1120         int err;
1121
1122         XE_WARN_ON(!vm);
1123         err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
1124         if (!err && bo && !bo->vm)
1125                 err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
1126
1127         return err;
1128 }
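
/*
 * Illustrative sketch (not part of the driver): like the other prepare
 * helpers, this is meant to run inside a drm_exec loop, e.g.:
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = xe_vm_prepare_vma(&exec, vma, 0);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *	}
 *
 * xe_vma_destroy_unlocked() below shows the full pattern.
 */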
1129
1130 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1131 {
1132         struct drm_exec exec;
1133         int err;
1134
1135         drm_exec_init(&exec, 0);
1136         drm_exec_until_all_locked(&exec) {
1137                 err = xe_vm_prepare_vma(&exec, vma, 0);
1138                 drm_exec_retry_on_contention(&exec);
1139                 if (XE_WARN_ON(err))
1140                         break;
1141         }
1142
1143         xe_vma_destroy(vma, NULL);
1144
1145         drm_exec_fini(&exec);
1146 }
1147
1148 struct xe_vma *
1149 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1150 {
1151         struct drm_gpuva *gpuva;
1152
1153         lockdep_assert_held(&vm->lock);
1154
1155         if (xe_vm_is_closed_or_banned(vm))
1156                 return NULL;
1157
1158         XE_WARN_ON(start + range > vm->size);
1159
1160         gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1161
1162         return gpuva ? gpuva_to_vma(gpuva) : NULL;
1163 }
1164
1165 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1166 {
1167         int err;
1168
1169         XE_WARN_ON(xe_vma_vm(vma) != vm);
1170         lockdep_assert_held(&vm->lock);
1171
1172         err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1173         XE_WARN_ON(err);        /* Shouldn't be possible */
1174
1175         return err;
1176 }
1177
1178 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1179 {
1180         XE_WARN_ON(xe_vma_vm(vma) != vm);
1181         lockdep_assert_held(&vm->lock);
1182
1183         drm_gpuva_remove(&vma->gpuva);
1184         if (vm->usm.last_fault_vma == vma)
1185                 vm->usm.last_fault_vma = NULL;
1186 }
1187
1188 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1189 {
1190         struct xe_vma_op *op;
1191
1192         op = kzalloc(sizeof(*op), GFP_KERNEL);
1193
1194         if (unlikely(!op))
1195                 return NULL;
1196
1197         return &op->base;
1198 }
1199
1200 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1201
1202 static struct drm_gpuvm_ops gpuvm_ops = {
1203         .op_alloc = xe_vm_op_alloc,
1204         .vm_free = xe_vm_free,
1205 };
1206
1207 static void xe_vma_op_work_func(struct work_struct *w);
1208 static void vm_destroy_work_func(struct work_struct *w);
1209
1210 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1211 {
1212         struct drm_gem_object *vm_resv_obj;
1213         struct xe_vm *vm;
1214         int err, number_tiles = 0;
1215         struct xe_tile *tile;
1216         u8 id;
1217
1218         vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1219         if (!vm)
1220                 return ERR_PTR(-ENOMEM);
1221
1222         vm->xe = xe;
1223
1224         vm->size = 1ull << xe->info.va_bits;
1225
1226         vm->flags = flags;
1227
1228         init_rwsem(&vm->lock);
1229
1230         INIT_LIST_HEAD(&vm->rebind_list);
1231
1232         INIT_LIST_HEAD(&vm->userptr.repin_list);
1233         INIT_LIST_HEAD(&vm->userptr.invalidated);
1234         init_rwsem(&vm->userptr.notifier_lock);
1235         spin_lock_init(&vm->userptr.invalidated_lock);
1236
1237         INIT_LIST_HEAD(&vm->notifier.rebind_list);
1238         spin_lock_init(&vm->notifier.list_lock);
1239
1240         INIT_LIST_HEAD(&vm->async_ops.pending);
1241         INIT_WORK(&vm->async_ops.work, xe_vma_op_work_func);
1242         spin_lock_init(&vm->async_ops.lock);
1243
1244         INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1245
1246         INIT_LIST_HEAD(&vm->preempt.exec_queues);
1247         vm->preempt.min_run_period_ms = 10;     /* FIXME: Wire up to uAPI */
1248
1249         for_each_tile(tile, xe, id)
1250                 xe_range_fence_tree_init(&vm->rftree[id]);
1251
1252         INIT_LIST_HEAD(&vm->extobj.list);
1253
1254         if (!(flags & XE_VM_FLAG_MIGRATION))
1255                 xe_device_mem_access_get(xe);
1256
1257         vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1258         if (!vm_resv_obj) {
1259                 err = -ENOMEM;
1260                 goto err_no_resv;
1261         }
1262
1263         drm_gpuvm_init(&vm->gpuvm, "Xe VM", 0, &xe->drm, vm_resv_obj,
1264                        0, vm->size, 0, 0, &gpuvm_ops);
1265
1266         drm_gem_object_put(vm_resv_obj);
1267
1268         err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
1269         if (err)
1270                 goto err_close;
1271
1272         if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1273                 vm->flags |= XE_VM_FLAG_64K;
1274
1275         for_each_tile(tile, xe, id) {
1276                 if (flags & XE_VM_FLAG_MIGRATION &&
1277                     tile->id != XE_VM_FLAG_TILE_ID(flags))
1278                         continue;
1279
1280                 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1281                 if (IS_ERR(vm->pt_root[id])) {
1282                         err = PTR_ERR(vm->pt_root[id]);
1283                         vm->pt_root[id] = NULL;
1284                         goto err_unlock_close;
1285                 }
1286         }
1287
1288         if (flags & XE_VM_FLAG_SCRATCH_PAGE) {
1289                 for_each_tile(tile, xe, id) {
1290                         if (!vm->pt_root[id])
1291                                 continue;
1292
1293                         err = xe_pt_create_scratch(xe, tile, vm);
1294                         if (err)
1295                                 goto err_unlock_close;
1296                 }
1297                 vm->batch_invalidate_tlb = true;
1298         }
1299
1300         if (flags & XE_VM_FLAG_COMPUTE_MODE) {
1301                 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1302                 vm->flags |= XE_VM_FLAG_COMPUTE_MODE;
1303                 vm->batch_invalidate_tlb = false;
1304         }
1305
1306         if (flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
1307                 vm->async_ops.fence.context = dma_fence_context_alloc(1);
1308                 vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
1309         }
1310
1311         /* Fill pt_root after allocating scratch tables */
1312         for_each_tile(tile, xe, id) {
1313                 if (!vm->pt_root[id])
1314                         continue;
1315
1316                 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1317         }
1318         dma_resv_unlock(xe_vm_resv(vm));
1319
1320         /* Kernel migration VM shouldn't have a circular loop. */
1321         if (!(flags & XE_VM_FLAG_MIGRATION)) {
1322                 for_each_tile(tile, xe, id) {
1323                         struct xe_gt *gt = tile->primary_gt;
1324                         struct xe_vm *migrate_vm;
1325                         struct xe_exec_queue *q;
1326
1327                         if (!vm->pt_root[id])
1328                                 continue;
1329
1330                         migrate_vm = xe_migrate_get_vm(tile->migrate);
1331                         q = xe_exec_queue_create_class(xe, gt, migrate_vm,
1332                                                        XE_ENGINE_CLASS_COPY,
1333                                                        EXEC_QUEUE_FLAG_VM);
1334                         xe_vm_put(migrate_vm);
1335                         if (IS_ERR(q)) {
1336                                 err = PTR_ERR(q);
1337                                 goto err_close;
1338                         }
1339                         vm->q[id] = q;
1340                         number_tiles++;
1341                 }
1342         }
1343
1344         if (number_tiles > 1)
1345                 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1346
1347         mutex_lock(&xe->usm.lock);
1348         if (flags & XE_VM_FLAG_FAULT_MODE)
1349                 xe->usm.num_vm_in_fault_mode++;
1350         else if (!(flags & XE_VM_FLAG_MIGRATION))
1351                 xe->usm.num_vm_in_non_fault_mode++;
1352         mutex_unlock(&xe->usm.lock);
1353
1354         trace_xe_vm_create(vm);
1355
1356         return vm;
1357
1358 err_unlock_close:
1359         dma_resv_unlock(xe_vm_resv(vm));
1360 err_close:
1361         xe_vm_close_and_put(vm);
1362         return ERR_PTR(err);
1363
1364 err_no_resv:
1365         for_each_tile(tile, xe, id)
1366                 xe_range_fence_tree_fini(&vm->rftree[id]);
1367         kfree(vm);
1368         if (!(flags & XE_VM_FLAG_MIGRATION))
1369                 xe_device_mem_access_put(xe);
1370         return ERR_PTR(err);
1371 }
1372
1373 static void flush_async_ops(struct xe_vm *vm)
1374 {
1375         queue_work(system_unbound_wq, &vm->async_ops.work);
1376         flush_work(&vm->async_ops.work);
1377 }
1378
1379 static void vm_error_capture(struct xe_vm *vm, int err,
1380                              u32 op, u64 addr, u64 size)
1381 {
1382         struct drm_xe_vm_bind_op_error_capture capture;
1383         u64 __user *address =
1384                 u64_to_user_ptr(vm->async_ops.error_capture.addr);
1385         bool in_kthread = !current->mm;
1386
1387         capture.error = err;
1388         capture.op = op;
1389         capture.addr = addr;
1390         capture.size = size;
1391
1392         if (in_kthread) {
1393                 if (!mmget_not_zero(vm->async_ops.error_capture.mm))
1394                         goto mm_closed;
1395                 kthread_use_mm(vm->async_ops.error_capture.mm);
1396         }
1397
1398         if (copy_to_user(address, &capture, sizeof(capture)))
1399                 XE_WARN_ON("Copy to user failed");
1400
1401         if (in_kthread) {
1402                 kthread_unuse_mm(vm->async_ops.error_capture.mm);
1403                 mmput(vm->async_ops.error_capture.mm);
1404         }
1405
1406 mm_closed:
1407         wake_up_all(&vm->async_ops.error_capture.wq);
1408 }
1409
1410 static void xe_vm_close(struct xe_vm *vm)
1411 {
1412         down_write(&vm->lock);
1413         vm->size = 0;
1414         up_write(&vm->lock);
1415 }
1416
1417 void xe_vm_close_and_put(struct xe_vm *vm)
1418 {
1419         LIST_HEAD(contested);
1420         struct xe_device *xe = vm->xe;
1421         struct xe_tile *tile;
1422         struct xe_vma *vma, *next_vma;
1423         struct drm_gpuva *gpuva, *next;
1424         u8 id;
1425
1426         XE_WARN_ON(vm->preempt.num_exec_queues);
1427
1428         xe_vm_close(vm);
1429         flush_async_ops(vm);
1430         if (xe_vm_in_compute_mode(vm))
1431                 flush_work(&vm->preempt.rebind_work);
1432
1433         for_each_tile(tile, xe, id) {
1434                 if (vm->q[id]) {
1435                         xe_exec_queue_kill(vm->q[id]);
1436                         xe_exec_queue_put(vm->q[id]);
1437                         vm->q[id] = NULL;
1438                 }
1439         }
1440
1441         down_write(&vm->lock);
1442         xe_vm_lock(vm, false);
1443         drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1444                 vma = gpuva_to_vma(gpuva);
1445
1446                 if (xe_vma_has_no_bo(vma)) {
1447                         down_read(&vm->userptr.notifier_lock);
1448                         vma->gpuva.flags |= XE_VMA_DESTROYED;
1449                         up_read(&vm->userptr.notifier_lock);
1450                 }
1451
1452                 xe_vm_remove_vma(vm, vma);
1453
1454                 /* easy case, remove from VMA? */
1455                 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1456                         list_del_init(&vma->combined_links.rebind);
1457                         xe_vma_destroy(vma, NULL);
1458                         continue;
1459                 }
1460
1461                 list_move_tail(&vma->combined_links.destroy, &contested);
1462                 vma->gpuva.flags |= XE_VMA_DESTROYED;
1463         }
1464
1465         /*
1466          * All vm operations will add shared fences to resv.
1467          * The only exception is eviction for a shared object,
1468          * but even so, the unbind when evicted would still
1469          * install a fence to resv. Hence it's safe to
1470          * destroy the pagetables immediately.
1471          */
1472         for_each_tile(tile, xe, id) {
1473                 if (vm->scratch_bo[id]) {
1474                         u32 i;
1475
1476                         xe_bo_unpin(vm->scratch_bo[id]);
1477                         xe_bo_put(vm->scratch_bo[id]);
1478                         for (i = 0; i < vm->pt_root[id]->level; i++)
1479                                 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags,
1480                                               NULL);
1481                 }
1482                 if (vm->pt_root[id]) {
1483                         xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1484                         vm->pt_root[id] = NULL;
1485                 }
1486         }
1487         xe_vm_unlock(vm);
1488
1489         /*
1490          * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1491          * Since we hold a refcount to the bo, we can remove and free
1492          * the members safely without locking.
1493          */
1494         list_for_each_entry_safe(vma, next_vma, &contested,
1495                                  combined_links.destroy) {
1496                 list_del_init(&vma->combined_links.destroy);
1497                 xe_vma_destroy_unlocked(vma);
1498         }
1499
1500         if (vm->async_ops.error_capture.addr)
1501                 wake_up_all(&vm->async_ops.error_capture.wq);
1502
1503         XE_WARN_ON(!list_empty(&vm->extobj.list));
1504         up_write(&vm->lock);
1505
1506         mutex_lock(&xe->usm.lock);
1507         if (vm->flags & XE_VM_FLAG_FAULT_MODE)
1508                 xe->usm.num_vm_in_fault_mode--;
1509         else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
1510                 xe->usm.num_vm_in_non_fault_mode--;
1511         mutex_unlock(&xe->usm.lock);
1512
1513         for_each_tile(tile, xe, id)
1514                 xe_range_fence_tree_fini(&vm->rftree[id]);
1515
1516         xe_vm_put(vm);
1517 }
1518
1519 static void vm_destroy_work_func(struct work_struct *w)
1520 {
1521         struct xe_vm *vm =
1522                 container_of(w, struct xe_vm, destroy_work);
1523         struct xe_device *xe = vm->xe;
1524         struct xe_tile *tile;
1525         u8 id;
1526         void *lookup;
1527
1528         /* xe_vm_close_and_put was not called? */
1529         XE_WARN_ON(vm->size);
1530
1531         if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
1532                 xe_device_mem_access_put(xe);
1533
1534                 if (xe->info.has_asid) {
1535                         mutex_lock(&xe->usm.lock);
1536                         lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1537                         XE_WARN_ON(lookup != vm);
1538                         mutex_unlock(&xe->usm.lock);
1539                 }
1540         }
1541
1542         for_each_tile(tile, xe, id)
1543                 XE_WARN_ON(vm->pt_root[id]);
1544
1545         trace_xe_vm_free(vm);
1546         dma_fence_put(vm->rebind_fence);
1547         kfree(vm);
1548 }
1549
1550 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1551 {
1552         struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1553
1554         /* To destroy the VM we need to be able to sleep */
1555         queue_work(system_unbound_wq, &vm->destroy_work);
1556 }
1557
1558 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1559 {
1560         struct xe_vm *vm;
1561
1562         mutex_lock(&xef->vm.lock);
1563         vm = xa_load(&xef->vm.xa, id);
1564         if (vm)
1565                 xe_vm_get(vm);
1566         mutex_unlock(&xef->vm.lock);
1567
1568         return vm;
1569 }
1570
1571 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1572 {
1573         return xe_pde_encode(vm->pt_root[tile->id]->bo, 0,
1574                              XE_CACHE_WB);
1575 }
1576
1577 static struct dma_fence *
1578 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
1579                  struct xe_sync_entry *syncs, u32 num_syncs,
1580                  bool first_op, bool last_op)
1581 {
1582         struct xe_tile *tile;
1583         struct dma_fence *fence = NULL;
1584         struct dma_fence **fences = NULL;
1585         struct dma_fence_array *cf = NULL;
1586         struct xe_vm *vm = xe_vma_vm(vma);
1587         int cur_fence = 0, i;
1588         int number_tiles = hweight8(vma->tile_present);
1589         int err;
1590         u8 id;
1591
1592         trace_xe_vma_unbind(vma);
1593
1594         if (number_tiles > 1) {
1595                 fences = kmalloc_array(number_tiles, sizeof(*fences),
1596                                        GFP_KERNEL);
1597                 if (!fences)
1598                         return ERR_PTR(-ENOMEM);
1599         }
1600
1601         for_each_tile(tile, vm->xe, id) {
1602                 if (!(vma->tile_present & BIT(id)))
1603                         goto next;
1604
1605                 fence = __xe_pt_unbind_vma(tile, vma, q, first_op ? syncs : NULL,
1606                                            first_op ? num_syncs : 0);
1607                 if (IS_ERR(fence)) {
1608                         err = PTR_ERR(fence);
1609                         goto err_fences;
1610                 }
1611
1612                 if (fences)
1613                         fences[cur_fence++] = fence;
1614
1615 next:
1616                 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1617                         q = list_next_entry(q, multi_gt_list);
1618         }
1619
1620         if (fences) {
1621                 cf = dma_fence_array_create(number_tiles, fences,
1622                                             vm->composite_fence_ctx,
1623                                             vm->composite_fence_seqno++,
1624                                             false);
1625                 if (!cf) {
1626                         --vm->composite_fence_seqno;
1627                         err = -ENOMEM;
1628                         goto err_fences;
1629                 }
1630         }
1631
1632         if (last_op) {
1633                 for (i = 0; i < num_syncs; i++)
1634                         xe_sync_entry_signal(&syncs[i], NULL,
1635                                              cf ? &cf->base : fence);
1636         }
1637
1638         return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;
1639
1640 err_fences:
1641         if (fences) {
1642                 while (cur_fence) {
1643                         /* FIXME: Rewind the previous binds? */
1644                         dma_fence_put(fences[--cur_fence]);
1645                 }
1646                 kfree(fences);
1647         }
1648
1649         return ERR_PTR(err);
1650 }
1651
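     /*
      * Bind a VMA on every tile in its tile_mask, passing whether the tile
      * already has the VMA present so the PT code can treat it as a rebind.
      * Fence and sync handling mirrors xe_vm_unbind_vma().
      */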
1652 static struct dma_fence *
1653 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
1654                struct xe_sync_entry *syncs, u32 num_syncs,
1655                bool first_op, bool last_op)
1656 {
1657         struct xe_tile *tile;
1658         struct dma_fence *fence;
1659         struct dma_fence **fences = NULL;
1660         struct dma_fence_array *cf = NULL;
1661         struct xe_vm *vm = xe_vma_vm(vma);
1662         int cur_fence = 0, i;
1663         int number_tiles = hweight8(vma->tile_mask);
1664         int err;
1665         u8 id;
1666
1667         trace_xe_vma_bind(vma);
1668
1669         if (number_tiles > 1) {
1670                 fences = kmalloc_array(number_tiles, sizeof(*fences),
1671                                        GFP_KERNEL);
1672                 if (!fences)
1673                         return ERR_PTR(-ENOMEM);
1674         }
1675
1676         for_each_tile(tile, vm->xe, id) {
1677                 if (!(vma->tile_mask & BIT(id)))
1678                         goto next;
1679
1680                 fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
1681                                          first_op ? syncs : NULL,
1682                                          first_op ? num_syncs : 0,
1683                                          vma->tile_present & BIT(id));
1684                 if (IS_ERR(fence)) {
1685                         err = PTR_ERR(fence);
1686                         goto err_fences;
1687                 }
1688
1689                 if (fences)
1690                         fences[cur_fence++] = fence;
1691
1692 next:
1693                 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1694                         q = list_next_entry(q, multi_gt_list);
1695         }
1696
1697         if (fences) {
1698                 cf = dma_fence_array_create(number_tiles, fences,
1699                                             vm->composite_fence_ctx,
1700                                             vm->composite_fence_seqno++,
1701                                             false);
1702                 if (!cf) {
1703                         --vm->composite_fence_seqno;
1704                         err = -ENOMEM;
1705                         goto err_fences;
1706                 }
1707         }
1708
1709         if (last_op) {
1710                 for (i = 0; i < num_syncs; i++)
1711                         xe_sync_entry_signal(&syncs[i], NULL,
1712                                              cf ? &cf->base : fence);
1713         }
1714
1715         return cf ? &cf->base : fence;
1716
1717 err_fences:
1718         if (fences) {
1719                 while (cur_fence) {
1720                         /* FIXME: Rewind the previous binds? */
1721                         dma_fence_put(fences[--cur_fence]);
1722                 }
1723                 kfree(fences);
1724         }
1725
1726         return ERR_PTR(err);
1727 }
1728
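     /*
      * Fence returned to user syncs for asynchronous bind operations. It is
      * signalled from a callback on the underlying bind/unbind fence and
      * holds a VM reference until that happens.
      */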
1729 struct async_op_fence {
1730         struct dma_fence fence;
1731         struct dma_fence *wait_fence;
1732         struct dma_fence_cb cb;
1733         struct xe_vm *vm;
1734         wait_queue_head_t wq;
1735         bool started;
1736 };
1737
1738 static const char *async_op_fence_get_driver_name(struct dma_fence *dma_fence)
1739 {
1740         return "xe";
1741 }
1742
1743 static const char *
1744 async_op_fence_get_timeline_name(struct dma_fence *dma_fence)
1745 {
1746         return "async_op_fence";
1747 }
1748
1749 static const struct dma_fence_ops async_op_fence_ops = {
1750         .get_driver_name = async_op_fence_get_driver_name,
1751         .get_timeline_name = async_op_fence_get_timeline_name,
1752 };
1753
1754 static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
1755 {
1756         struct async_op_fence *afence =
1757                 container_of(cb, struct async_op_fence, cb);
1758
1759         afence->fence.error = afence->wait_fence->error;
1760         dma_fence_signal(&afence->fence);
1761         xe_vm_put(afence->vm);
1762         dma_fence_put(afence->wait_fence);
1763         dma_fence_put(&afence->fence);
1764 }
1765
1766 static void add_async_op_fence_cb(struct xe_vm *vm,
1767                                   struct dma_fence *fence,
1768                                   struct async_op_fence *afence)
1769 {
1770         int ret;
1771
1772         if (!xe_vm_no_dma_fences(vm)) {
1773                 afence->started = true;
1774                 smp_wmb();
1775                 wake_up_all(&afence->wq);
1776         }
1777
1778         afence->wait_fence = dma_fence_get(fence);
1779         afence->vm = xe_vm_get(vm);
1780         dma_fence_get(&afence->fence);
1781         ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb);
1782         if (ret == -ENOENT) {
1783                 afence->fence.error = afence->wait_fence->error;
1784                 dma_fence_signal(&afence->fence);
1785         }
1786         if (ret) {
1787                 xe_vm_put(vm);
1788                 dma_fence_put(afence->wait_fence);
1789                 dma_fence_put(&afence->fence);
1790         }
1791         XE_WARN_ON(ret && ret != -ENOENT);
1792 }
1793
1794 int xe_vm_async_fence_wait_start(struct dma_fence *fence)
1795 {
1796         if (fence->ops == &async_op_fence_ops) {
1797                 struct async_op_fence *afence =
1798                         container_of(fence, struct async_op_fence, fence);
1799
1800                 XE_WARN_ON(xe_vm_no_dma_fences(afence->vm));
1801
1802                 smp_rmb();
1803                 return wait_event_interruptible(afence->wq, afence->started);
1804         }
1805
1806         return 0;
1807 }
1808
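     /*
      * Common bind helper: perform the bind now when 'immediate' is set,
      * otherwise (fault mode only) just signal the syncs with a stub fence
      * and leave the page tables to be populated on demand. The optional
      * async fence is chained to whatever fence results.
      */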
1809 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
1810                         struct xe_exec_queue *q, struct xe_sync_entry *syncs,
1811                         u32 num_syncs, struct async_op_fence *afence,
1812                         bool immediate, bool first_op, bool last_op)
1813 {
1814         struct dma_fence *fence;
1815
1816         xe_vm_assert_held(vm);
1817
1818         if (immediate) {
1819                 fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
1820                                        last_op);
1821                 if (IS_ERR(fence))
1822                         return PTR_ERR(fence);
1823         } else {
1824                 int i;
1825
1826                 XE_WARN_ON(!xe_vm_in_fault_mode(vm));
1827
1828                 fence = dma_fence_get_stub();
1829                 if (last_op) {
1830                         for (i = 0; i < num_syncs; i++)
1831                                 xe_sync_entry_signal(&syncs[i], NULL, fence);
1832                 }
1833         }
1834         if (afence)
1835                 add_async_op_fence_cb(vm, fence, afence);
1836
1837         dma_fence_put(fence);
1838         return 0;
1839 }
1840
1841 static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
1842                       struct xe_bo *bo, struct xe_sync_entry *syncs,
1843                       u32 num_syncs, struct async_op_fence *afence,
1844                       bool immediate, bool first_op, bool last_op)
1845 {
1846         int err;
1847
1848         xe_vm_assert_held(vm);
1849         xe_bo_assert_held(bo);
1850
1851         if (bo && immediate) {
1852                 err = xe_bo_validate(bo, vm, true);
1853                 if (err)
1854                         return err;
1855         }
1856
1857         return __xe_vm_bind(vm, vma, q, syncs, num_syncs, afence, immediate,
1858                             first_op, last_op);
1859 }
1860
1861 static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
1862                         struct xe_exec_queue *q, struct xe_sync_entry *syncs,
1863                         u32 num_syncs, struct async_op_fence *afence,
1864                         bool first_op, bool last_op)
1865 {
1866         struct dma_fence *fence;
1867
1868         xe_vm_assert_held(vm);
1869         xe_bo_assert_held(xe_vma_bo(vma));
1870
1871         fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
1872         if (IS_ERR(fence))
1873                 return PTR_ERR(fence);
1874         if (afence)
1875                 add_async_op_fence_cb(vm, fence, afence);
1876
1877         xe_vma_destroy(vma, fence);
1878         dma_fence_put(fence);
1879
1880         return 0;
1881 }
1882
1883 static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm,
1884                                         u64 value)
1885 {
1886         if (XE_IOCTL_DBG(xe, !value))
1887                 return -EINVAL;
1888
1889         if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
1890                 return -EOPNOTSUPP;
1891
1892         if (XE_IOCTL_DBG(xe, vm->async_ops.error_capture.addr))
1893                 return -EOPNOTSUPP;
1894
1895         vm->async_ops.error_capture.mm = current->mm;
1896         vm->async_ops.error_capture.addr = value;
1897         init_waitqueue_head(&vm->async_ops.error_capture.wq);
1898
1899         return 0;
1900 }
1901
1902 typedef int (*xe_vm_set_property_fn)(struct xe_device *xe, struct xe_vm *vm,
1903                                      u64 value);
1904
1905 static const xe_vm_set_property_fn vm_set_property_funcs[] = {
1906         [XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS] =
1907                 vm_set_error_capture_address,
1908 };
1909
1910 static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm,
1911                                     u64 extension)
1912 {
1913         u64 __user *address = u64_to_user_ptr(extension);
1914         struct drm_xe_ext_vm_set_property ext;
1915         int err;
1916
1917         err = __copy_from_user(&ext, address, sizeof(ext));
1918         if (XE_IOCTL_DBG(xe, err))
1919                 return -EFAULT;
1920
1921         if (XE_IOCTL_DBG(xe, ext.property >=
1922                          ARRAY_SIZE(vm_set_property_funcs)) ||
1923             XE_IOCTL_DBG(xe, ext.pad) ||
1924             XE_IOCTL_DBG(xe, ext.reserved[0] || ext.reserved[1]))
1925                 return -EINVAL;
1926
1927         return vm_set_property_funcs[ext.property](xe, vm, ext.value);
1928 }
1929
1930 typedef int (*xe_vm_user_extension_fn)(struct xe_device *xe, struct xe_vm *vm,
1931                                        u64 extension);
1932
1933 static const xe_vm_user_extension_fn vm_user_extension_funcs[] = {
1934         [XE_VM_EXTENSION_SET_PROPERTY] = vm_user_ext_set_property,
1935 };
1936
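     /*
      * Walk the user-extension chain recursively, dispatching each extension
      * to its handler. The chain is capped at MAX_USER_EXTENSIONS entries to
      * bound the recursion depth.
      */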
1937 #define MAX_USER_EXTENSIONS     16
1938 static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm,
1939                               u64 extensions, int ext_number)
1940 {
1941         u64 __user *address = u64_to_user_ptr(extensions);
1942         struct xe_user_extension ext;
1943         int err;
1944
1945         if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
1946                 return -E2BIG;
1947
1948         err = __copy_from_user(&ext, address, sizeof(ext));
1949         if (XE_IOCTL_DBG(xe, err))
1950                 return -EFAULT;
1951
1952         if (XE_IOCTL_DBG(xe, ext.pad) ||
1953             XE_IOCTL_DBG(xe, ext.name >=
1954                          ARRAY_SIZE(vm_user_extension_funcs)))
1955                 return -EINVAL;
1956
1957         err = vm_user_extension_funcs[ext.name](xe, vm, extensions);
1958         if (XE_IOCTL_DBG(xe, err))
1959                 return err;
1960
1961         if (ext.next_extension)
1962                 return vm_user_extensions(xe, vm, ext.next_extension,
1963                                           ++ext_number);
1964
1965         return 0;
1966 }
1967
1968 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \
1969                                     DRM_XE_VM_CREATE_COMPUTE_MODE | \
1970                                     DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \
1971                                     DRM_XE_VM_CREATE_FAULT_MODE)
1972
1973 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1974                        struct drm_file *file)
1975 {
1976         struct xe_device *xe = to_xe_device(dev);
1977         struct xe_file *xef = to_xe_file(file);
1978         struct drm_xe_vm_create *args = data;
1979         struct xe_vm *vm;
1980         u32 id, asid;
1981         int err;
1982         u32 flags = 0;
1983
1984         if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
1985                 args->flags |= DRM_XE_VM_CREATE_SCRATCH_PAGE;
1986
1987         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
1988                          !xe->info.supports_usm))
1989                 return -EINVAL;
1990
1991         if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1992                 return -EINVAL;
1993
1994         if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1995                 return -EINVAL;
1996
1997         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE &&
1998                          args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
1999                 return -EINVAL;
2000
2001         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE &&
2002                          args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
2003                 return -EINVAL;
2004
2005         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
2006                          xe_device_in_non_fault_mode(xe)))
2007                 return -EINVAL;
2008
2009         if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) &&
2010                          xe_device_in_fault_mode(xe)))
2011                 return -EINVAL;
2012
2013         if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE)
2014                 flags |= XE_VM_FLAG_SCRATCH_PAGE;
2015         if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE)
2016                 flags |= XE_VM_FLAG_COMPUTE_MODE;
2017         if (args->flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS)
2018                 flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
2019         if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE)
2020                 flags |= XE_VM_FLAG_FAULT_MODE;
2021
2022         vm = xe_vm_create(xe, flags);
2023         if (IS_ERR(vm))
2024                 return PTR_ERR(vm);
2025
2026         if (args->extensions) {
2027                 err = vm_user_extensions(xe, vm, args->extensions, 0);
2028                 if (XE_IOCTL_DBG(xe, err)) {
2029                         xe_vm_close_and_put(vm);
2030                         return err;
2031                 }
2032         }
2033
2034         mutex_lock(&xef->vm.lock);
2035         err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
2036         mutex_unlock(&xef->vm.lock);
2037         if (err) {
2038                 xe_vm_close_and_put(vm);
2039                 return err;
2040         }
2041
2042         if (xe->info.has_asid) {
2043                 mutex_lock(&xe->usm.lock);
2044                 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
2045                                       XA_LIMIT(0, XE_MAX_ASID - 1),
2046                                       &xe->usm.next_asid, GFP_KERNEL);
2047                 mutex_unlock(&xe->usm.lock);
2048                 if (err) {
2049                         xe_vm_close_and_put(vm);
2050                         return err;
2051                 }
2052                 vm->usm.asid = asid;
2053         }
2054
2055         args->vm_id = id;
2056
2057 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
2058         /* Warning: Security issue - never enable by default */
2059         args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
2060 #endif
2061
2062         return 0;
2063 }
2064
2065 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
2066                         struct drm_file *file)
2067 {
2068         struct xe_device *xe = to_xe_device(dev);
2069         struct xe_file *xef = to_xe_file(file);
2070         struct drm_xe_vm_destroy *args = data;
2071         struct xe_vm *vm;
2072         int err = 0;
2073
2074         if (XE_IOCTL_DBG(xe, args->pad) ||
2075             XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2076                 return -EINVAL;
2077
2078         mutex_lock(&xef->vm.lock);
2079         vm = xa_load(&xef->vm.xa, args->vm_id);
2080         if (XE_IOCTL_DBG(xe, !vm))
2081                 err = -ENOENT;
2082         else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
2083                 err = -EBUSY;
2084         else
2085                 xa_erase(&xef->vm.xa, args->vm_id);
2086         mutex_unlock(&xef->vm.lock);
2087
2088         if (!err)
2089                 xe_vm_close_and_put(vm);
2090
2091         return err;
2092 }
2093
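     /* Map a prefetch region index from the bind IOCTL to a TTM placement. */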
2094 static const u32 region_to_mem_type[] = {
2095         XE_PL_TT,
2096         XE_PL_VRAM0,
2097         XE_PL_VRAM1,
2098 };
2099
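     /*
      * Prefetch: migrate the backing BO (if any) to the requested memory
      * region and rebind the VMA if any of its tile bindings are missing or
      * have been invalidated; otherwise the operation is a no-op and the
      * syncs/fence are signalled immediately.
      */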
2100 static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
2101                           struct xe_exec_queue *q, u32 region,
2102                           struct xe_sync_entry *syncs, u32 num_syncs,
2103                           struct async_op_fence *afence, bool first_op,
2104                           bool last_op)
2105 {
2106         int err;
2107
2108         XE_WARN_ON(region >= ARRAY_SIZE(region_to_mem_type));
2109
2110         if (!xe_vma_has_no_bo(vma)) {
2111                 err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
2112                 if (err)
2113                         return err;
2114         }
2115
2116         if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
2117                 return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
2118                                   afence, true, first_op, last_op);
2119         } else {
2120                 int i;
2121
2122                 /* Nothing to do, signal fences now */
2123                 if (last_op) {
2124                         for (i = 0; i < num_syncs; i++)
2125                                 xe_sync_entry_signal(&syncs[i], NULL,
2126                                                      dma_fence_get_stub());
2127                 }
2128                 if (afence)
2129                         dma_fence_signal(&afence->fence);
2130                 return 0;
2131         }
2132 }
2133
2134 #define VM_BIND_OP(op)  ((op) & 0xffff)
2135
2136 static void vm_set_async_error(struct xe_vm *vm, int err)
2137 {
2138         lockdep_assert_held(&vm->lock);
2139         vm->async_ops.error = err;
2140 }
2141
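     /*
      * Early sanity check of a bind operation against the current VMA tree:
      * synchronous maps may not overlap an existing VMA and synchronous
      * unmaps must match an existing VMA exactly. -ENODATA means there is
      * nothing to do and the IOCTL treats it as success.
      */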
2142 static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
2143                                     u64 addr, u64 range, u32 op)
2144 {
2145         struct xe_device *xe = vm->xe;
2146         struct xe_vma *vma;
2147         bool async = !!(op & XE_VM_BIND_FLAG_ASYNC);
2148
2149         lockdep_assert_held(&vm->lock);
2150
2151         switch (VM_BIND_OP(op)) {
2152         case XE_VM_BIND_OP_MAP:
2153         case XE_VM_BIND_OP_MAP_USERPTR:
2154                 vma = xe_vm_find_overlapping_vma(vm, addr, range);
2155                 if (XE_IOCTL_DBG(xe, vma && !async))
2156                         return -EBUSY;
2157                 break;
2158         case XE_VM_BIND_OP_UNMAP:
2159         case XE_VM_BIND_OP_PREFETCH:
2160                 vma = xe_vm_find_overlapping_vma(vm, addr, range);
2161                 if (XE_IOCTL_DBG(xe, !vma))
2162                         /* Not an actual error, IOCTL cleans up and returns 0 */
2163                         return -ENODATA;
2164                 if (XE_IOCTL_DBG(xe, (xe_vma_start(vma) != addr ||
2165                                       xe_vma_end(vma) != addr + range) && !async))
2166                         return -EINVAL;
2167                 break;
2168         case XE_VM_BIND_OP_UNMAP_ALL:
2169                 if (XE_IOCTL_DBG(xe, list_empty(&bo->ttm.base.gpuva.list)))
2170                         /* Not an actual error, IOCTL cleans up and returns 0 */
2171                         return -ENODATA;
2172                 break;
2173         default:
2174                 XE_WARN_ON("NOT POSSIBLE");
2175                 return -EINVAL;
2176         }
2177
2178         return 0;
2179 }
2180
2181 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2182                              bool post_commit)
2183 {
2184         down_read(&vm->userptr.notifier_lock);
2185         vma->gpuva.flags |= XE_VMA_DESTROYED;
2186         up_read(&vm->userptr.notifier_lock);
2187         if (post_commit)
2188                 xe_vm_remove_vma(vm, vma);
2189 }
2190
2191 #undef ULL
2192 #define ULL     unsigned long long
2193
2194 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
2195 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2196 {
2197         struct xe_vma *vma;
2198
2199         switch (op->op) {
2200         case DRM_GPUVA_OP_MAP:
2201                 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2202                        (ULL)op->map.va.addr, (ULL)op->map.va.range);
2203                 break;
2204         case DRM_GPUVA_OP_REMAP:
2205                 vma = gpuva_to_vma(op->remap.unmap->va);
2206                 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2207                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2208                        op->remap.unmap->keep ? 1 : 0);
2209                 if (op->remap.prev)
2210                         vm_dbg(&xe->drm,
2211                                "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2212                                (ULL)op->remap.prev->va.addr,
2213                                (ULL)op->remap.prev->va.range);
2214                 if (op->remap.next)
2215                         vm_dbg(&xe->drm,
2216                                "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2217                                (ULL)op->remap.next->va.addr,
2218                                (ULL)op->remap.next->va.range);
2219                 break;
2220         case DRM_GPUVA_OP_UNMAP:
2221                 vma = gpuva_to_vma(op->unmap.va);
2222                 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2223                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2224                        op->unmap.keep ? 1 : 0);
2225                 break;
2226         case DRM_GPUVA_OP_PREFETCH:
2227                 vma = gpuva_to_vma(op->prefetch.va);
2228                 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2229                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2230                 break;
2231         default:
2232                 XE_WARN_ON("NOT POSSIBLE");
2233         }
2234 }
2235 #else
2236 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2237 {
2238 }
2239 #endif
2240
2241 /*
2242  * Create the operations list from the IOCTL arguments and set up operation
2243  * fields so parse and commit are decoupled from the IOCTL. This step can fail.
2244  */
2245 static struct drm_gpuva_ops *
2246 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
2247                          u64 bo_offset_or_userptr, u64 addr, u64 range,
2248                          u32 operation, u8 tile_mask, u32 region)
2249 {
2250         struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2251         struct drm_gpuva_ops *ops;
2252         struct drm_gpuva_op *__op;
2253         struct xe_vma_op *op;
2254         struct drm_gpuvm_bo *vm_bo;
2255         int err;
2256
2257         lockdep_assert_held_write(&vm->lock);
2258
2259         vm_dbg(&vm->xe->drm,
2260                "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2261                VM_BIND_OP(operation), (ULL)addr, (ULL)range,
2262                (ULL)bo_offset_or_userptr);
2263
2264         switch (VM_BIND_OP(operation)) {
2265         case XE_VM_BIND_OP_MAP:
2266         case XE_VM_BIND_OP_MAP_USERPTR:
2267                 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
2268                                                   obj, bo_offset_or_userptr);
2269                 if (IS_ERR(ops))
2270                         return ops;
2271
2272                 drm_gpuva_for_each_op(__op, ops) {
2273                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2274
2275                         op->tile_mask = tile_mask;
2276                         op->map.immediate =
2277                                 operation & XE_VM_BIND_FLAG_IMMEDIATE;
2278                         op->map.read_only =
2279                                 operation & XE_VM_BIND_FLAG_READONLY;
2280                         op->map.is_null = operation & XE_VM_BIND_FLAG_NULL;
2281                 }
2282                 break;
2283         case XE_VM_BIND_OP_UNMAP:
2284                 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2285                 if (IS_ERR(ops))
2286                         return ops;
2287
2288                 drm_gpuva_for_each_op(__op, ops) {
2289                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2290
2291                         op->tile_mask = tile_mask;
2292                 }
2293                 break;
2294         case XE_VM_BIND_OP_PREFETCH:
2295                 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2296                 if (IS_ERR(ops))
2297                         return ops;
2298
2299                 drm_gpuva_for_each_op(__op, ops) {
2300                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2301
2302                         op->tile_mask = tile_mask;
2303                         op->prefetch.region = region;
2304                 }
2305                 break;
2306         case XE_VM_BIND_OP_UNMAP_ALL:
2307                 XE_WARN_ON(!bo);
2308
2309                 err = xe_bo_lock(bo, true);
2310                 if (err)
2311                         return ERR_PTR(err);
2312
2313                 vm_bo = drm_gpuvm_bo_find(&vm->gpuvm, obj);
2314                 if (!vm_bo) {
                             xe_bo_unlock(bo);
                             return ERR_PTR(-ENODATA);
                     }
2316
2317                 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2318                 drm_gpuvm_bo_put(vm_bo);
2319                 xe_bo_unlock(bo);
2320                 if (IS_ERR(ops))
2321                         return ops;
2322
2323                 drm_gpuva_for_each_op(__op, ops) {
2324                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2325
2326                         op->tile_mask = tile_mask;
2327                 }
2328                 break;
2329         default:
2330                 XE_WARN_ON("NOT POSSIBLE");
2331                 ops = ERR_PTR(-EINVAL);
2332         }
2333
2334 #ifdef TEST_VM_ASYNC_OPS_ERROR
2335         if (operation & FORCE_ASYNC_OP_ERROR) {
2336                 op = list_first_entry_or_null(&ops->list, struct xe_vma_op,
2337                                               base.entry);
2338                 if (op)
2339                         op->inject_error = true;
2340         }
2341 #endif
2342
2343         if (!IS_ERR(ops))
2344                 drm_gpuva_for_each_op(__op, ops)
2345                         print_op(vm->xe, __op);
2346
2347         return ops;
2348 }
2349
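     /*
      * Create a VMA for a GPUVA map operation: pin pages for userptr VMAs
      * and, for BOs not private to this VM, add the mapping to the
      * external-object list and install preempt fences. On failure the
      * partially constructed VMA is destroyed again.
      */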
2350 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2351                               u8 tile_mask, bool read_only, bool is_null)
2352 {
2353         struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2354         struct xe_vma *vma;
2355         int err;
2356
2357         lockdep_assert_held_write(&vm->lock);
2358
2359         if (bo) {
2360                 err = xe_bo_lock(bo, true);
2361                 if (err)
2362                         return ERR_PTR(err);
2363         }
2364         vma = xe_vma_create(vm, bo, op->gem.offset,
2365                             op->va.addr, op->va.addr +
2366                             op->va.range - 1, read_only, is_null,
2367                             tile_mask);
2368         if (bo)
2369                 xe_bo_unlock(bo);
2370
2371         if (xe_vma_is_userptr(vma)) {
2372                 err = xe_vma_userptr_pin_pages(vma);
2373                 if (err) {
2374                         prep_vma_destroy(vm, vma, false);
2375                         xe_vma_destroy_unlocked(vma);
2376                         return ERR_PTR(err);
2377                 }
2378         } else if (!xe_vma_has_no_bo(vma) && !bo->vm) {
2379                 vm_insert_extobj(vm, vma);
2380                 err = add_preempt_fences(vm, bo);
2381                 if (err) {
2382                         prep_vma_destroy(vm, vma, false);
2383                         xe_vma_destroy_unlocked(vma);
2384                         return ERR_PTR(err);
2385                 }
2386         }
2387
2388         return vma;
2389 }
2390
2391 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2392 {
2393         if (vma->gpuva.flags & XE_VMA_PTE_1G)
2394                 return SZ_1G;
2395         else if (vma->gpuva.flags & XE_VMA_PTE_2M)
2396                 return SZ_2M;
2397
2398         return SZ_4K;
2399 }
2400
2401 static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2402 {
2403         switch (size) {
2404         case SZ_1G:
2405                 vma->gpuva.flags |= XE_VMA_PTE_1G;
2406                 break;
2407         case SZ_2M:
2408                 vma->gpuva.flags |= XE_VMA_PTE_2M;
2409                 break;
2410         }
2411
2412         return SZ_4K;
2413 }
2414
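     /*
      * Commit an operation to the VMA tree (insert/remove VMAs) without
      * touching the GPU page tables yet. The *_COMMITTED flags set here tell
      * the unwind path what has to be rolled back on error.
      */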
2415 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2416 {
2417         int err = 0;
2418
2419         lockdep_assert_held_write(&vm->lock);
2420
2421         switch (op->base.op) {
2422         case DRM_GPUVA_OP_MAP:
2423                 err |= xe_vm_insert_vma(vm, op->map.vma);
2424                 if (!err)
2425                         op->flags |= XE_VMA_OP_COMMITTED;
2426                 break;
2427         case DRM_GPUVA_OP_REMAP:
2428                 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2429                                  true);
2430                 op->flags |= XE_VMA_OP_COMMITTED;
2431
2432                 if (op->remap.prev) {
2433                         err |= xe_vm_insert_vma(vm, op->remap.prev);
2434                         if (!err)
2435                                 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2436                         if (!err && op->remap.skip_prev)
2437                                 op->remap.prev = NULL;
2438                 }
2439                 if (op->remap.next) {
2440                         err |= xe_vm_insert_vma(vm, op->remap.next);
2441                         if (!err)
2442                                 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2443                         if (!err && op->remap.skip_next)
2444                                 op->remap.next = NULL;
2445                 }
2446
2447                 /* Adjust for partial unbind after removing the VMA from the VM */
2448                 if (!err) {
2449                         op->base.remap.unmap->va->va.addr = op->remap.start;
2450                         op->base.remap.unmap->va->va.range = op->remap.range;
2451                 }
2452                 break;
2453         case DRM_GPUVA_OP_UNMAP:
2454                 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2455                 op->flags |= XE_VMA_OP_COMMITTED;
2456                 break;
2457         case DRM_GPUVA_OP_PREFETCH:
2458                 op->flags |= XE_VMA_OP_COMMITTED;
2459                 break;
2460         default:
2461                 XE_WARN_ON("NOT POSSIBLE");
2462         }
2463
2464         return err;
2465 }
2466
2467
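     /*
      * Turn a list of GPUVA ops into committed xe_vma_ops ready for
      * execution: create new VMAs for map/remap ops, commit each op to the
      * VMA tree and, for the last async operation, allocate the
      * async_op_fence that the user syncs will be attached to.
      */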
2468 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
2469                                    struct drm_gpuva_ops *ops,
2470                                    struct xe_sync_entry *syncs, u32 num_syncs,
2471                                    struct list_head *ops_list, bool last,
2472                                    bool async)
2473 {
2474         struct xe_vma_op *last_op = NULL;
2475         struct async_op_fence *fence = NULL;
2476         struct drm_gpuva_op *__op;
2477         int err = 0;
2478
2479         lockdep_assert_held_write(&vm->lock);
2480
2481         if (last && num_syncs && async) {
2482                 u64 seqno;
2483
2484                 fence = kmalloc(sizeof(*fence), GFP_KERNEL);
2485                 if (!fence)
2486                         return -ENOMEM;
2487
2488                 seqno = q ? ++q->bind.fence_seqno : ++vm->async_ops.fence.seqno;
2489                 dma_fence_init(&fence->fence, &async_op_fence_ops,
2490                                &vm->async_ops.lock, q ? q->bind.fence_ctx :
2491                                vm->async_ops.fence.context, seqno);
2492
2493                 if (!xe_vm_no_dma_fences(vm)) {
2494                         fence->vm = vm;
2495                         fence->started = false;
2496                         init_waitqueue_head(&fence->wq);
2497                 }
2498         }
2499
2500         drm_gpuva_for_each_op(__op, ops) {
2501                 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2502                 bool first = list_empty(ops_list);
2503
2504                 XE_WARN_ON(!first && !async);
2505
2506                 INIT_LIST_HEAD(&op->link);
2507                 list_add_tail(&op->link, ops_list);
2508
2509                 if (first) {
2510                         op->flags |= XE_VMA_OP_FIRST;
2511                         op->num_syncs = num_syncs;
2512                         op->syncs = syncs;
2513                 }
2514
2515                 op->q = q;
2516
2517                 switch (op->base.op) {
2518                 case DRM_GPUVA_OP_MAP:
2519                 {
2520                         struct xe_vma *vma;
2521
2522                         vma = new_vma(vm, &op->base.map,
2523                                       op->tile_mask, op->map.read_only,
2524                                       op->map.is_null);
2525                         if (IS_ERR(vma)) {
2526                                 err = PTR_ERR(vma);
2527                                 goto free_fence;
2528                         }
2529
2530                         op->map.vma = vma;
2531                         break;
2532                 }
2533                 case DRM_GPUVA_OP_REMAP:
2534                 {
2535                         struct xe_vma *old =
2536                                 gpuva_to_vma(op->base.remap.unmap->va);
2537
2538                         op->remap.start = xe_vma_start(old);
2539                         op->remap.range = xe_vma_size(old);
2540
2541                         if (op->base.remap.prev) {
2542                                 struct xe_vma *vma;
2543                                 bool read_only =
2544                                         op->base.remap.unmap->va->flags &
2545                                         XE_VMA_READ_ONLY;
2546                                 bool is_null =
2547                                         op->base.remap.unmap->va->flags &
2548                                         DRM_GPUVA_SPARSE;
2549
2550                                 vma = new_vma(vm, op->base.remap.prev,
2551                                               op->tile_mask, read_only,
2552                                               is_null);
2553                                 if (IS_ERR(vma)) {
2554                                         err = PTR_ERR(vma);
2555                                         goto free_fence;
2556                                 }
2557
2558                                 op->remap.prev = vma;
2559
2560                                 /*
2561                                  * Userptr creates a new SG mapping so
2562                                  * we must also rebind.
2563                                  */
2564                                 op->remap.skip_prev = !xe_vma_is_userptr(old) &&
2565                                         IS_ALIGNED(xe_vma_end(vma),
2566                                                    xe_vma_max_pte_size(old));
2567                                 if (op->remap.skip_prev) {
2568                                         xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2569                                         op->remap.range -=
2570                                                 xe_vma_end(vma) -
2571                                                 xe_vma_start(old);
2572                                         op->remap.start = xe_vma_end(vma);
2573                                 }
2574                         }
2575
2576                         if (op->base.remap.next) {
2577                                 struct xe_vma *vma;
2578                                 bool read_only =
2579                                         op->base.remap.unmap->va->flags &
2580                                         XE_VMA_READ_ONLY;
2581
2582                                 bool is_null =
2583                                         op->base.remap.unmap->va->flags &
2584                                         DRM_GPUVA_SPARSE;
2585
2586                                 vma = new_vma(vm, op->base.remap.next,
2587                                               op->tile_mask, read_only,
2588                                               is_null);
2589                                 if (IS_ERR(vma)) {
2590                                         err = PTR_ERR(vma);
2591                                         goto free_fence;
2592                                 }
2593
2594                                 op->remap.next = vma;
2595
2596                                 /*
2597                                  * Userptr creates a new SG mapping so
2598                                  * we must also rebind.
2599                                  */
2600                                 op->remap.skip_next = !xe_vma_is_userptr(old) &&
2601                                         IS_ALIGNED(xe_vma_start(vma),
2602                                                    xe_vma_max_pte_size(old));
2603                                 if (op->remap.skip_next) {
2604                                         xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2605                                         op->remap.range -=
2606                                                 xe_vma_end(old) -
2607                                                 xe_vma_start(vma);
2608                                 }
2609                         }
2610                         break;
2611                 }
2612                 case DRM_GPUVA_OP_UNMAP:
2613                 case DRM_GPUVA_OP_PREFETCH:
2614                         /* Nothing to do */
2615                         break;
2616                 default:
2617                         XE_WARN_ON("NOT POSSIBLE");
2618                 }
2619
2620                 last_op = op;
2621
2622                 err = xe_vma_op_commit(vm, op);
2623                 if (err)
2624                         goto free_fence;
2625         }
2626
2627         /* FIXME: Unhandled corner case */
2628         XE_WARN_ON(!last_op && last && !list_empty(ops_list));
2629
2630         if (!last_op)
2631                 goto free_fence;
2632         last_op->ops = ops;
2633         if (last) {
2634                 last_op->flags |= XE_VMA_OP_LAST;
2635                 last_op->num_syncs = num_syncs;
2636                 last_op->syncs = syncs;
2637                 last_op->fence = fence;
2638         }
2639
2640         return 0;
2641
2642 free_fence:
2643         kfree(fence);
2644         return err;
2645 }
2646
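     /*
      * Execute a single VMA operation with the VM and the VMA's BO locked
      * through drm_exec. A remap is carried out as an unbind followed by
      * rebinds of the surviving prev/next ranges.
      */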
2647 static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
2648                       struct xe_vma *vma, struct xe_vma_op *op)
2649 {
2650         int err;
2651
2652         lockdep_assert_held_write(&vm->lock);
2653
2654         err = xe_vm_prepare_vma(exec, vma, 1);
2655         if (err)
2656                 return err;
2657
2658         xe_vm_assert_held(vm);
2659         xe_bo_assert_held(xe_vma_bo(vma));
2660
2661         switch (op->base.op) {
2662         case DRM_GPUVA_OP_MAP:
2663                 err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
2664                                  op->syncs, op->num_syncs, op->fence,
2665                                  op->map.immediate || !xe_vm_in_fault_mode(vm),
2666                                  op->flags & XE_VMA_OP_FIRST,
2667                                  op->flags & XE_VMA_OP_LAST);
2668                 break;
2669         case DRM_GPUVA_OP_REMAP:
2670         {
2671                 bool prev = !!op->remap.prev;
2672                 bool next = !!op->remap.next;
2673
2674                 if (!op->remap.unmap_done) {
2675                         if (prev || next) {
2676                                 vm->async_ops.munmap_rebind_inflight = true;
2677                                 vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
2678                         }
2679                         err = xe_vm_unbind(vm, vma, op->q, op->syncs,
2680                                            op->num_syncs,
2681                                            !prev && !next ? op->fence : NULL,
2682                                            op->flags & XE_VMA_OP_FIRST,
2683                                            op->flags & XE_VMA_OP_LAST && !prev &&
2684                                            !next);
2685                         if (err)
2686                                 break;
2687                         op->remap.unmap_done = true;
2688                 }
2689
2690                 if (prev) {
2691                         op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
2692                         err = xe_vm_bind(vm, op->remap.prev, op->q,
2693                                          xe_vma_bo(op->remap.prev), op->syncs,
2694                                          op->num_syncs,
2695                                          !next ? op->fence : NULL, true, false,
2696                                          op->flags & XE_VMA_OP_LAST && !next);
2697                         op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2698                         if (err)
2699                                 break;
2700                         op->remap.prev = NULL;
2701                 }
2702
2703                 if (next) {
2704                         op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
2705                         err = xe_vm_bind(vm, op->remap.next, op->q,
2706                                          xe_vma_bo(op->remap.next),
2707                                          op->syncs, op->num_syncs,
2708                                          op->fence, true, false,
2709                                          op->flags & XE_VMA_OP_LAST);
2710                         op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2711                         if (err)
2712                                 break;
2713                         op->remap.next = NULL;
2714                 }
2715                 vm->async_ops.munmap_rebind_inflight = false;
2716
2717                 break;
2718         }
2719         case DRM_GPUVA_OP_UNMAP:
2720                 err = xe_vm_unbind(vm, vma, op->q, op->syncs,
2721                                    op->num_syncs, op->fence,
2722                                    op->flags & XE_VMA_OP_FIRST,
2723                                    op->flags & XE_VMA_OP_LAST);
2724                 break;
2725         case DRM_GPUVA_OP_PREFETCH:
2726                 err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
2727                                      op->syncs, op->num_syncs, op->fence,
2728                                      op->flags & XE_VMA_OP_FIRST,
2729                                      op->flags & XE_VMA_OP_LAST);
2730                 break;
2731         default:
2732                 XE_WARN_ON("NOT POSSIBLE");
2733         }
2734
2735         if (err)
2736                 trace_xe_vma_fail(vma);
2737
2738         return err;
2739 }
2740
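     /*
      * Lock the needed objects with drm_exec and execute the operation,
      * retrying on contention. If a userptr VMA was invalidated in the
      * meantime (-EAGAIN), its pages are repinned and the operation retried.
      */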
2741 static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
2742                                struct xe_vma_op *op)
2743 {
2744         struct drm_exec exec;
2745         int err;
2746
2747 retry_userptr:
2748         drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
2749         drm_exec_until_all_locked(&exec) {
2750                 err = op_execute(&exec, vm, vma, op);
2751                 drm_exec_retry_on_contention(&exec);
2752                 if (err)
2753                         break;
2754         }
2755         drm_exec_fini(&exec);
2756
2757         if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
2758                 lockdep_assert_held_write(&vm->lock);
2759                 err = xe_vma_userptr_pin_pages(vma);
2760                 if (!err)
2761                         goto retry_userptr;
2762
2763                 trace_xe_vma_fail(vma);
2764         }
2765
2766         return err;
2767 }
2768
2769 static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
2770 {
2771         int ret = 0;
2772
2773         lockdep_assert_held_write(&vm->lock);
2774
2775 #ifdef TEST_VM_ASYNC_OPS_ERROR
2776         if (op->inject_error) {
2777                 op->inject_error = false;
2778                 return -ENOMEM;
2779         }
2780 #endif
2781
2782         switch (op->base.op) {
2783         case DRM_GPUVA_OP_MAP:
2784                 ret = __xe_vma_op_execute(vm, op->map.vma, op);
2785                 break;
2786         case DRM_GPUVA_OP_REMAP:
2787         {
2788                 struct xe_vma *vma;
2789
2790                 if (!op->remap.unmap_done)
2791                         vma = gpuva_to_vma(op->base.remap.unmap->va);
2792                 else if (op->remap.prev)
2793                         vma = op->remap.prev;
2794                 else
2795                         vma = op->remap.next;
2796
2797                 ret = __xe_vma_op_execute(vm, vma, op);
2798                 break;
2799         }
2800         case DRM_GPUVA_OP_UNMAP:
2801                 ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
2802                                           op);
2803                 break;
2804         case DRM_GPUVA_OP_PREFETCH:
2805                 ret = __xe_vma_op_execute(vm,
2806                                           gpuva_to_vma(op->base.prefetch.va),
2807                                           op);
2808                 break;
2809         default:
2810                 XE_WARN_ON("NOT POSSIBLE");
2811         }
2812
2813         return ret;
2814 }
2815
2816 static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
2817 {
2818         bool last = op->flags & XE_VMA_OP_LAST;
2819
2820         if (last) {
2821                 while (op->num_syncs--)
2822                         xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
2823                 kfree(op->syncs);
2824                 if (op->q)
2825                         xe_exec_queue_put(op->q);
2826                 if (op->fence)
2827                         dma_fence_put(&op->fence->fence);
2828         }
2829         if (!list_empty(&op->link)) {
2830                 spin_lock_irq(&vm->async_ops.lock);
2831                 list_del(&op->link);
2832                 spin_unlock_irq(&vm->async_ops.lock);
2833         }
2834         if (op->ops)
2835                 drm_gpuva_ops_free(&vm->gpuvm, op->ops);
2836         if (last)
2837                 xe_vm_put(vm);
2838 }
2839
2840 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2841                              bool post_commit, bool prev_post_commit,
2842                              bool next_post_commit)
2843 {
2844         lockdep_assert_held_write(&vm->lock);
2845
2846         switch (op->base.op) {
2847         case DRM_GPUVA_OP_MAP:
2848                 if (op->map.vma) {
2849                         prep_vma_destroy(vm, op->map.vma, post_commit);
2850                         xe_vma_destroy_unlocked(op->map.vma);
2851                 }
2852                 break;
2853         case DRM_GPUVA_OP_UNMAP:
2854         {
2855                 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2856
2857                 if (vma) {
2858                         down_read(&vm->userptr.notifier_lock);
2859                         vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2860                         up_read(&vm->userptr.notifier_lock);
2861                         if (post_commit)
2862                                 xe_vm_insert_vma(vm, vma);
2863                 }
2864                 break;
2865         }
2866         case DRM_GPUVA_OP_REMAP:
2867         {
2868                 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2869
2870                 if (op->remap.prev) {
2871                         prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2872                         xe_vma_destroy_unlocked(op->remap.prev);
2873                 }
2874                 if (op->remap.next) {
2875                         prep_vma_destroy(vm, op->remap.next, next_post_commit);
2876                         xe_vma_destroy_unlocked(op->remap.next);
2877                 }
2878                 if (vma) {
2879                         down_read(&vm->userptr.notifier_lock);
2880                         vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2881                         up_read(&vm->userptr.notifier_lock);
2882                         if (post_commit)
2883                                 xe_vm_insert_vma(vm, vma);
2884                 }
2885                 break;
2886         }
2887         case DRM_GPUVA_OP_PREFETCH:
2888                 /* Nothing to do */
2889                 break;
2890         default:
2891                 XE_WARN_ON("NOT POSSIBLE");
2892         }
2893 }
2894
2895 static struct xe_vma_op *next_vma_op(struct xe_vm *vm)
2896 {
2897         return list_first_entry_or_null(&vm->async_ops.pending,
2898                                         struct xe_vma_op, link);
2899 }
2900
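     /*
      * Worker that drains the VM's pending async bind operations. On failure
      * the VM is put into an error state and processing stops; for a closed
      * VM the remaining operations are only cleaned up and their fences
      * signalled.
      */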
2901 static void xe_vma_op_work_func(struct work_struct *w)
2902 {
2903         struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work);
2904
2905         for (;;) {
2906                 struct xe_vma_op *op;
2907                 int err;
2908
2909                 if (vm->async_ops.error && !xe_vm_is_closed(vm))
2910                         break;
2911
2912                 spin_lock_irq(&vm->async_ops.lock);
2913                 op = next_vma_op(vm);
2914                 spin_unlock_irq(&vm->async_ops.lock);
2915
2916                 if (!op)
2917                         break;
2918
2919                 if (!xe_vm_is_closed(vm)) {
2920                         down_write(&vm->lock);
2921                         err = xe_vma_op_execute(vm, op);
2922                         if (err) {
2923                                 drm_warn(&vm->xe->drm,
2924                                          "Async VM op(%d) failed with %d",
2925                                          op->base.op, err);
2926                                 vm_set_async_error(vm, err);
2927                                 up_write(&vm->lock);
2928
2929                                 if (vm->async_ops.error_capture.addr)
2930                                         vm_error_capture(vm, err, 0, 0, 0);
2931                                 break;
2932                         }
2933                         up_write(&vm->lock);
2934                 } else {
2935                         struct xe_vma *vma;
2936
2937                         switch (op->base.op) {
2938                         case DRM_GPUVA_OP_REMAP:
2939                                 vma = gpuva_to_vma(op->base.remap.unmap->va);
2940                                 trace_xe_vma_flush(vma);
2941
2942                                 down_write(&vm->lock);
2943                                 xe_vma_destroy_unlocked(vma);
2944                                 up_write(&vm->lock);
2945                                 break;
2946                         case DRM_GPUVA_OP_UNMAP:
2947                                 vma = gpuva_to_vma(op->base.unmap.va);
2948                                 trace_xe_vma_flush(vma);
2949
2950                                 down_write(&vm->lock);
2951                                 xe_vma_destroy_unlocked(vma);
2952                                 up_write(&vm->lock);
2953                                 break;
2954                         default:
2955                                 /* Nothing to do */
2956                                 break;
2957                         }
2958
2959                         if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
2960                                                    &op->fence->fence.flags)) {
2961                                 if (!xe_vm_no_dma_fences(vm)) {
2962                                         op->fence->started = true;
2963                                         wake_up_all(&op->fence->wq);
2964                                 }
2965                                 dma_fence_signal(&op->fence->fence);
2966                         }
2967                 }
2968
2969                 xe_vma_op_cleanup(vm, op);
2970         }
2971 }
2972
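     /*
      * Execute the parsed operation list: directly for a synchronous bind,
      * or by attaching the async fence to the user syncs and queueing the
      * list on the async worker. Synchronous failures are unwound here.
      */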
2973 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
2974                                      struct list_head *ops_list, bool async)
2975 {
2976         struct xe_vma_op *op, *last_op, *next;
2977         int err;
2978
2979         lockdep_assert_held_write(&vm->lock);
2980
2981         list_for_each_entry(op, ops_list, link)
2982                 last_op = op;
2983
2984         if (!async) {
2985                 err = xe_vma_op_execute(vm, last_op);
2986                 if (err)
2987                         goto unwind;
2988                 xe_vma_op_cleanup(vm, last_op);
2989         } else {
2990                 int i;
2991                 bool installed = false;
2992
2993                 for (i = 0; i < last_op->num_syncs; i++)
2994                         installed |= xe_sync_entry_signal(&last_op->syncs[i],
2995                                                           NULL,
2996                                                           &last_op->fence->fence);
2997                 if (!installed && last_op->fence)
2998                         dma_fence_signal(&last_op->fence->fence);
2999
3000                 spin_lock_irq(&vm->async_ops.lock);
3001                 list_splice_tail(ops_list, &vm->async_ops.pending);
3002                 spin_unlock_irq(&vm->async_ops.lock);
3003
3004                 if (!vm->async_ops.error)
3005                         queue_work(system_unbound_wq, &vm->async_ops.work);
3006         }
3007
3008         return 0;
3009
3010 unwind:
3011         list_for_each_entry_reverse(op, ops_list, link)
3012                 xe_vma_op_unwind(vm, op, op->flags & XE_VMA_OP_COMMITTED,
3013                                  op->flags & XE_VMA_OP_PREV_COMMITTED,
3014                                  op->flags & XE_VMA_OP_NEXT_COMMITTED);
3015         list_for_each_entry_safe(op, next, ops_list, link)
3016                 xe_vma_op_cleanup(vm, op);
3017
3018         return err;
3019 }
3020
3021 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
3022                                      struct drm_gpuva_ops **ops,
3023                                      int num_ops_list)
3024 {
3025         int i;
3026
3027         for (i = num_ops_list - 1; i >= 0; --i) {
3028                 struct drm_gpuva_ops *__ops = ops[i];
3029                 struct drm_gpuva_op *__op;
3030
3031                 if (!__ops)
3032                         continue;
3033
3034                 drm_gpuva_for_each_op_reverse(__op, __ops) {
3035                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
3036
3037                         xe_vma_op_unwind(vm, op,
3038                                          op->flags & XE_VMA_OP_COMMITTED,
3039                                          op->flags & XE_VMA_OP_PREV_COMMITTED,
3040                                          op->flags & XE_VMA_OP_NEXT_COMMITTED);
3041                 }
3042
3043                 drm_gpuva_ops_free(&vm->gpuvm, __ops);
3044         }
3045 }
3046
3047 #ifdef TEST_VM_ASYNC_OPS_ERROR
3048 #define SUPPORTED_FLAGS \
3049         (FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \
3050          XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | \
3051          XE_VM_BIND_FLAG_NULL | 0xffff)
3052 #else
3053 #define SUPPORTED_FLAGS \
3054         (XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \
3055          XE_VM_BIND_FLAG_IMMEDIATE | XE_VM_BIND_FLAG_NULL | 0xffff)
3056 #endif
3057 #define XE_64K_PAGE_MASK 0xffffull
3058
3059 #define MAX_BINDS       512     /* FIXME: Picking random upper limit */
3060
3061 static int vm_bind_ioctl_check_args(struct xe_device *xe,
3062                                     struct drm_xe_vm_bind *args,
3063                                     struct drm_xe_vm_bind_op **bind_ops,
3064                                     bool *async)
3065 {
3066         int err;
3067         int i;
3068
3069         if (XE_IOCTL_DBG(xe, args->extensions) ||
3070             XE_IOCTL_DBG(xe, !args->num_binds) ||
3071             XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS))
3072                 return -EINVAL;
3073
3074         if (args->num_binds > 1) {
3075                 u64 __user *bind_user =
3076                         u64_to_user_ptr(args->vector_of_binds);
3077
3078                 *bind_ops = kmalloc_array(args->num_binds,
3079                                     sizeof(struct drm_xe_vm_bind_op), GFP_KERNEL);
3080                 if (!*bind_ops)
3081                         return -ENOMEM;
3082
3083                 err = copy_from_user(*bind_ops, bind_user,
3084                                      sizeof(struct drm_xe_vm_bind_op) *
3085                                      args->num_binds);
3086                 if (XE_IOCTL_DBG(xe, err)) {
3087                         err = -EFAULT;
3088                         goto free_bind_ops;
3089                 }
3090         } else {
3091                 *bind_ops = &args->bind;
3092         }
3093
3094         for (i = 0; i < args->num_binds; ++i) {
3095                 u64 range = (*bind_ops)[i].range;
3096                 u64 addr = (*bind_ops)[i].addr;
3097                 u32 op = (*bind_ops)[i].op;
3098                 u32 obj = (*bind_ops)[i].obj;
3099                 u64 obj_offset = (*bind_ops)[i].obj_offset;
3100                 u32 region = (*bind_ops)[i].region;
3101                 bool is_null = op & XE_VM_BIND_FLAG_NULL;
3102
3103                 if (i == 0) {
3104                         *async = !!(op & XE_VM_BIND_FLAG_ASYNC);
3105                 } else if (XE_IOCTL_DBG(xe, !*async) ||
3106                            XE_IOCTL_DBG(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) ||
3107                            XE_IOCTL_DBG(xe, VM_BIND_OP(op) ==
3108                                         XE_VM_BIND_OP_RESTART)) {
3109                         err = -EINVAL;
3110                         goto free_bind_ops;
3111                 }
3112
3113                 if (XE_IOCTL_DBG(xe, !*async &&
3114                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) {
3115                         err = -EINVAL;
3116                         goto free_bind_ops;
3117                 }
3118
3119                 if (XE_IOCTL_DBG(xe, !*async &&
3120                                  VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) {
3121                         err = -EINVAL;
3122                         goto free_bind_ops;
3123                 }
3124
3125                 if (XE_IOCTL_DBG(xe, VM_BIND_OP(op) >
3126                                  XE_VM_BIND_OP_PREFETCH) ||
3127                     XE_IOCTL_DBG(xe, op & ~SUPPORTED_FLAGS) ||
3128                     XE_IOCTL_DBG(xe, obj && is_null) ||
3129                     XE_IOCTL_DBG(xe, obj_offset && is_null) ||
3130                     XE_IOCTL_DBG(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_MAP &&
3131                                  is_null) ||
3132                     XE_IOCTL_DBG(xe, !obj &&
3133                                  VM_BIND_OP(op) == XE_VM_BIND_OP_MAP &&
3134                                  !is_null) ||
3135                     XE_IOCTL_DBG(xe, !obj &&
3136                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
3137                     XE_IOCTL_DBG(xe, addr &&
3138                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
3139                     XE_IOCTL_DBG(xe, range &&
3140                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
3141                     XE_IOCTL_DBG(xe, obj &&
3142                                  VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR) ||
3143                     XE_IOCTL_DBG(xe, obj &&
3144                                  VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH) ||
3145                     XE_IOCTL_DBG(xe, region &&
3146                                  VM_BIND_OP(op) != XE_VM_BIND_OP_PREFETCH) ||
3147                     XE_IOCTL_DBG(xe, !(BIT(region) &
3148                                        xe->info.mem_region_mask)) ||
3149                     XE_IOCTL_DBG(xe, obj &&
3150                                  VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP)) {
3151                         err = -EINVAL;
3152                         goto free_bind_ops;
3153                 }
3154
3155                 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3156                     XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3157                     XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3158                     XE_IOCTL_DBG(xe, !range && VM_BIND_OP(op) !=
3159                                  XE_VM_BIND_OP_RESTART &&
3160                                  VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL)) {
3161                         err = -EINVAL;
3162                         goto free_bind_ops;
3163                 }
3164         }
3165
3166         return 0;
3167
3168 free_bind_ops:
3169         if (args->num_binds > 1)
3170                 kfree(*bind_ops);
3171         return err;
3172 }
3173
3174 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3175 {
3176         struct xe_device *xe = to_xe_device(dev);
3177         struct xe_file *xef = to_xe_file(file);
3178         struct drm_xe_vm_bind *args = data;
3179         struct drm_xe_sync __user *syncs_user;
3180         struct xe_bo **bos = NULL;
3181         struct drm_gpuva_ops **ops = NULL;
3182         struct xe_vm *vm;
3183         struct xe_exec_queue *q = NULL;
3184         u32 num_syncs;
3185         struct xe_sync_entry *syncs = NULL;
3186         struct drm_xe_vm_bind_op *bind_ops;
3187         LIST_HEAD(ops_list);
3188         bool async;
3189         int err;
3190         int i;
3191
3192         err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async);
3193         if (err)
3194                 return err;
3195
3196         if (args->exec_queue_id) {
3197                 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3198                 if (XE_IOCTL_DBG(xe, !q)) {
3199                         err = -ENOENT;
3200                         goto free_objs;
3201                 }
3202
3203                 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3204                         err = -EINVAL;
3205                         goto put_exec_queue;
3206                 }
3207         }
3208
3209         vm = xe_vm_lookup(xef, args->vm_id);
3210         if (XE_IOCTL_DBG(xe, !vm)) {
3211                 err = -EINVAL;
3212                 goto put_exec_queue;
3213         }
3214
3215         err = down_write_killable(&vm->lock);
3216         if (err)
3217                 goto put_vm;
3218
3219         if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3220                 err = -ENOENT;
3221                 goto release_vm_lock;
3222         }
3223
3224         if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) {
3225                 if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
3226                         err = -EOPNOTSUPP;
3227                 if (XE_IOCTL_DBG(xe, !err && args->num_syncs))
3228                         err = -EINVAL;
3229                 if (XE_IOCTL_DBG(xe, !err && !vm->async_ops.error))
3230                         err = -EPROTO;
3231
3232                 if (!err) {
3233                         trace_xe_vm_restart(vm);
3234                         vm_set_async_error(vm, 0);
3235
3236                         queue_work(system_unbound_wq, &vm->async_ops.work);
3237
3238                         /* Rebinds may have been blocked, give worker a kick */
3239                         if (xe_vm_in_compute_mode(vm))
3240                                 xe_vm_queue_rebind_worker(vm);
3241                 }
3242
3243                 goto release_vm_lock;
3244         }
3245
3246         if (XE_IOCTL_DBG(xe, !vm->async_ops.error &&
3247                          async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) {
3248                 err = -EOPNOTSUPP;
3249                 goto release_vm_lock;
3250         }
3251
3252         for (i = 0; i < args->num_binds; ++i) {
3253                 u64 range = bind_ops[i].range;
3254                 u64 addr = bind_ops[i].addr;
3255
3256                 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3257                     XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3258                         err = -EINVAL;
3259                         goto release_vm_lock;
3260                 }
3261
3262                 if (bind_ops[i].tile_mask) {
3263                         u64 valid_tiles = BIT(xe->info.tile_count) - 1;
3264
3265                         if (XE_IOCTL_DBG(xe, bind_ops[i].tile_mask &
3266                                          ~valid_tiles)) {
3267                                 err = -EINVAL;
3268                                 goto release_vm_lock;
3269                         }
3270                 }
3271         }
3272
3273         bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL);
3274         if (!bos) {
3275                 err = -ENOMEM;
3276                 goto release_vm_lock;
3277         }
3278
3279         ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL);
3280         if (!ops) {
3281                 err = -ENOMEM;
3282                 goto release_vm_lock;
3283         }
3284
3285         for (i = 0; i < args->num_binds; ++i) {
3286                 struct drm_gem_object *gem_obj;
3287                 u64 range = bind_ops[i].range;
3288                 u64 addr = bind_ops[i].addr;
3289                 u32 obj = bind_ops[i].obj;
3290                 u64 obj_offset = bind_ops[i].obj_offset;
3291
3292                 if (!obj)
3293                         continue;
3294
3295                 gem_obj = drm_gem_object_lookup(file, obj);
3296                 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3297                         err = -ENOENT;
3298                         goto put_obj;
3299                 }
3300                 bos[i] = gem_to_xe_bo(gem_obj);
3301
3302                 if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
3303                     XE_IOCTL_DBG(xe, obj_offset >
3304                                  bos[i]->size - range)) {
3305                         err = -EINVAL;
3306                         goto put_obj;
3307                 }
3308
3309                 if (bos[i]->flags & XE_BO_INTERNAL_64K) {
3310                         if (XE_IOCTL_DBG(xe, obj_offset &
3311                                          XE_64K_PAGE_MASK) ||
3312                             XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3313                             XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3314                                 err = -EINVAL;
3315                                 goto put_obj;
3316                         }
3317                 }
3318         }
3319
3320         if (args->num_syncs) {
3321                 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3322                 if (!syncs) {
3323                         err = -ENOMEM;
3324                         goto put_obj;
3325                 }
3326         }
3327
3328         syncs_user = u64_to_user_ptr(args->syncs);
3329         for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3330                 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3331                                           &syncs_user[num_syncs], false,
3332                                           xe_vm_no_dma_fences(vm));
3333                 if (err)
3334                         goto free_syncs;
3335         }
3336
3337         /* Do some error checking first to make the unwind easier */
3338         for (i = 0; i < args->num_binds; ++i) {
3339                 u64 range = bind_ops[i].range;
3340                 u64 addr = bind_ops[i].addr;
3341                 u32 op = bind_ops[i].op;
3342
3343                 err = vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op);
3344                 if (err)
3345                         goto free_syncs;
3346         }
3347
3348         for (i = 0; i < args->num_binds; ++i) {
3349                 u64 range = bind_ops[i].range;
3350                 u64 addr = bind_ops[i].addr;
3351                 u32 op = bind_ops[i].op;
3352                 u64 obj_offset = bind_ops[i].obj_offset;
3353                 u8 tile_mask = bind_ops[i].tile_mask;
3354                 u32 region = bind_ops[i].region;
3355
3356                 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
3357                                                   addr, range, op, tile_mask,
3358                                                   region);
3359                 if (IS_ERR(ops[i])) {
3360                         err = PTR_ERR(ops[i]);
3361                         ops[i] = NULL;
3362                         goto unwind_ops;
3363                 }
3364
3365                 err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
3366                                               &ops_list,
3367                                               i == args->num_binds - 1,
3368                                               async);
3369                 if (err)
3370                         goto unwind_ops;
3371         }
3372
3373         /* Nothing to do */
3374         if (list_empty(&ops_list)) {
3375                 err = -ENODATA;
3376                 goto unwind_ops;
3377         }
3378
3379         err = vm_bind_ioctl_ops_execute(vm, &ops_list, async);
3380         up_write(&vm->lock);
3381
3382         for (i = 0; i < args->num_binds; ++i)
3383                 xe_bo_put(bos[i]);
3384
3385         kfree(bos);
3386         kfree(ops);
3387         if (args->num_binds > 1)
3388                 kfree(bind_ops);
3389
3390         return err;
3391
3392 unwind_ops:
3393         vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3394 free_syncs:
3395         for (i = 0; err == -ENODATA && i < num_syncs; i++)
3396                 xe_sync_entry_signal(&syncs[i], NULL, dma_fence_get_stub());
3397         while (num_syncs--)
3398                 xe_sync_entry_cleanup(&syncs[num_syncs]);
3399
3400         kfree(syncs);
3401 put_obj:
3402         for (i = 0; i < args->num_binds; ++i)
3403                 xe_bo_put(bos[i]);
3404 release_vm_lock:
3405         up_write(&vm->lock);
3406 put_vm:
3407         xe_vm_put(vm);
3408 put_exec_queue:
3409         if (q)
3410                 xe_exec_queue_put(q);
3411 free_objs:
3412         kfree(bos);
3413         kfree(ops);
3414         if (args->num_binds > 1)
3415                 kfree(bind_ops);
3416         return err == -ENODATA ? 0 : err;
3417 }
3418
3419 /**
3420  * xe_vm_lock() - Lock the vm's dma_resv object
3421  * @vm: The struct xe_vm whose dma_resv object is to be locked
3422  * @intr: Whether to perform any wait interruptibly
3423  *
3424  * Return: 0 on success, -EINTR if @intr is true and the wait for a
3425  * contended lock was interrupted. If @intr is false, the function
3426  * always returns 0.
3427  */
3428 int xe_vm_lock(struct xe_vm *vm, bool intr)
3429 {
3430         if (intr)
3431                 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3432
3433         return dma_resv_lock(xe_vm_resv(vm), NULL);
3434 }
3435
3436 /**
3437  * xe_vm_unlock() - Unlock the vm's dma_resv object
3438  * @vm: The struct xe_vm whose lock is to be released.
3439  *
3440  * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
3441  */
3442 void xe_vm_unlock(struct xe_vm *vm)
3443 {
3444         dma_resv_unlock(xe_vm_resv(vm));
3445 }
3446
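/*
 * Example (illustrative sketch only, not part of the driver): a typical
 * caller pattern for xe_vm_lock()/xe_vm_unlock(). The helper name
 * example_vm_op() and its body are hypothetical.
 */
#if 0	/* not compiled; kept as a usage illustration */
static int example_vm_op(struct xe_vm *vm)
{
	int err;

	/* Interruptible wait so a pending signal can abort the lock. */
	err = xe_vm_lock(vm, true);
	if (err)
		return err;	/* -EINTR if the wait was interrupted */

	/* ... operate on state protected by the vm's dma_resv here ... */

	xe_vm_unlock(vm);
	return 0;
}
#endif
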
3447 /**
3448  * xe_vm_invalidate_vma() - invalidate GPU mappings for VMA without a lock
3449  * @vma: VMA to invalidate
3450  *
3451  * Walks a list of page table leaves, zeroing the entries owned by this
3452  * VMA, invalidates the TLBs, and blocks until the TLB invalidation is
3453  * complete.
3454  *
3455  * Return: 0 on success, negative error code otherwise.
3456  */
3457 int xe_vm_invalidate_vma(struct xe_vma *vma)
3458 {
3459         struct xe_device *xe = xe_vma_vm(vma)->xe;
3460         struct xe_tile *tile;
3461         u32 tile_needs_invalidate = 0;
3462         int seqno[XE_MAX_TILES_PER_DEVICE];
3463         u8 id;
3464         int ret;
3465
3466         XE_WARN_ON(!xe_vm_in_fault_mode(xe_vma_vm(vma)));
3467         XE_WARN_ON(xe_vma_is_null(vma));
3468         trace_xe_vma_usm_invalidate(vma);
3469
3470         /* Check that we don't race with page-table updates */
3471         if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3472                 if (xe_vma_is_userptr(vma)) {
3473                         WARN_ON_ONCE(!mmu_interval_check_retry
3474                                      (&vma->userptr.notifier,
3475                                       vma->userptr.notifier_seq));
3476                         WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3477                                                              DMA_RESV_USAGE_BOOKKEEP));
3478
3479                 } else {
3480                         xe_bo_assert_held(xe_vma_bo(vma));
3481                 }
3482         }
3483
3484         for_each_tile(tile, xe, id) {
3485                 if (xe_pt_zap_ptes(tile, vma)) {
3486                         tile_needs_invalidate |= BIT(id);
3487                         xe_device_wmb(xe);
3488                         /*
3489                          * FIXME: We potentially need to invalidate multiple
3490                          * GTs within the tile
3491                          */
3492                         seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
3493                         if (seqno[id] < 0)
3494                                 return seqno[id];
3495                 }
3496         }
3497
3498         for_each_tile(tile, xe, id) {
3499                 if (tile_needs_invalidate & BIT(id)) {
3500                         ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
3501                         if (ret < 0)
3502                                 return ret;
3503                 }
3504         }
3505
3506         vma->usm.tile_invalidated = vma->tile_mask;
3507
3508         return 0;
3509 }
3510
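/*
 * Minimal usage sketch (illustrative only, not a real driver path): zapping
 * and flushing the GPU mappings of a VMA whose backing pages have been
 * invalidated. example_zap_mappings() is a hypothetical name.
 */
#if 0	/* not compiled; kept as a usage illustration */
static void example_zap_mappings(struct xe_vma *vma)
{
	int err = xe_vm_invalidate_vma(vma);

	if (err)
		drm_warn(&xe_vma_vm(vma)->xe->drm,
			 "VMA invalidation failed: %d\n", err);
}
#endif
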
3511 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
3512 {
3513         struct drm_gpuva *gpuva;
3514         bool is_vram;
3515         u64 addr;
3516
3517         if (!down_read_trylock(&vm->lock)) {
3518                 drm_printf(p, " Failed to acquire VM lock to dump capture");
3519                 return 0;
3520         }
3521         if (vm->pt_root[gt_id]) {
3522                 addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE);
3523                 is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo);
3524                 drm_printf(p, " VM root: A:0x%llx %s\n", addr,
3525                            is_vram ? "VRAM" : "SYS");
3526         }
3527
3528         drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3529                 struct xe_vma *vma = gpuva_to_vma(gpuva);
3530                 bool is_userptr = xe_vma_is_userptr(vma);
3531                 bool is_null = xe_vma_is_null(vma);
3532
3533                 if (is_null) {
3534                         addr = 0;
3535                 } else if (is_userptr) {
3536                         struct xe_res_cursor cur;
3537
3538                         if (vma->userptr.sg) {
3539                                 xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE,
3540                                                 &cur);
3541                                 addr = xe_res_dma(&cur);
3542                         } else {
3543                                 addr = 0;
3544                         }
3545                 } else {
3546                         addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE);
3547                         is_vram = xe_bo_is_vram(xe_vma_bo(vma));
3548                 }
3549                 drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
3550                            xe_vma_start(vma), xe_vma_end(vma) - 1,
3551                            xe_vma_size(vma),
3552                            addr, is_null ? "NULL" : is_userptr ? "USR" :
3553                            is_vram ? "VRAM" : "SYS");
3554         }
3555         up_read(&vm->lock);
3556
3557         return 0;
3558 }