drivers/gpu/drm/xe/xe_exec.c

   1 // SPDX-License-Identifier: MIT
   2 /*
   3  * Copyright © 2022 Intel Corporation
   4  */
   5
   6 #include "xe_exec.h"
   7
   8 #include <drm/drm_device.h>
   9 #include <drm/drm_file.h>
  10 #include <drm/xe_drm.h>
  11
  12 #include "xe_bo.h"
  13 #include "xe_device.h"
  14 #include "xe_engine.h"
  15 #include "xe_macros.h"
  16 #include "xe_sched_job.h"
  17 #include "xe_sync.h"
  18 #include "xe_vm.h"
  19
  20 /**
  21  * DOC: Execbuf (User GPU command submission)
  22  *
  23  * Execs have historically been rather complicated in DRM drivers (at least in
  24  * the i915) because a few things:
  25  *
  26  * - Passing in a list BO which are read / written to creating implicit syncs
  27  * - Binding at exec time
  28  * - Flow controlling the ring at exec time
  29  *
  30  * In XE we avoid all of this complication by not allowing a BO list to be
  31  * passed into an exec, using the dma-buf implicit sync uAPI, have binds as
  32  * seperate operations, and using the DRM scheduler to flow control the ring.
  33  * Let's deep dive on each of these.
  34  *
  35  * We can get away from a BO list by forcing the user to use in / out fences on
  36  * every exec rather than the kernel tracking dependencies of BO (e.g. if the
  37  * user knows an exec writes to a BO and reads from the BO in the next exec, it
  38  * is the user's responsibility to pass in / out fence between the two execs).
  39  *
  40  * Implicit dependencies for external BOs are handled by using the dma-buf
  41  * implicit dependency uAPI (TODO: add link). To make this works each exec must
  42  * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external
  43  * BO mapped in the VM.
  44  *
  45  * We do not allow a user to trigger a bind at exec time rather we have a VM
  46  * bind IOCTL which uses the same in / out fence interface as exec. In that
  47  * sense, a VM bind is basically the same operation as an exec from the user
  48  * perspective. e.g. If an exec depends on a VM bind use the in / out fence
  49  * interface (struct drm_xe_sync) to synchronize like syncing between two
  50  * dependent execs.
  51  *
  52  * Although a user cannot trigger a bind, we still have to rebind userptrs in
  53  * the VM that have been invalidated since the last exec, likewise we also have
  54  * to rebind BOs that have been evicted by the kernel. We schedule these rebinds
  55  * behind any pending kernel operations on any external BOs in VM or any BOs
  56  * private to the VM. This is accomplished by the rebinds waiting on BOs
  57  * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs
  58  * slots (inflight execs are in the DMA_RESV_USAGE_BOOKING for private BOs and
  59  * in DMA_RESV_USAGE_WRITE for external BOs).
  60  *
  61  * Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute
  62  * mode VMs we use preempt fences and a rebind worker (TODO: add link).
  63  *
  64  * There is no need to flow control the ring in the exec as we write the ring at
  65  * submission time and set the DRM scheduler max job limit SIZE_OF_RING /
  66  * MAX_JOB_SIZE. The DRM scheduler will then hold all jobs until space in the
  67  * ring is available.
  68  *
  69  * All of this results in a rather simple exec implementation.
  70  *
  71  * Flow
  72  * ~~~~
  73  *
  74  * .. code-block::
  75  *
  76  *      Parse input arguments
  77  *      Wait for any async VM bind passed as in-fences to start
  78  *      <----------------------------------------------------------------------|
  79  *      Lock global VM lock in read mode                                       |
  80  *      Pin userptrs (also finds userptr invalidated since last exec)          |
  81  *      Lock exec (VM dma-resv lock, external BOs dma-resv locks)              |
  82  *      Validate BOs that have been evicted                                    |
  83  *      Create job                                                             |
  84  *      Rebind invalidated userptrs + evicted BOs (non-compute-mode)           |
  85  *      Add rebind fence dependency to job                                     |
  86  *      Add job VM dma-resv bookkeeping slot (non-compute mode)                |
  87  *      Add job to external BOs dma-resv write slots (non-compute mode)        |
  88  *      Check if any userptrs invalidated since pin ------ Drop locks ---------|
  89  *      Install in / out fences for job
  90  *      Submit job
  91  *      Unlock all
  92  */
  93
  94 static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
  95                          struct ttm_validate_buffer tv_onstack[],
  96                          struct ttm_validate_buffer **tv,
  97                          struct list_head *objs)
  98 {
  99         struct xe_vm *vm = e->vm;
 100         struct xe_vma *vma;
 101         LIST_HEAD(dups);
 102         int err;
 103
 104         *tv = NULL;
 105         if (xe_vm_no_dma_fences(e->vm))
 106                 return 0;
 107
 108         err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1);
 109         if (err)
 110                 return err;
 111
 112         /*
 113          * Validate BOs that have been evicted (i.e. make sure the
 114          * BOs have valid placements possibly moving an evicted BO back
 115          * to a location where the GPU can access it).
 116          */
 117         list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
 118                 if (xe_vma_is_userptr(vma))
 119                         continue;
 120
 121                 err = xe_bo_validate(vma->bo, vm, false);
 122                 if (err) {
 123                         xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
 124                         *tv = NULL;
 125                         return err;
 126                 }
 127         }
 128
 129         return 0;
 130 }
 131
 132 static void xe_exec_end(struct xe_engine *e,
 133                         struct ttm_validate_buffer *tv_onstack,
 134                         struct ttm_validate_buffer *tv,
 135                         struct ww_acquire_ctx *ww,
 136                         struct list_head *objs)
 137 {
 138         if (!xe_vm_no_dma_fences(e->vm))
 139                 xe_vm_unlock_dma_resv(e->vm, tv_onstack, tv, ww, objs);
 140 }
 141
 142 int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 143 {
 144         struct xe_device *xe = to_xe_device(dev);
 145         struct xe_file *xef = to_xe_file(file);
 146         struct drm_xe_exec *args = data;
 147         struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs);
 148         u64 __user *addresses_user = u64_to_user_ptr(args->address);
 149         struct xe_engine *engine;
 150         struct xe_sync_entry *syncs = NULL;
 151         u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
 152         struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
 153         struct ttm_validate_buffer *tv = NULL;
 154         u32 i, num_syncs = 0;
 155         struct xe_sched_job *job;
 156         struct dma_fence *rebind_fence;
 157         struct xe_vm *vm;
 158         struct ww_acquire_ctx ww;
 159         struct list_head objs;
 160         bool write_locked;
 161         int err = 0;
 162
 163         if (XE_IOCTL_ERR(xe, args->extensions))
 164                 return -EINVAL;
 165
 166         engine = xe_engine_lookup(xef, args->engine_id);
 167         if (XE_IOCTL_ERR(xe, !engine))
 168                 return -ENOENT;
 169
 170         if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_VM))
 171                 return -EINVAL;
 172
 173         if (XE_IOCTL_ERR(xe, engine->width != args->num_batch_buffer))
 174                 return -EINVAL;
 175
 176         if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_BANNED)) {
 177                 err = -ECANCELED;
 178                 goto err_engine;
 179         }
 180
 181         if (args->num_syncs) {
 182                 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
 183                 if (!syncs) {
 184                         err = -ENOMEM;
 185                         goto err_engine;
 186                 }
 187         }
 188
 189         vm = engine->vm;
 190
 191         for (i = 0; i < args->num_syncs; i++) {
 192                 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
 193                                           &syncs_user[i], true,
 194                                           xe_vm_no_dma_fences(vm));
 195                 if (err)
 196                         goto err_syncs;
 197         }
 198
 199         if (xe_engine_is_parallel(engine)) {
 200                 err = __copy_from_user(addresses, addresses_user, sizeof(u64) *
 201                                        engine->width);
 202                 if (err) {
 203                         err = -EFAULT;
 204                         goto err_syncs;
 205                 }
 206         }
 207
 208         /*
 209          * We can't install a job into the VM dma-resv shared slot before an
 210          * async VM bind passed in as a fence without the risk of deadlocking as
 211          * the bind can trigger an eviction which in turn depends on anything in
 212          * the VM dma-resv shared slots. Not an ideal solution, but we wait for
 213          * all dependent async VM binds to start (install correct fences into
 214          * dma-resv slots) before moving forward.
 215          */
 216         if (!xe_vm_no_dma_fences(vm) &&
 217             vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
 218                 for (i = 0; i < args->num_syncs; i++) {
 219                         struct dma_fence *fence = syncs[i].fence;
 220                         if (fence) {
 221                                 err = xe_vm_async_fence_wait_start(fence);
 222                                 if (err)
 223                                         goto err_syncs;
 224                         }
 225                 }
 226         }
 227
 228 retry:
 229         if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) {
 230                 err = down_write_killable(&vm->lock);
 231                 write_locked = true;
 232         } else {
 233                 /* We don't allow execs while the VM is in error state */
 234                 err = down_read_interruptible(&vm->lock);
 235                 write_locked = false;
 236         }
 237         if (err)
 238                 goto err_syncs;
 239
 240         /* We don't allow execs while the VM is in error state */
 241         if (vm->async_ops.error) {
 242                 err = vm->async_ops.error;
 243                 goto err_unlock_list;
 244         }
 245
 246         /*
 247          * Extreme corner where we exit a VM error state with a munmap style VM
 248          * unbind inflight which requires a rebind. In this case the rebind
 249          * needs to install some fences into the dma-resv slots. The worker to
 250          * do this queued, let that worker make progress by dropping vm->lock,
 251          * flushing the worker and retrying the exec.
 252          */
 253         if (vm->async_ops.munmap_rebind_inflight) {
 254                 if (write_locked)
 255                         up_write(&vm->lock);
 256                 else
 257                         up_read(&vm->lock);
 258                 flush_work(&vm->async_ops.work);
 259                 goto retry;
 260         }
 261
 262         if (write_locked) {
 263                 err = xe_vm_userptr_pin(vm);
 264                 downgrade_write(&vm->lock);
 265                 write_locked = false;
 266                 if (err)
 267                         goto err_unlock_list;
 268         }
 269
 270         err = xe_exec_begin(engine, &ww, tv_onstack, &tv, &objs);
 271         if (err)
 272                 goto err_unlock_list;
 273
 274         if (xe_vm_is_closed(engine->vm)) {
 275                 drm_warn(&xe->drm, "Trying to schedule after vm is closed\n");
 276                 err = -EIO;
 277                 goto err_engine_end;
 278         }
 279
 280         job = xe_sched_job_create(engine, xe_engine_is_parallel(engine) ?
 281                                   addresses : &args->address);
 282         if (IS_ERR(job)) {
 283                 err = PTR_ERR(job);
 284                 goto err_engine_end;
 285         }
 286
 287         /*
 288          * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
 289          * VM mode only.
 290          */
 291         rebind_fence = xe_vm_rebind(vm, false);
 292         if (IS_ERR(rebind_fence)) {
 293                 err = PTR_ERR(rebind_fence);
 294                 goto err_put_job;
 295         }
 296
 297         /*
 298          * We store the rebind_fence in the VM so subsequent execs don't get
 299          * scheduled before the rebinds of userptrs / evicted BOs is complete.
 300          */
 301         if (rebind_fence) {
 302                 dma_fence_put(vm->rebind_fence);
 303                 vm->rebind_fence = rebind_fence;
 304         }
 305         if (vm->rebind_fence) {
 306                 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 307                              &vm->rebind_fence->flags)) {
 308                         dma_fence_put(vm->rebind_fence);
 309                         vm->rebind_fence = NULL;
 310                 } else {
 311                         dma_fence_get(vm->rebind_fence);
 312                         err = drm_sched_job_add_dependency(&job->drm,
 313                                                            vm->rebind_fence);
 314                         if (err)
 315                                 goto err_put_job;
 316                 }
 317         }
 318
 319         /* Wait behind munmap style rebinds */
 320         if (!xe_vm_no_dma_fences(vm)) {
 321                 err = drm_sched_job_add_resv_dependencies(&job->drm,
 322                                                           &vm->resv,
 323                                                           DMA_RESV_USAGE_KERNEL);
 324                 if (err)
 325                         goto err_put_job;
 326         }
 327
 328         for (i = 0; i < num_syncs && !err; i++)
 329                 err = xe_sync_entry_add_deps(&syncs[i], job);
 330         if (err)
 331                 goto err_put_job;
 332
 333         if (!xe_vm_no_dma_fences(vm)) {
 334                 err = down_read_interruptible(&vm->userptr.notifier_lock);
 335                 if (err)
 336                         goto err_put_job;
 337
 338                 err = __xe_vm_userptr_needs_repin(vm);
 339                 if (err)
 340                         goto err_repin;
 341         }
 342
 343         /*
 344          * Point of no return, if we error after this point just set an error on
 345          * the job and let the DRM scheduler / backend clean up the job.
 346          */
 347         xe_sched_job_arm(job);
 348         if (!xe_vm_no_dma_fences(vm)) {
 349                 /* Block userptr invalidations / BO eviction */
 350                 dma_resv_add_fence(&vm->resv,
 351                                    &job->drm.s_fence->finished,
 352                                    DMA_RESV_USAGE_BOOKKEEP);
 353
 354                 /*
 355                  * Make implicit sync work across drivers, assuming all external
 356                  * BOs are written as we don't pass in a read / write list.
 357                  */
 358                 xe_vm_fence_all_extobjs(vm, &job->drm.s_fence->finished,
 359                                         DMA_RESV_USAGE_WRITE);
 360         }
 361
 362         for (i = 0; i < num_syncs; i++)
 363                 xe_sync_entry_signal(&syncs[i], job,
 364                                      &job->drm.s_fence->finished);
 365
 366         xe_sched_job_push(job);
 367         xe_vm_reactivate_rebind(vm);
 368
 369 err_repin:
 370         if (!xe_vm_no_dma_fences(vm))
 371                 up_read(&vm->userptr.notifier_lock);
 372 err_put_job:
 373         if (err)
 374                 xe_sched_job_put(job);
 375 err_engine_end:
 376         xe_exec_end(engine, tv_onstack, tv, &ww, &objs);
 377 err_unlock_list:
 378         if (write_locked)
 379                 up_write(&vm->lock);
 380         else
 381                 up_read(&vm->lock);
 382         if (err == -EAGAIN)
 383                 goto retry;
 384 err_syncs:
 385         for (i = 0; i < num_syncs; i++)
 386                 xe_sync_entry_cleanup(&syncs[i]);
 387         kfree(syncs);
 388 err_engine:
 389         xe_engine_put(engine);
 390
 391         return err;
 392 }