drivers/gpu/drm/nouveau/nouveau_exec.c

   1 // SPDX-License-Identifier: MIT
   2
   3 #include "nouveau_drv.h"
   4 #include "nouveau_gem.h"
   5 #include "nouveau_mem.h"
   6 #include "nouveau_dma.h"
   7 #include "nouveau_exec.h"
   8 #include "nouveau_abi16.h"
   9 #include "nouveau_chan.h"
  10 #include "nouveau_sched.h"
  11 #include "nouveau_uvmm.h"
  12
  13 /**
  14  * DOC: Overview
  15  *
  16  * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
  17  * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
  18  *
  19  * In order to use the UAPI firstly a user client must initialize the VA space
  20  * using the DRM_NOUVEAU_VM_INIT ioctl specifying which region of the VA space
  21  * should be managed by the kernel and which by the UMD.
  22  *
  23  * The DRM_NOUVEAU_VM_BIND ioctl provides clients an interface to manage the
  24  * userspace-manageable portion of the VA space. It provides operations to map
  25  * and unmap memory. Mappings may be flagged as sparse. Sparse mappings are not
  26  * backed by a GEM object and the kernel will ignore GEM handles provided
  27  * alongside a sparse mapping.
  28  *
  29  * Userspace may request memory backed mappings either within or outside of the
  30  * bounds (but not crossing those bounds) of a previously mapped sparse
  31  * mapping. Subsequently requested memory backed mappings within a sparse
  32  * mapping will take precedence over the corresponding range of the sparse
  33  * mapping. If such memory backed mappings are unmapped the kernel will make
  34  * sure that the corresponding sparse mapping will take their place again.
  35  * Requests to unmap a sparse mapping that still contains memory backed mappings
  36  * will result in those memory backed mappings being unmapped first.
  37  *
  38  * Unmap requests are not bound to the range of existing mappings and can even
  39  * overlap the bounds of sparse mappings. For such a request the kernel will
  40  * make sure to unmap all memory backed mappings within the given range,
  41  * splitting up memory backed mappings which are only partially contained
  42  * within the given range. Unmap requests with the sparse flag set must match
  43  * the range of a previously mapped sparse mapping exactly though.
  44  *
  45  * While the kernel generally permits arbitrary sequences and ranges of memory
  46  * backed mappings being mapped and unmapped, either within a single or multiple
  47  * VM_BIND ioctl calls, there are some restrictions for sparse mappings.
  48  *
  49  * The kernel does not permit to:
  50  *   - unmap non-existent sparse mappings
  51  *   - unmap a sparse mapping and map a new sparse mapping overlapping the range
  52  *     of the previously unmapped sparse mapping within the same VM_BIND ioctl
  53  *   - unmap a sparse mapping and map new memory backed mappings overlapping the
  54  *     range of the previously unmapped sparse mapping within the same VM_BIND
  55  *     ioctl
  56  *
  57  * When using the VM_BIND ioctl to request the kernel to map memory to a given
  58  * virtual address in the GPU's VA space there is no guarantee that the actual
  59  * mappings are created in the GPU's MMU. If the given memory is swapped out
  60  * at the time the bind operation is executed the kernel will stash the mapping
  61  * details into its internal allocator and create the actual MMU mappings once
  62  * the memory is swapped back in. While this is transparent for userspace, it is
  63  * guaranteed that all the backing memory is swapped back in and all the memory
  64  * mappings, as requested by userspace previously, are actually mapped once the
  65  * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
  66  *
  67  * A VM_BIND job can be executed either synchronously or asynchronously. If
  68  * executed asynchronously, userspace may provide a list of syncobjs this job
  69  * will wait for and/or a list of syncobjs the kernel will signal once the
  70  * VM_BIND job finished execution. If executed synchronously the ioctl will
  71  * block until the bind job is finished. For synchronous jobs the kernel will
  72  * not permit any syncobjs submitted to the kernel.
  73  *
  74  * To execute a push buffer the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
  75  * jobs are always executed asynchronously, and, equal to VM_BIND jobs, provide
  76  * the option to synchronize them with syncobjs.
  77  *
  78  * Besides that, EXEC jobs can be scheduled for a specified channel to execute on.
  79  *
  80  * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs do have
  81  * an up to date view of the VA space. However, the actual mappings might still
  82  * be pending. Hence, EXEC jobs must have the particular fences - of
  83  * the corresponding VM_BIND jobs they depend on - attached to them.
  84  */
  85
  86 static int
  87 nouveau_exec_job_submit(struct nouveau_job *job,
  88                         struct drm_gpuvm_exec *vme)
  89 {
  90         struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
  91         struct nouveau_cli *cli = job->cli;
  92         struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
  93         int ret;
  94
  95         /* Create a new fence, but do not emit yet. */
  96         ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
  97         if (ret)
  98                 return ret;
  99
 100         nouveau_uvmm_lock(uvmm);
 101         ret = drm_gpuvm_exec_lock(vme);
 102         if (ret) {
 103                 nouveau_uvmm_unlock(uvmm);
 104                 return ret;
 105         }
 106         nouveau_uvmm_unlock(uvmm);
 107
 108         ret = drm_gpuvm_exec_validate(vme);
 109         if (ret) {
 110                 drm_gpuvm_exec_unlock(vme);
 111                 return ret;
 112         }
 113
 114         return 0;
 115 }
 116
 117 static void
 118 nouveau_exec_job_armed_submit(struct nouveau_job *job,
 119                               struct drm_gpuvm_exec *vme)
 120 {
 121         drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
 122                                       job->resv_usage, job->resv_usage);
 123         drm_gpuvm_exec_unlock(vme);
 124 }
 125
 126 static struct dma_fence *
 127 nouveau_exec_job_run(struct nouveau_job *job)
 128 {
 129         struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
 130         struct nouveau_channel *chan = exec_job->chan;
 131         struct nouveau_fence *fence = exec_job->fence;
 132         int i, ret;
 133
 134         ret = nouveau_dma_wait(chan, exec_job->push.count + 1, 16);
 135         if (ret) {
 136                 NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
 137                 return ERR_PTR(ret);
 138         }
 139
 140         for (i = 0; i < exec_job->push.count; i++) {
 141                 struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
 142                 bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;
 143
 144                 nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
 145         }
 146
 147         ret = nouveau_fence_emit(fence);
 148         if (ret) {
 149                 nouveau_fence_unref(&exec_job->fence);
 150                 NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
 151                 WIND_RING(chan);
 152                 return ERR_PTR(ret);
 153         }
 154
 155         /* The fence was emitted successfully, set the job's fence pointer to
 156          * NULL in order to avoid freeing it up when the job is cleaned up.
 157          */
 158         exec_job->fence = NULL;
 159
 160         return &fence->base;
 161 }
 162
 163 static void
 164 nouveau_exec_job_free(struct nouveau_job *job)
 165 {
 166         struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
 167
 168         nouveau_job_done(job);
 169         nouveau_job_free(job);
 170
 171         kfree(exec_job->fence);
 172         kfree(exec_job->push.s);
 173         kfree(exec_job);
 174 }
 175
 176 static enum drm_gpu_sched_stat
 177 nouveau_exec_job_timeout(struct nouveau_job *job)
 178 {
 179         struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
 180         struct nouveau_channel *chan = exec_job->chan;
 181
 182         if (unlikely(!atomic_read(&chan->killed)))
 183                 nouveau_channel_kill(chan);
 184
 185         NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
 186                   chan->chid);
 187
 188         return DRM_GPU_SCHED_STAT_NOMINAL;
 189 }
 190
 191 static struct nouveau_job_ops nouveau_exec_job_ops = {
 192         .submit = nouveau_exec_job_submit,
 193         .armed_submit = nouveau_exec_job_armed_submit,
 194         .run = nouveau_exec_job_run,
 195         .free = nouveau_exec_job_free,
 196         .timeout = nouveau_exec_job_timeout,
 197 };
 198
 199 int
 200 nouveau_exec_job_init(struct nouveau_exec_job **pjob,
 201                       struct nouveau_exec_job_args *__args)
 202 {
 203         struct nouveau_exec_job *job;
 204         struct nouveau_job_args args = {};
 205         int i, ret;
 206
 207         for (i = 0; i < __args->push.count; i++) {
 208                 struct drm_nouveau_exec_push *p = &__args->push.s[i];
 209
 210                 if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
 211                         NV_PRINTK(err, nouveau_cli(__args->file_priv),
 212                                   "pushbuf size exceeds limit: 0x%x max 0x%x\n",
 213                                   p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
 214                         return -EINVAL;
 215                 }
 216         }
 217
 218         job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
 219         if (!job)
 220                 return -ENOMEM;
 221
 222         job->push.count = __args->push.count;
 223         if (__args->push.count) {
 224                 job->push.s = kmemdup(__args->push.s,
 225                                       sizeof(*__args->push.s) *
 226                                       __args->push.count,
 227                                       GFP_KERNEL);
 228                 if (!job->push.s) {
 229                         ret = -ENOMEM;
 230                         goto err_free_job;
 231                 }
 232         }
 233
 234         args.file_priv = __args->file_priv;
 235         job->chan = __args->chan;
 236
 237         args.sched = __args->sched;
 238         /* Plus one to account for the HW fence. */
 239         args.credits = job->push.count + 1;
 240
 241         args.in_sync.count = __args->in_sync.count;
 242         args.in_sync.s = __args->in_sync.s;
 243
 244         args.out_sync.count = __args->out_sync.count;
 245         args.out_sync.s = __args->out_sync.s;
 246
 247         args.ops = &nouveau_exec_job_ops;
 248         args.resv_usage = DMA_RESV_USAGE_WRITE;
 249
 250         ret = nouveau_job_init(&job->base, &args);
 251         if (ret)
 252                 goto err_free_pushs;
 253
 254         return 0;
 255
 256 err_free_pushs:
 257         kfree(job->push.s);
 258 err_free_job:
 259         kfree(job);
 260         *pjob = NULL;
 261
 262         return ret;
 263 }
 264
 265 static int
 266 nouveau_exec(struct nouveau_exec_job_args *args)
 267 {
 268         struct nouveau_exec_job *job;
 269         int ret;
 270
 271         ret = nouveau_exec_job_init(&job, args);
 272         if (ret)
 273                 return ret;
 274
 275         ret = nouveau_job_submit(&job->base);
 276         if (ret)
 277                 goto err_job_fini;
 278
 279         return 0;
 280
 281 err_job_fini:
 282         nouveau_job_fini(&job->base);
 283         return ret;
 284 }
 285
 286 static int
 287 nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
 288                    struct drm_nouveau_exec *req)
 289 {
 290         struct drm_nouveau_sync **s;
 291         u32 inc = req->wait_count;
 292         u64 ins = req->wait_ptr;
 293         u32 outc = req->sig_count;
 294         u64 outs = req->sig_ptr;
 295         u32 pushc = req->push_count;
 296         u64 pushs = req->push_ptr;
 297         int ret;
 298
 299         if (pushc) {
 300                 args->push.count = pushc;
 301                 args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
 302                 if (IS_ERR(args->push.s))
 303                         return PTR_ERR(args->push.s);
 304         }
 305
 306         if (inc) {
 307                 s = &args->in_sync.s;
 308
 309                 args->in_sync.count = inc;
 310                 *s = u_memcpya(ins, inc, sizeof(**s));
 311                 if (IS_ERR(*s)) {
 312                         ret = PTR_ERR(*s);
 313                         goto err_free_pushs;
 314                 }
 315         }
 316
 317         if (outc) {
 318                 s = &args->out_sync.s;
 319
 320                 args->out_sync.count = outc;
 321                 *s = u_memcpya(outs, outc, sizeof(**s));
 322                 if (IS_ERR(*s)) {
 323                         ret = PTR_ERR(*s);
 324                         goto err_free_ins;
 325                 }
 326         }
 327
 328         return 0;
 329
 330 err_free_pushs:
 331         u_free(args->push.s);
 332 err_free_ins:
 333         u_free(args->in_sync.s);
 334         return ret;
 335 }
 336
 337 static void
 338 nouveau_exec_ufree(struct nouveau_exec_job_args *args)
 339 {
 340         u_free(args->push.s);
 341         u_free(args->in_sync.s);
 342         u_free(args->out_sync.s);
 343 }
 344
 345 int
 346 nouveau_exec_ioctl_exec(struct drm_device *dev,
 347                         void *data,
 348                         struct drm_file *file_priv)
 349 {
 350         struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
 351         struct nouveau_cli *cli = nouveau_cli(file_priv);
 352         struct nouveau_abi16_chan *chan16;
 353         struct nouveau_channel *chan = NULL;
 354         struct nouveau_exec_job_args args = {};
 355         struct drm_nouveau_exec *req = data;
 356         int push_max, ret = 0;
 357
 358         if (unlikely(!abi16))
 359                 return -ENOMEM;
 360
 361         /* abi16 locks already */
 362         if (unlikely(!nouveau_cli_uvmm(cli)))
 363                 return nouveau_abi16_put(abi16, -ENOSYS);
 364
 365         list_for_each_entry(chan16, &abi16->channels, head) {
 366                 if (chan16->chan->chid == req->channel) {
 367                         chan = chan16->chan;
 368                         break;
 369                 }
 370         }
 371
 372         if (!chan)
 373                 return nouveau_abi16_put(abi16, -ENOENT);
 374
 375         if (unlikely(atomic_read(&chan->killed)))
 376                 return nouveau_abi16_put(abi16, -ENODEV);
 377
 378         if (!chan->dma.ib_max)
 379                 return nouveau_abi16_put(abi16, -ENOSYS);
 380
 381         push_max = nouveau_exec_push_max_from_ib_max(chan->dma.ib_max);
 382         if (unlikely(req->push_count > push_max)) {
 383                 NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
 384                           req->push_count, push_max);
 385                 return nouveau_abi16_put(abi16, -EINVAL);
 386         }
 387
 388         ret = nouveau_exec_ucopy(&args, req);
 389         if (ret)
 390                 goto out;
 391
 392         args.sched = chan16->sched;
 393         args.file_priv = file_priv;
 394         args.chan = chan;
 395
 396         ret = nouveau_exec(&args);
 397         if (ret)
 398                 goto out_free_args;
 399
 400 out_free_args:
 401         nouveau_exec_ufree(&args);
 402 out:
 403         return nouveau_abi16_put(abi16, ret);
 404 }