// SPDX-License-Identifier: MIT

#include <linux/slab.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_syncobj.h>

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_sched.h"

#define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000

/* Starts at 0, since the DRM scheduler interprets these values as (initial)
 * indices into its run-queue array.
 */
enum nouveau_sched_priority {
	NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_KERNEL,
	NOUVEAU_SCHED_PRIORITY_COUNT,
};

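/* Initialize a generic nouveau_job: duplicate the userspace in/out sync
 * arrays, preallocate the syncobj and fence-chain pointer arrays and
 * initialize the underlying drm_sched_job against the scheduler entity.
 * Counterpart of nouveau_job_free().
 */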
int
nouveau_job_init(struct nouveau_job *job,
		 struct nouveau_job_args *args)
{
	struct nouveau_sched *sched = args->sched;
	int ret;

	INIT_LIST_HEAD(&job->entry);

	job->file_priv = args->file_priv;
	job->cli = nouveau_cli(args->file_priv);
	job->sched = sched;

	job->sync = args->sync;
	job->resv_usage = args->resv_usage;

	job->ops = args->ops;

	job->in_sync.count = args->in_sync.count;
	if (job->in_sync.count) {
		if (job->sync)
			return -EINVAL;

		job->in_sync.data = kmemdup(args->in_sync.s,
					    sizeof(*args->in_sync.s) *
					    args->in_sync.count,
					    GFP_KERNEL);
		if (!job->in_sync.data)
			return -ENOMEM;
	}

	job->out_sync.count = args->out_sync.count;
	if (job->out_sync.count) {
		if (job->sync) {
			ret = -EINVAL;
			goto err_free_in_sync;
		}

		job->out_sync.data = kmemdup(args->out_sync.s,
					     sizeof(*args->out_sync.s) *
					     args->out_sync.count,
					     GFP_KERNEL);
		if (!job->out_sync.data) {
			ret = -ENOMEM;
			goto err_free_in_sync;
		}

		job->out_sync.objs = kcalloc(job->out_sync.count,
					     sizeof(*job->out_sync.objs),
					     GFP_KERNEL);
		if (!job->out_sync.objs) {
			ret = -ENOMEM;
			goto err_free_out_sync;
		}

		job->out_sync.chains = kcalloc(job->out_sync.count,
					       sizeof(*job->out_sync.chains),
					       GFP_KERNEL);
		if (!job->out_sync.chains) {
			ret = -ENOMEM;
			goto err_free_objs;
		}
	}

	ret = drm_sched_job_init(&job->base, &sched->entity,
				 args->credits, NULL);
	if (ret)
		goto err_free_chains;

	job->state = NOUVEAU_JOB_INITIALIZED;

	return 0;

err_free_chains:
	kfree(job->out_sync.chains);
err_free_objs:
	kfree(job->out_sync.objs);
err_free_out_sync:
	kfree(job->out_sync.data);
err_free_in_sync:
	kfree(job->in_sync.data);
	return ret;
}

void
nouveau_job_fini(struct nouveau_job *job)
{
	dma_fence_put(job->done_fence);
	drm_sched_job_cleanup(&job->base);

	job->ops->free(job);
}

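/* Expected to be called by the job's backend once the job has completed;
 * removes it from the scheduler's job list and wakes up waiters (see
 * nouveau_sched_fini()).
 */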
void
nouveau_job_done(struct nouveau_job *job)
{
	struct nouveau_sched *sched = job->sched;

	spin_lock(&sched->job.list.lock);
	list_del(&job->entry);
	spin_unlock(&sched->job.list.lock);

	wake_up(&sched->job.wq);
}

void
nouveau_job_free(struct nouveau_job *job)
{
	kfree(job->in_sync.data);
	kfree(job->out_sync.data);
	kfree(job->out_sync.objs);
	kfree(job->out_sync.chains);
}

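/* Resolve a struct drm_nouveau_sync into the dma_fence backing the referenced
 * syncobj; for timeline syncobjs the fence at the given timeline point is
 * looked up.
 */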
static int
sync_find_fence(struct nouveau_job *job,
		struct drm_nouveau_sync *sync,
		struct dma_fence **fence)
{
	u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
	u64 point = 0;
	int ret;

	if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
	    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
		return -EOPNOTSUPP;

	if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
		point = sync->timeline_value;

	ret = drm_syncobj_find_fence(job->file_priv,
				     sync->handle, point,
				     0 /* flags */, fence);
	if (ret)
		return ret;

	return 0;
}

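/* Register all input syncs as scheduler dependencies, so the job doesn't run
 * before every input fence has signaled.
 */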
static int
nouveau_job_add_deps(struct nouveau_job *job)
{
	struct dma_fence *in_fence = NULL;
	int ret, i;

	for (i = 0; i < job->in_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->in_sync.data[i];

		ret = sync_find_fence(job, sync, &in_fence);
		if (ret) {
			NV_PRINTK(warn, job->cli,
				  "Failed to find syncobj (-> in): handle=%d\n",
				  sync->handle);
			return ret;
		}

		ret = drm_sched_job_add_dependency(&job->base, in_fence);
		if (ret)
			return ret;
	}

	return 0;
}

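/* Undo nouveau_job_fence_attach_prepare(): drop the syncobj references and
 * free any fence chains that haven't been consumed yet.
 */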
static void
nouveau_job_fence_attach_cleanup(struct nouveau_job *job)
{
	int i;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_syncobj *obj = job->out_sync.objs[i];
		struct dma_fence_chain *chain = job->out_sync.chains[i];

		if (obj)
			drm_syncobj_put(obj);

		if (chain)
			dma_fence_chain_free(chain);
	}
}

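/* Look up all output syncobjs and preallocate the fence chains needed for
 * timeline syncobjs up front, so that nouveau_job_fence_attach() can't fail
 * once the job has been submitted.
 */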
static int
nouveau_job_fence_attach_prepare(struct nouveau_job *job)
{
	int i, ret;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
		struct drm_syncobj **pobj = &job->out_sync.objs[i];
		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;

		if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
		    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			ret = -EINVAL;
			goto err_sync_cleanup;
		}

		*pobj = drm_syncobj_find(job->file_priv, sync->handle);
		if (!*pobj) {
			NV_PRINTK(warn, job->cli,
				  "Failed to find syncobj (-> out): handle=%d\n",
				  sync->handle);
			ret = -ENOENT;
			goto err_sync_cleanup;
		}

		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			*pchain = dma_fence_chain_alloc();
			if (!*pchain) {
				ret = -ENOMEM;
				goto err_sync_cleanup;
			}
		}
	}

	return 0;

err_sync_cleanup:
	nouveau_job_fence_attach_cleanup(job);
	return ret;
}

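/* Attach the job's done_fence to every output syncobj: timeline syncobjs get
 * a new chain node at the requested timeline point, binary syncobjs have
 * their fence replaced. Consumes the references and chains taken in
 * nouveau_job_fence_attach_prepare().
 */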
static void
nouveau_job_fence_attach(struct nouveau_job *job)
{
	struct dma_fence *fence = job->done_fence;
	int i;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
		struct drm_syncobj **pobj = &job->out_sync.objs[i];
		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;

		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			drm_syncobj_add_point(*pobj, *pchain, fence,
					      sync->timeline_value);
		} else {
			drm_syncobj_replace_fence(*pobj, fence);
		}

		drm_syncobj_put(*pobj);
	}
}

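/* Submit a job to the scheduler entity and, for synchronous jobs, wait for
 * its completion.
 *
 * A rough sketch of how a backend (e.g. nouveau_exec) is expected to drive
 * this; the exact layout of struct nouveau_job_args and struct
 * nouveau_job_ops lives in nouveau_sched.h, so treat the field names below
 * as illustrative only:
 *
 *	struct nouveau_job_args args = {
 *		.sched = sched,
 *		.file_priv = file_priv,
 *		.sync = true,			// wait in nouveau_job_submit()
 *		.in_sync  = { .count = ..., .s = ... },
 *		.out_sync = { .count = ..., .s = ... },
 *		.ops = &backend_job_ops,	// .submit, .armed_submit, .run,
 *						// .timeout and .free callbacks
 *	};
 *
 *	ret = nouveau_job_init(job, &args);
 *	if (!ret)
 *		ret = nouveau_job_submit(job);
 */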
int
nouveau_job_submit(struct nouveau_job *job)
{
	struct nouveau_sched *sched = job->sched;
	struct dma_fence *done_fence = NULL;
	struct drm_gpuvm_exec vm_exec = {
		.vm = &nouveau_cli_uvmm(job->cli)->base,
		.flags = DRM_EXEC_IGNORE_DUPLICATES,
		.num_fences = 1,
	};
	int ret;

	ret = nouveau_job_add_deps(job);
	if (ret)
		goto err;

	ret = nouveau_job_fence_attach_prepare(job);
	if (ret)
		goto err;

	/* Make sure the job appears on the sched_entity's queue in the same
	 * order as it was submitted.
	 */
	mutex_lock(&sched->mutex);

	/* Guarantee we won't fail after the submit() callback returned
	 * successfully.
	 */
	if (job->ops->submit) {
		ret = job->ops->submit(job, &vm_exec);
		if (ret)
			goto err_cleanup;
	}

	/* Submit was successful; add the job to the scheduler's job list. */
	spin_lock(&sched->job.list.lock);
	list_add(&job->entry, &sched->job.list.head);
	spin_unlock(&sched->job.list.lock);

	drm_sched_job_arm(&job->base);
	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
	if (job->sync)
		done_fence = dma_fence_get(job->done_fence);

	if (job->ops->armed_submit)
		job->ops->armed_submit(job, &vm_exec);

	nouveau_job_fence_attach(job);

	/* Set job state before pushing the job to the scheduler,
	 * such that we do not overwrite the job state set in run().
	 */
	job->state = NOUVEAU_JOB_SUBMIT_SUCCESS;

	drm_sched_entity_push_job(&job->base);

	mutex_unlock(&sched->mutex);

	if (done_fence) {
		dma_fence_wait(done_fence, true);
		dma_fence_put(done_fence);
	}

	return 0;

err_cleanup:
	mutex_unlock(&sched->mutex);
	nouveau_job_fence_attach_cleanup(job);
err:
	job->state = NOUVEAU_JOB_SUBMIT_FAILED;
	return ret;
}

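/* Run the job through its backend's run() callback and record the outcome in
 * job->state.
 */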
static struct dma_fence *
nouveau_job_run(struct nouveau_job *job)
{
	struct dma_fence *fence;

	fence = job->ops->run(job);
	if (IS_ERR(fence))
		job->state = NOUVEAU_JOB_RUN_FAILED;
	else
		job->state = NOUVEAU_JOB_RUN_SUCCESS;

	return fence;
}

static struct dma_fence *
nouveau_sched_run_job(struct drm_sched_job *sched_job)
{
	struct nouveau_job *job = to_nouveau_job(sched_job);

	return nouveau_job_run(job);
}

static enum drm_gpu_sched_stat
nouveau_sched_timedout_job(struct drm_sched_job *sched_job)
{
	struct drm_gpu_scheduler *sched = sched_job->sched;
	struct nouveau_job *job = to_nouveau_job(sched_job);
	enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_NOMINAL;

	drm_sched_stop(sched, sched_job);

	if (job->ops->timeout)
		stat = job->ops->timeout(job);
	else
		NV_PRINTK(warn, job->cli, "Generic job timeout.\n");

	drm_sched_start(sched, true);

	return stat;
}

static void
nouveau_sched_free_job(struct drm_sched_job *sched_job)
{
	struct nouveau_job *job = to_nouveau_job(sched_job);

	nouveau_job_fini(job);
}

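/* Backend ops invoked by the DRM GPU scheduler. */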
static const struct drm_sched_backend_ops nouveau_sched_ops = {
	.run_job = nouveau_sched_run_job,
	.timedout_job = nouveau_sched_timedout_job,
	.free_job = nouveau_sched_free_job,
};

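/* Set up a nouveau_sched instance: allocate a workqueue if the caller didn't
 * provide one, initialize the DRM GPU scheduler with a single entity and set
 * up the job list used to flush pending jobs on teardown.
 */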
static int
nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
		   struct workqueue_struct *wq, u32 credit_limit)
{
	struct drm_gpu_scheduler *drm_sched = &sched->base;
	struct drm_sched_entity *entity = &sched->entity;
	long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS);
	int ret;

	if (!wq) {
		wq = alloc_workqueue("nouveau_sched_wq_%d", 0, WQ_MAX_ACTIVE,
				     current->pid);
		if (!wq)
			return -ENOMEM;

		sched->wq = wq;
	}

	ret = drm_sched_init(drm_sched, &nouveau_sched_ops, wq,
			     NOUVEAU_SCHED_PRIORITY_COUNT,
			     credit_limit, 0, job_hang_limit,
			     NULL, NULL, "nouveau_sched", drm->dev->dev);
	if (ret)
		goto fail_wq;

	/* Using DRM_SCHED_PRIORITY_KERNEL, since that's what we're required to use
	 * when we want to have a single run-queue only.
	 *
	 * It's not documented, but using any other priority quickly runs into
	 * faults, because the scheduler uses the priority as an array index.
	 *
	 * Can't use NOUVEAU_SCHED_PRIORITY_SINGLE either, because it doesn't
	 * match the enum type used in drm_sched_entity_init().
	 */
	ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_KERNEL,
				    &drm_sched, 1, NULL);
	if (ret)
		goto fail_sched;

	mutex_init(&sched->mutex);
	spin_lock_init(&sched->job.list.lock);
	INIT_LIST_HEAD(&sched->job.list.head);
	init_waitqueue_head(&sched->job.wq);

	return 0;

fail_sched:
	drm_sched_fini(drm_sched);
fail_wq:
	if (sched->wq)
		destroy_workqueue(sched->wq);
	return ret;
}

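/* Allocate and initialize a nouveau_sched instance; counterpart of
 * nouveau_sched_destroy().
 */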
int
nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
		     struct workqueue_struct *wq, u32 credit_limit)
{
	struct nouveau_sched *sched;
	int ret;

	sched = kzalloc(sizeof(*sched), GFP_KERNEL);
	if (!sched)
		return -ENOMEM;

	ret = nouveau_sched_init(sched, drm, wq, credit_limit);
	if (ret) {
		kfree(sched);
		return ret;
	}

	*psched = sched;

	return 0;
}

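/* Tear down a nouveau_sched instance: wait for the job list to drain, then
 * finalize the entity and the scheduler.
 */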
static void
nouveau_sched_fini(struct nouveau_sched *sched)
{
	struct drm_gpu_scheduler *drm_sched = &sched->base;
	struct drm_sched_entity *entity = &sched->entity;

	rmb(); /* for list_empty to work without lock */
	wait_event(sched->job.wq, list_empty(&sched->job.list.head));

	drm_sched_entity_fini(entity);
	drm_sched_fini(drm_sched);

	/* Destroy workqueue after scheduler tear down, otherwise it might still
	 * be in use.
	 */
	if (sched->wq)
		destroy_workqueue(sched->wq);
}

void
nouveau_sched_destroy(struct nouveau_sched **psched)
{
	struct nouveau_sched *sched = *psched;

	nouveau_sched_fini(sched);
	kfree(sched);

	*psched = NULL;
}