drivers/gpu/drm/scheduler/sched_main.c
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * DOC: Overview
 *
 * The GPU scheduler provides entities which allow userspace to push jobs
 * into software queues which are then scheduled on a hardware run queue.
 * The software queues have a priority among them. The scheduler selects the
 * entities from the run queue using a FIFO. The scheduler provides dependency
 * handling features among jobs. The driver is supposed to provide callback
 * functions for backend operations to the scheduler, like submitting a job to
 * the hardware run queue, returning the dependencies of a job, etc.
 *
 * The organisation of the scheduler is the following:
 *
 * 1. Each hw run queue has one scheduler
 * 2. Each scheduler has multiple run queues with different priorities
 *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
 * 3. Each scheduler run queue has a queue of entities to schedule
 * 4. Entities themselves maintain a queue of jobs that will be scheduled on
 *    the hardware.
 *
 * The jobs in an entity are always scheduled in the order in which they were
 * pushed (see the rough lifecycle sketch below).
 */
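
/*
 * A rough lifecycle sketch (drm_sched_entity_push_job() and the other entity
 * helpers live in sched_entity.c; everything else below is defined in this
 * file):
 *
 *	drm_sched_init()		// one scheduler per hw run queue
 *	drm_sched_job_init()		// per job, bound to an entity
 *	drm_sched_entity_push_job()	// queue the job on that entity
 *	...				// the scheduler thread calls the
 *					// run_job() backend callback; job
 *					// completion triggers free_job()
 *	drm_sched_fini()		// teardown
 */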

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
#include <uapi/linux/sched/types.h>

#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job)             \
                container_of((sched_job), struct drm_sched_job, queue_node)

static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);

/**
 * drm_sched_rq_init - initialize a given run queue struct
 *
 * @sched: scheduler instance this run queue belongs to
 * @rq: scheduler run queue
 *
 * Initializes a scheduler runqueue.
 */
static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
                              struct drm_sched_rq *rq)
{
        spin_lock_init(&rq->lock);
        INIT_LIST_HEAD(&rq->entities);
        rq->current_entity = NULL;
        rq->sched = sched;
}

/**
 * drm_sched_rq_add_entity - add an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Adds a scheduler entity to the run queue.
 */
void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
                             struct drm_sched_entity *entity)
{
        if (!list_empty(&entity->list))
                return;
        spin_lock(&rq->lock);
        list_add_tail(&entity->list, &rq->entities);
        spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_remove_entity - remove an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Removes a scheduler entity from the run queue.
 */
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
                                struct drm_sched_entity *entity)
{
        if (list_empty(&entity->list))
                return;
        spin_lock(&rq->lock);
        list_del_init(&entity->list);
        if (rq->current_entity == entity)
                rq->current_entity = NULL;
        spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_select_entity - Select an entity which could provide a job to run
 *
 * @rq: scheduler run queue to check.
 *
 * Try to find a ready entity, returns NULL if none found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity(struct drm_sched_rq *rq)
{
        struct drm_sched_entity *entity;

        spin_lock(&rq->lock);

        entity = rq->current_entity;
        if (entity) {
                list_for_each_entry_continue(entity, &rq->entities, list) {
                        if (drm_sched_entity_is_ready(entity)) {
                                rq->current_entity = entity;
                                reinit_completion(&entity->entity_idle);
                                spin_unlock(&rq->lock);
                                return entity;
                        }
                }
        }

        list_for_each_entry(entity, &rq->entities, list) {
                if (drm_sched_entity_is_ready(entity)) {
                        rq->current_entity = entity;
                        reinit_completion(&entity->entity_idle);
                        spin_unlock(&rq->lock);
                        return entity;
                }

                if (entity == rq->current_entity)
                        break;
        }

        spin_unlock(&rq->lock);

        return NULL;
}

/**
 * drm_sched_dependency_optimized - test if the dependency can be optimized
 *
 * @fence: the dependency fence
 * @entity: the entity which depends on the above fence
 *
 * Returns true if the dependency can be optimized and false otherwise.
 */
bool drm_sched_dependency_optimized(struct dma_fence *fence,
                                    struct drm_sched_entity *entity)
{
        struct drm_gpu_scheduler *sched = entity->rq->sched;
        struct drm_sched_fence *s_fence;

        if (!fence || dma_fence_is_signaled(fence))
                return false;
        if (fence->context == entity->fence_context)
                return true;
        s_fence = to_drm_sched_fence(fence);
        if (s_fence && s_fence->sched == sched)
                return true;

        return false;
}
EXPORT_SYMBOL(drm_sched_dependency_optimized);

/**
 * drm_sched_start_timeout - start timeout for reset worker
 *
 * @sched: scheduler instance to start the worker for
 *
 * Start the timeout for the given scheduler.
 */
static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
        if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
            !list_empty(&sched->ring_mirror_list))
                schedule_delayed_work(&sched->work_tdr, sched->timeout);
}

/**
 * drm_sched_fault - immediately start timeout handler
 *
 * @sched: scheduler where the timeout handling should be started.
 *
 * Start timeout handling immediately when the driver detects a hardware fault.
 */
void drm_sched_fault(struct drm_gpu_scheduler *sched)
{
        mod_delayed_work(system_wq, &sched->work_tdr, 0);
}
EXPORT_SYMBOL(drm_sched_fault);

/**
 * drm_sched_suspend_timeout - Suspend scheduler job timeout
 *
 * @sched: scheduler instance for which to suspend the timeout
 *
 * Suspend the delayed work timeout for the scheduler. This is done by
 * modifying the delayed work timeout to an arbitrarily large value,
 * MAX_SCHEDULE_TIMEOUT in this case. Note that this function can be
 * called from an IRQ context.
 *
 * Returns the timeout remaining.
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
{
        unsigned long sched_timeout, now = jiffies;

        sched_timeout = sched->work_tdr.timer.expires;

        /*
         * Modify the timeout to an arbitrarily large value. This also prevents
         * the timeout from being restarted when new submissions arrive.
         */
        if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
                        && time_after(sched_timeout, now))
                return sched_timeout - now;
        else
                return sched->timeout;
}
EXPORT_SYMBOL(drm_sched_suspend_timeout);

/**
 * drm_sched_resume_timeout - Resume scheduler job timeout
 *
 * @sched: scheduler instance for which to resume the timeout
 * @remaining: remaining timeout
 *
 * Resume the delayed work timeout for the scheduler. Note that
 * this function can be called from an IRQ context.
 */
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
                unsigned long remaining)
{
        unsigned long flags;

        spin_lock_irqsave(&sched->job_list_lock, flags);

        if (list_empty(&sched->ring_mirror_list))
                cancel_delayed_work(&sched->work_tdr);
        else
                mod_delayed_work(system_wq, &sched->work_tdr, remaining);

        spin_unlock_irqrestore(&sched->job_list_lock, flags);
}
EXPORT_SYMBOL(drm_sched_resume_timeout);
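
/*
 * A usage sketch for the suspend/resume pair above (my_hw_preempt() and
 * my_hw_restore() are hypothetical driver helpers, not part of this file):
 * a driver that takes the hardware away from the ring can park the job
 * timeout for the duration and then restore whatever time was left:
 *
 *	unsigned long remaining = drm_sched_suspend_timeout(&ring->sched);
 *
 *	my_hw_preempt(ring);		// no spurious TDR while preempted
 *	...
 *	my_hw_restore(ring);
 *	drm_sched_resume_timeout(&ring->sched, remaining);
 */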

static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
        struct drm_gpu_scheduler *sched = s_job->sched;
        unsigned long flags;

        spin_lock_irqsave(&sched->job_list_lock, flags);
        list_add_tail(&s_job->node, &sched->ring_mirror_list);
        drm_sched_start_timeout(sched);
        spin_unlock_irqrestore(&sched->job_list_lock, flags);
}

static void drm_sched_job_timedout(struct work_struct *work)
{
        struct drm_gpu_scheduler *sched;
        struct drm_sched_job *job;
        unsigned long flags;

        sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
        job = list_first_entry_or_null(&sched->ring_mirror_list,
                                       struct drm_sched_job, node);

        if (job) {
                job->sched->ops->timedout_job(job);

                /*
                 * The guilty job did complete and hence needs to be manually
                 * removed. See the drm_sched_stop documentation.
                 */
                if (sched->free_guilty) {
                        job->sched->ops->free_job(job);
                        sched->free_guilty = false;
                }
        }

        spin_lock_irqsave(&sched->job_list_lock, flags);
        drm_sched_start_timeout(sched);
        spin_unlock_irqrestore(&sched->job_list_lock, flags);
}

/**
 * drm_sched_increase_karma - Update sched_entity guilty flag
 *
 * @bad: The job guilty of time out
 *
 * Increment on every hang caused by the 'bad' job. If this exceeds the hang
 * limit of the scheduler then the respective sched entity is marked guilty and
 * jobs from it will not be scheduled further.
 */
void drm_sched_increase_karma(struct drm_sched_job *bad)
{
        int i;
        struct drm_sched_entity *tmp;
        struct drm_sched_entity *entity;
        struct drm_gpu_scheduler *sched = bad->sched;

        /* don't increase @bad's karma if it's from the KERNEL RQ,
         * because a GPU hang can leave kernel jobs (like VM updating jobs)
         * corrupted, but kernel jobs are always considered good.
         */
        if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
                atomic_inc(&bad->karma);
                for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
                     i++) {
                        struct drm_sched_rq *rq = &sched->sched_rq[i];

                        spin_lock(&rq->lock);
                        list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
                                if (bad->s_fence->scheduled.context ==
                                    entity->fence_context) {
                                        if (atomic_read(&bad->karma) >
                                            bad->sched->hang_limit)
                                                if (entity->guilty)
                                                        atomic_set(entity->guilty, 1);
                                        break;
                                }
                        }
                        spin_unlock(&rq->lock);
                        if (&entity->list != &rq->entities)
                                break;
                }
        }
}
EXPORT_SYMBOL(drm_sched_increase_karma);

/**
 * drm_sched_stop - stop the scheduler
 *
 * @sched: scheduler instance
 * @bad: job which caused the time out
 *
 * Stop the scheduler and also remove and free all completed jobs.
 * Note: the bad job will not be freed as it might be used later, so it is the
 * caller's responsibility to release it manually if it is not part of the
 * mirror list any more.
 */
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
{
        struct drm_sched_job *s_job, *tmp;
        unsigned long flags;

        kthread_park(sched->thread);

        /*
         * Iterate the job list from later to earlier ones and either deactivate
         * their HW callbacks or remove them from the mirror list if they have
         * already signaled.
         * This iteration is thread safe as the sched thread is stopped.
         */
        list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) {
                if (s_job->s_fence->parent &&
                    dma_fence_remove_callback(s_job->s_fence->parent,
                                              &s_job->cb)) {
                        atomic_dec(&sched->hw_rq_count);
                } else {
                        /*
                         * remove job from ring_mirror_list.
                         * Locking here is for concurrent resume timeout
                         */
                        spin_lock_irqsave(&sched->job_list_lock, flags);
                        list_del_init(&s_job->node);
                        spin_unlock_irqrestore(&sched->job_list_lock, flags);

                        /*
                         * Wait for the job's HW fence callback to finish using
                         * s_job before releasing it.
                         *
                         * The job is still alive, so the fence refcount is at
                         * least 1.
                         */
                        dma_fence_wait(&s_job->s_fence->finished, false);

                        /*
                         * We must keep the bad job alive for later use during
                         * recovery by some of the drivers but leave a hint
                         * that the guilty job must be released.
                         */
                        if (bad != s_job)
                                sched->ops->free_job(s_job);
                        else
                                sched->free_guilty = true;
                }
        }

        /*
         * Stop the pending timer in flight as we rearm it in drm_sched_start.
         * This prevents the timeout work still in progress from firing right
         * after this TDR finished and before the newly restarted jobs had a
         * chance to complete.
         */
        cancel_delayed_work(&sched->work_tdr);
}
EXPORT_SYMBOL(drm_sched_stop);

/**
 * drm_sched_start - recover jobs after a reset
 *
 * @sched: scheduler instance
 * @full_recovery: proceed with complete sched restart
 */
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
{
        struct drm_sched_job *s_job, *tmp;
        unsigned long flags;
        int r;

        /*
         * Locking the list is not required here as the sched thread is parked
         * so no new jobs are being inserted or removed. Also concurrent
         * GPU recoveries can't run in parallel.
         */
        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
                struct dma_fence *fence = s_job->s_fence->parent;

                atomic_inc(&sched->hw_rq_count);

                if (!full_recovery)
                        continue;

                if (fence) {
                        r = dma_fence_add_callback(fence, &s_job->cb,
                                                   drm_sched_process_job);
                        if (r == -ENOENT)
                                drm_sched_process_job(fence, &s_job->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                } else
                        drm_sched_process_job(NULL, &s_job->cb);
        }

        if (full_recovery) {
                spin_lock_irqsave(&sched->job_list_lock, flags);
                drm_sched_start_timeout(sched);
                spin_unlock_irqrestore(&sched->job_list_lock, flags);
        }

        kthread_unpark(sched->thread);
}
EXPORT_SYMBOL(drm_sched_start);

/**
 * drm_sched_resubmit_jobs - helper to relaunch jobs from the mirror ring list
 *
 * @sched: scheduler instance
 */
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_job *s_job, *tmp;
        uint64_t guilty_context;
        bool found_guilty = false;
        struct dma_fence *fence;

        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
                struct drm_sched_fence *s_fence = s_job->s_fence;

                if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
                        found_guilty = true;
                        guilty_context = s_job->s_fence->scheduled.context;
                }

                if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
                        dma_fence_set_error(&s_fence->finished, -ECANCELED);

                dma_fence_put(s_job->s_fence->parent);
                fence = sched->ops->run_job(s_job);

                if (IS_ERR_OR_NULL(fence)) {
                        if (IS_ERR(fence))
                                dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

                        s_job->s_fence->parent = NULL;
                } else {
                        s_job->s_fence->parent = fence;
                }
        }
}
EXPORT_SYMBOL(drm_sched_resubmit_jobs);
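
/*
 * The stop/karma/resubmit/start helpers above are meant to be used together.
 * A sketch of the usual device-reset sequence, assuming a hypothetical
 * driver-side my_hw_reset() and a @bad job handed in by the timedout_job
 * callback:
 *
 *	drm_sched_stop(&ring->sched, bad);	// park thread, detach fences
 *	drm_sched_increase_karma(bad);		// mark the offender guilty
 *	my_hw_reset(ring);			// driver-specific recovery
 *	drm_sched_resubmit_jobs(&ring->sched);	// re-run surviving jobs
 *	drm_sched_start(&ring->sched, true);	// rearm callbacks, unpark
 */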

/**
 * drm_sched_job_init - init a scheduler job
 *
 * @job: scheduler job to init
 * @entity: scheduler entity to use
 * @owner: job owner for debugging
 *
 * Refer to drm_sched_entity_push_job() documentation
 * for locking considerations.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int drm_sched_job_init(struct drm_sched_job *job,
                       struct drm_sched_entity *entity,
                       void *owner)
{
        struct drm_gpu_scheduler *sched;

        drm_sched_entity_select_rq(entity);
        if (!entity->rq)
                return -ENOENT;

        sched = entity->rq->sched;

        job->sched = sched;
        job->entity = entity;
        job->s_priority = entity->rq - sched->sched_rq;
        job->s_fence = drm_sched_fence_create(entity, owner);
        if (!job->s_fence)
                return -ENOMEM;
        job->id = atomic64_inc_return(&sched->job_id_count);

        INIT_LIST_HEAD(&job->node);

        return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);

/**
 * drm_sched_job_cleanup - clean up scheduler job resources
 *
 * @job: scheduler job to clean up
 */
void drm_sched_job_cleanup(struct drm_sched_job *job)
{
        dma_fence_put(&job->s_fence->finished);
        job->s_fence = NULL;
}
EXPORT_SYMBOL(drm_sched_job_cleanup);
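
/*
 * drm_sched_job_init() and drm_sched_job_cleanup() pair up on error paths. A
 * sketch, assuming a hypothetical my_prepare() step: if submission is
 * abandoned after a successful init, the job's scheduler fence must still be
 * released; once the job is pushed, it is instead freed later through the
 * free_job() backend callback:
 *
 *	r = drm_sched_job_init(&job->base, entity, owner);
 *	if (r)
 *		return r;
 *	r = my_prepare(job);			// hypothetical driver step
 *	if (r) {
 *		drm_sched_job_cleanup(&job->base);
 *		return r;
 *	}
 *	drm_sched_entity_push_job(&job->base, entity);
 */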

/**
 * drm_sched_ready - is the scheduler ready
 *
 * @sched: scheduler instance
 *
 * Return true if we can push more jobs to the hw, otherwise false.
 */
static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
{
        return atomic_read(&sched->hw_rq_count) <
                sched->hw_submission_limit;
}

/**
 * drm_sched_wakeup - Wake up the scheduler when it is ready
 *
 * @sched: scheduler instance
 */
void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
{
        if (drm_sched_ready(sched))
                wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_select_entity - Select next entity to process
 *
 * @sched: scheduler instance
 *
 * Returns the entity to process or NULL if none are found.
 */
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_entity *entity;
        int i;

        if (!drm_sched_ready(sched))
                return NULL;

        /* Kernel run queue has higher priority than normal run queue */
        for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
                entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
                if (entity)
                        break;
        }

        return entity;
}

/**
 * drm_sched_process_job - process a job
 *
 * @f: fence
 * @cb: fence callbacks
 *
 * Called after the job has finished execution.
 */
static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
{
        struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
        struct drm_sched_fence *s_fence = s_job->s_fence;
        struct drm_gpu_scheduler *sched = s_fence->sched;

        atomic_dec(&sched->hw_rq_count);
        atomic_dec(&sched->num_jobs);

        trace_drm_sched_process_job(s_fence);

        drm_sched_fence_finished(s_fence);
        wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
 *
 * @sched: scheduler instance
 *
 * Returns the next finished job from the mirror list (if there is one)
 * ready to be destroyed.
 */
static struct drm_sched_job *
drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_job *job;
        unsigned long flags;

        /* Don't destroy jobs while the timeout worker is running */
        if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
            !cancel_delayed_work(&sched->work_tdr))
                return NULL;

        spin_lock_irqsave(&sched->job_list_lock, flags);

        job = list_first_entry_or_null(&sched->ring_mirror_list,
                                       struct drm_sched_job, node);

        if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
                /* remove job from ring_mirror_list */
                list_del_init(&job->node);
        } else {
                job = NULL;
                /* queue timeout for next job */
                drm_sched_start_timeout(sched);
        }

        spin_unlock_irqrestore(&sched->job_list_lock, flags);

        return job;
}

/**
 * drm_sched_blocked - check if the scheduler is blocked
 *
 * @sched: scheduler instance
 *
 * Returns true if blocked, otherwise false.
 */
static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
{
        if (kthread_should_park()) {
                kthread_parkme();
                return true;
        }

        return false;
}

/**
 * drm_sched_main - main scheduler thread
 *
 * @param: scheduler instance
 *
 * Returns 0.
 */
static int drm_sched_main(void *param)
{
        struct sched_param sparam = {.sched_priority = 1};
        struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
        int r;

        sched_setscheduler(current, SCHED_FIFO, &sparam);

        while (!kthread_should_stop()) {
                struct drm_sched_entity *entity = NULL;
                struct drm_sched_fence *s_fence;
                struct drm_sched_job *sched_job;
                struct dma_fence *fence;
                struct drm_sched_job *cleanup_job = NULL;

                wait_event_interruptible(sched->wake_up_worker,
                                         (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
                                         (!drm_sched_blocked(sched) &&
                                          (entity = drm_sched_select_entity(sched))) ||
                                         kthread_should_stop());

                if (cleanup_job) {
                        sched->ops->free_job(cleanup_job);
                        /* queue timeout for next job */
                        drm_sched_start_timeout(sched);
                }

                if (!entity)
                        continue;

                sched_job = drm_sched_entity_pop_job(entity);

                complete(&entity->entity_idle);

                if (!sched_job)
                        continue;

                s_fence = sched_job->s_fence;

                atomic_inc(&sched->hw_rq_count);
                drm_sched_job_begin(sched_job);

                fence = sched->ops->run_job(sched_job);
                drm_sched_fence_scheduled(s_fence);

                if (!IS_ERR_OR_NULL(fence)) {
                        s_fence->parent = dma_fence_get(fence);
                        r = dma_fence_add_callback(fence, &sched_job->cb,
                                                   drm_sched_process_job);
                        if (r == -ENOENT)
                                drm_sched_process_job(fence, &sched_job->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                        dma_fence_put(fence);
                } else {
                        if (IS_ERR(fence))
                                dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

                        drm_sched_process_job(NULL, &sched_job->cb);
                }

                wake_up(&sched->job_scheduled);
        }
        return 0;
}

/**
 * drm_sched_init - Init a gpu scheduler instance
 *
 * @sched: scheduler instance
 * @ops: backend operations for this scheduler
 * @hw_submission: number of hw submissions that can be in flight
 * @hang_limit: number of times to allow a job to hang before dropping it
 * @timeout: timeout value in jiffies for the scheduler
 * @name: name used for debugging
 *
 * Return 0 on success, otherwise error code.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
                   const struct drm_sched_backend_ops *ops,
                   unsigned hw_submission,
                   unsigned hang_limit,
                   long timeout,
                   const char *name)
{
        int i, ret;

        sched->ops = ops;
        sched->hw_submission_limit = hw_submission;
        sched->name = name;
        sched->timeout = timeout;
        sched->hang_limit = hang_limit;
        for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
                drm_sched_rq_init(sched, &sched->sched_rq[i]);

        init_waitqueue_head(&sched->wake_up_worker);
        init_waitqueue_head(&sched->job_scheduled);
        INIT_LIST_HEAD(&sched->ring_mirror_list);
        spin_lock_init(&sched->job_list_lock);
        atomic_set(&sched->hw_rq_count, 0);
        INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
        atomic_set(&sched->num_jobs, 0);
        atomic64_set(&sched->job_id_count, 0);

        /* Each scheduler will run on a separate kernel thread */
        sched->thread = kthread_run(drm_sched_main, sched, sched->name);
        if (IS_ERR(sched->thread)) {
                ret = PTR_ERR(sched->thread);
                sched->thread = NULL;
                DRM_ERROR("Failed to create scheduler for %s.\n", name);
                return ret;
        }

        sched->ready = true;
        return 0;
}
EXPORT_SYMBOL(drm_sched_init);
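
/*
 * An init sketch (ring, my_sched_ops and the concrete numbers are
 * hypothetical): one scheduler per hw ring, 32 submissions in flight, a
 * 10 second job timeout and a hang limit of 3 before an entity is marked
 * guilty:
 *
 *	r = drm_sched_init(&ring->sched, &my_sched_ops, 32, 3,
 *			   msecs_to_jiffies(10000), ring->name);
 *	if (r)
 *		return r;
 */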

/**
 * drm_sched_fini - Destroy a gpu scheduler
 *
 * @sched: scheduler instance
 *
 * Tears down and cleans up the scheduler.
 */
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
        if (sched->thread)
                kthread_stop(sched->thread);

        sched->ready = false;
}
EXPORT_SYMBOL(drm_sched_fini);