/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * DOC: Overview
 *
 * The GPU scheduler provides entities which allow userspace to push jobs
 * into software queues which are then scheduled on a hardware run queue.
 * The software queues have a priority among them. The scheduler selects the
 * entities from the run queue using a FIFO. The scheduler provides dependency
 * handling features among jobs. The driver is supposed to provide callback
 * functions for backend operations to the scheduler, like submitting a job to
 * the hardware run queue, returning the dependencies of a job, etc.
 *
 * The organisation of the scheduler is the following:
 *
 * 1. Each hw run queue has one scheduler
 * 2. Each scheduler has multiple run queues with different priorities
 *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
 * 3. Each scheduler run queue has a queue of entities to schedule
 * 4. Entities themselves maintain a queue of jobs that will be scheduled on
 *    the hardware.
 *
 * The jobs in an entity are always scheduled in the order in which they were
 * pushed (a usage sketch follows below).
 */
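
/*
 * Usage sketch (illustrative only, not taken from this file): a driver
 * creates one drm_gpu_scheduler per hw run queue, binds entities to it and
 * pushes jobs through those entities. Names like "foo_dev", "foo_job" and
 * "foo_file_priv" below are hypothetical placeholders.
 *
 *      struct drm_gpu_scheduler *sched = &foo_dev->ring_sched;
 *      struct drm_sched_entity entity;
 *      int r;
 *
 *      r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
 *                                &sched, 1, NULL);
 *      if (r)
 *              return r;
 *
 *      r = drm_sched_job_init(&foo_job->base, &entity, foo_file_priv);
 *      if (r)
 *              return r;
 *
 *      drm_sched_entity_push_job(&foo_job->base, &entity);
 *
 * Jobs pushed to the same entity run in FIFO order; the scheduler picks the
 * next ready entity from the highest-priority run queue that has one.
 */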

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
#include <uapi/linux/sched/types.h>

#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job)             \
                container_of((sched_job), struct drm_sched_job, queue_node)

static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);

/**
 * drm_sched_rq_init - initialize a given run queue struct
 *
 * @sched: scheduler instance to associate with this run queue
 * @rq: scheduler run queue
 *
 * Initializes a scheduler runqueue.
 */
static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
                              struct drm_sched_rq *rq)
{
        spin_lock_init(&rq->lock);
        INIT_LIST_HEAD(&rq->entities);
        rq->current_entity = NULL;
        rq->sched = sched;
}

/**
 * drm_sched_rq_add_entity - add an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Adds a scheduler entity to the run queue.
 */
void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
                             struct drm_sched_entity *entity)
{
        if (!list_empty(&entity->list))
                return;
        spin_lock(&rq->lock);
        list_add_tail(&entity->list, &rq->entities);
        spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_remove_entity - remove an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Removes a scheduler entity from the run queue.
 */
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
                                struct drm_sched_entity *entity)
{
        if (list_empty(&entity->list))
                return;
        spin_lock(&rq->lock);
        list_del_init(&entity->list);
        if (rq->current_entity == entity)
                rq->current_entity = NULL;
        spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_select_entity - Select an entity which could provide a job to run
 *
 * @rq: scheduler run queue to check.
 *
 * Try to find a ready entity, returns NULL if none found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity(struct drm_sched_rq *rq)
{
        struct drm_sched_entity *entity;

        spin_lock(&rq->lock);

        entity = rq->current_entity;
        if (entity) {
                list_for_each_entry_continue(entity, &rq->entities, list) {
                        if (drm_sched_entity_is_ready(entity)) {
                                rq->current_entity = entity;
                                reinit_completion(&entity->entity_idle);
                                spin_unlock(&rq->lock);
                                return entity;
                        }
                }
        }

        list_for_each_entry(entity, &rq->entities, list) {

                if (drm_sched_entity_is_ready(entity)) {
                        rq->current_entity = entity;
                        reinit_completion(&entity->entity_idle);
                        spin_unlock(&rq->lock);
                        return entity;
                }

                if (entity == rq->current_entity)
                        break;
        }

        spin_unlock(&rq->lock);

        return NULL;
}

/**
 * drm_sched_dependency_optimized - test if the dependency can be optimized
 *
 * @fence: the dependency fence
 * @entity: the entity which depends on the above fence
 *
 * Returns true if the dependency can be optimized and false otherwise
 */
bool drm_sched_dependency_optimized(struct dma_fence *fence,
                                    struct drm_sched_entity *entity)
{
        struct drm_gpu_scheduler *sched = entity->rq->sched;
        struct drm_sched_fence *s_fence;

        if (!fence || dma_fence_is_signaled(fence))
                return false;
        if (fence->context == entity->fence_context)
                return true;
        s_fence = to_drm_sched_fence(fence);
        if (s_fence && s_fence->sched == sched)
                return true;

        return false;
}
EXPORT_SYMBOL(drm_sched_dependency_optimized);

/**
 * drm_sched_start_timeout - start timeout for reset worker
 *
 * @sched: scheduler instance to start the worker for
 *
 * Start the timeout for the given scheduler.
 */
static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
        if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
            !list_empty(&sched->ring_mirror_list))
                schedule_delayed_work(&sched->work_tdr, sched->timeout);
}

/**
 * drm_sched_fault - immediately start timeout handler
 *
 * @sched: scheduler where the timeout handling should be started.
 *
 * Start timeout handling immediately when the driver detects a hardware fault.
 */
void drm_sched_fault(struct drm_gpu_scheduler *sched)
{
        mod_delayed_work(system_wq, &sched->work_tdr, 0);
}
EXPORT_SYMBOL(drm_sched_fault);
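
/*
 * Illustrative sketch (not part of this file): a driver that receives a
 * hardware fault interrupt can kick the timeout handler right away instead of
 * waiting for the TDR timer to expire. "foo_device" and the IRQ handler name
 * below are hypothetical.
 *
 *      static irqreturn_t foo_fault_irq(int irq, void *arg)
 *      {
 *              struct foo_device *foo_dev = arg;
 *
 *              drm_sched_fault(&foo_dev->ring_sched);
 *              return IRQ_HANDLED;
 *      }
 */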

/**
 * drm_sched_suspend_timeout - Suspend scheduler job timeout
 *
 * @sched: scheduler instance for which to suspend the timeout
 *
 * Suspend the delayed work timeout for the scheduler. This is done by
 * modifying the delayed work timeout to an arbitrarily large value,
 * MAX_SCHEDULE_TIMEOUT in this case.
 *
 * Returns the timeout remaining
 *
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
{
        unsigned long sched_timeout, now = jiffies;

        sched_timeout = sched->work_tdr.timer.expires;

        /*
         * Modify the timeout to an arbitrarily large value. This also prevents
         * the timeout from being restarted when new submissions arrive.
         */
        if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
                        && time_after(sched_timeout, now))
                return sched_timeout - now;
        else
                return sched->timeout;
}
EXPORT_SYMBOL(drm_sched_suspend_timeout);

/**
 * drm_sched_resume_timeout - Resume scheduler job timeout
 *
 * @sched: scheduler instance for which to resume the timeout
 * @remaining: remaining timeout
 *
 * Resume the delayed work timeout for the scheduler.
 */
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
                unsigned long remaining)
{
        spin_lock(&sched->job_list_lock);

        if (list_empty(&sched->ring_mirror_list))
                cancel_delayed_work(&sched->work_tdr);
        else
                mod_delayed_work(system_wq, &sched->work_tdr, remaining);

        spin_unlock(&sched->job_list_lock);
}
EXPORT_SYMBOL(drm_sched_resume_timeout);
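
/*
 * Illustrative pairing (not part of this file): a driver that temporarily
 * takes a ring away from the scheduler, e.g. around a world switch or
 * preemption, can park the TDR timer and re-arm it with the remaining time
 * afterwards. "foo_dev" and foo_do_the_slow_thing() are hypothetical.
 *
 *      unsigned long remaining;
 *
 *      remaining = drm_sched_suspend_timeout(&foo_dev->ring_sched);
 *      foo_do_the_slow_thing(foo_dev);
 *      drm_sched_resume_timeout(&foo_dev->ring_sched, remaining);
 */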

static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
        struct drm_gpu_scheduler *sched = s_job->sched;

        spin_lock(&sched->job_list_lock);
        list_add_tail(&s_job->node, &sched->ring_mirror_list);
        drm_sched_start_timeout(sched);
        spin_unlock(&sched->job_list_lock);
}

static void drm_sched_job_timedout(struct work_struct *work)
{
        struct drm_gpu_scheduler *sched;
        struct drm_sched_job *job;

        sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);

        /* Protects against concurrent deletion in drm_sched_get_cleanup_job */
        spin_lock(&sched->job_list_lock);
        job = list_first_entry_or_null(&sched->ring_mirror_list,
                                       struct drm_sched_job, node);

        if (job) {
                /*
                 * Remove the bad job so it cannot be freed by a concurrent
                 * drm_sched_get_cleanup_job. It will be reinserted after
                 * sched->thread is parked, at which point doing so is safe.
                 */
                list_del_init(&job->node);
                spin_unlock(&sched->job_list_lock);

                job->sched->ops->timedout_job(job);

                /*
                 * The guilty job did complete and hence needs to be manually
                 * removed. See the drm_sched_stop() doc.
                 */
                if (sched->free_guilty) {
                        job->sched->ops->free_job(job);
                        sched->free_guilty = false;
                }
        } else {
                spin_unlock(&sched->job_list_lock);
        }

        spin_lock(&sched->job_list_lock);
        drm_sched_start_timeout(sched);
        spin_unlock(&sched->job_list_lock);
}

/**
 * drm_sched_increase_karma - Update sched_entity guilty flag
 *
 * @bad: The job guilty of time out
 *
 * Increment on every hang caused by the 'bad' job. If this exceeds the hang
 * limit of the scheduler then the respective sched entity is marked guilty and
 * jobs from it will not be scheduled further
 */
void drm_sched_increase_karma(struct drm_sched_job *bad)
{
        int i;
        struct drm_sched_entity *tmp;
        struct drm_sched_entity *entity;
        struct drm_gpu_scheduler *sched = bad->sched;

        /* don't increase @bad's karma if it's from the KERNEL RQ,
         * because sometimes a GPU hang can corrupt kernel jobs (like VM
         * updating jobs); keep in mind that kernel jobs are always
         * considered good.
         */
        if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
                atomic_inc(&bad->karma);
                for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
                     i++) {
                        struct drm_sched_rq *rq = &sched->sched_rq[i];

                        spin_lock(&rq->lock);
                        list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
                                if (bad->s_fence->scheduled.context ==
                                    entity->fence_context) {
                                        if (atomic_read(&bad->karma) >
                                            bad->sched->hang_limit)
                                                if (entity->guilty)
                                                        atomic_set(entity->guilty, 1);
                                        break;
                                }
                        }
                        spin_unlock(&rq->lock);
                        if (&entity->list != &rq->entities)
                                break;
                }
        }
}
EXPORT_SYMBOL(drm_sched_increase_karma);

/**
 * drm_sched_stop - stop the scheduler
 *
 * @sched: scheduler instance
 * @bad: job which caused the time out
 *
 * Stop the scheduler, and in addition remove and free all completed jobs.
 * Note: the bad job will not be freed as it might be used later, so it is the
 * caller's responsibility to release it manually if it is no longer part of
 * the mirror list.
 *
 */
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
{
        struct drm_sched_job *s_job, *tmp;

        kthread_park(sched->thread);

        /*
         * Reinsert the bad job here - now it's safe as
         * drm_sched_get_cleanup_job cannot race against us and release the
         * bad job at this point - we parked (waited for) any in progress
         * (earlier) cleanups and drm_sched_get_cleanup_job will not be called
         * now until the scheduler thread is unparked.
         */
        if (bad && bad->sched == sched)
                /*
                 * Add at the head of the queue to reflect it was the earliest
                 * job extracted.
                 */
                list_add(&bad->node, &sched->ring_mirror_list);

        /*
         * Iterate the job list from the last to the first entry and either
         * deactivate their HW callbacks or remove them from the mirror list
         * if they have already signaled.
         * This iteration is thread safe as the sched thread is stopped.
         */
        list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) {
                if (s_job->s_fence->parent &&
                    dma_fence_remove_callback(s_job->s_fence->parent,
                                              &s_job->cb)) {
                        atomic_dec(&sched->hw_rq_count);
                } else {
                        /*
                         * Remove the job from ring_mirror_list.
                         * Locking here is for concurrent resume timeout.
                         */
                        spin_lock(&sched->job_list_lock);
                        list_del_init(&s_job->node);
                        spin_unlock(&sched->job_list_lock);

                        /*
                         * Wait for the job's HW fence callback to finish using
                         * s_job before releasing it.
                         *
                         * The job is still alive, so the fence refcount is at
                         * least 1.
                         */
                        dma_fence_wait(&s_job->s_fence->finished, false);

                        /*
                         * We must keep the bad job alive for later use during
                         * recovery by some of the drivers but leave a hint
                         * that the guilty job must be released.
                         */
                        if (bad != s_job)
                                sched->ops->free_job(s_job);
                        else
                                sched->free_guilty = true;
                }
        }

        /*
         * Stop the pending timer in flight as we rearm it in drm_sched_start.
         * This prevents the pending timeout work in progress from firing right
         * after this TDR finished and before the newly restarted jobs had a
         * chance to complete.
         */
        cancel_delayed_work(&sched->work_tdr);
}

EXPORT_SYMBOL(drm_sched_stop);

/**
 * drm_sched_start - recover jobs after a reset
 *
 * @sched: scheduler instance
 * @full_recovery: proceed with complete sched restart
 *
 */
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
{
        struct drm_sched_job *s_job, *tmp;
        int r;

        /*
         * Locking the list is not required here as the sched thread is parked
         * so no new jobs are being inserted or removed. Also concurrent
         * GPU recoveries can't run in parallel.
         */
        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
                struct dma_fence *fence = s_job->s_fence->parent;

                atomic_inc(&sched->hw_rq_count);

                if (!full_recovery)
                        continue;

                if (fence) {
                        r = dma_fence_add_callback(fence, &s_job->cb,
                                                   drm_sched_process_job);
                        if (r == -ENOENT)
                                drm_sched_process_job(fence, &s_job->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                } else
                        drm_sched_process_job(NULL, &s_job->cb);
        }

        if (full_recovery) {
                spin_lock(&sched->job_list_lock);
                drm_sched_start_timeout(sched);
                spin_unlock(&sched->job_list_lock);
        }

        kthread_unpark(sched->thread);
}
EXPORT_SYMBOL(drm_sched_start);

/**
 * drm_sched_resubmit_jobs - helper to relaunch jobs from the mirror ring list
 *
 * @sched: scheduler instance
 *
 */
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_job *s_job, *tmp;
        uint64_t guilty_context;
        bool found_guilty = false;
        struct dma_fence *fence;

        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
                struct drm_sched_fence *s_fence = s_job->s_fence;

                if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
                        found_guilty = true;
                        guilty_context = s_job->s_fence->scheduled.context;
                }

                if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
                        dma_fence_set_error(&s_fence->finished, -ECANCELED);

                dma_fence_put(s_job->s_fence->parent);
                fence = sched->ops->run_job(s_job);

                if (IS_ERR_OR_NULL(fence)) {
                        if (IS_ERR(fence))
                                dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

                        s_job->s_fence->parent = NULL;
                } else {
                        s_job->s_fence->parent = fence;
                }
        }
}
EXPORT_SYMBOL(drm_sched_resubmit_jobs);
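
/*
 * Illustrative recovery sequence (not part of this file): a driver's
 * timedout_job callback typically stops the scheduler, bumps the karma of the
 * guilty job, resets the hardware, resubmits the remaining jobs and then
 * restarts scheduling. "foo_device", to_foo_device() and foo_hw_reset() are
 * hypothetical.
 *
 *      static void foo_timedout_job(struct drm_sched_job *bad)
 *      {
 *              struct foo_device *foo_dev = to_foo_device(bad->sched);
 *
 *              drm_sched_stop(bad->sched, bad);
 *              drm_sched_increase_karma(bad);
 *
 *              foo_hw_reset(foo_dev);
 *
 *              drm_sched_resubmit_jobs(bad->sched);
 *              drm_sched_start(bad->sched, true);
 *      }
 */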

/**
 * drm_sched_job_init - init a scheduler job
 *
 * @job: scheduler job to init
 * @entity: scheduler entity to use
 * @owner: job owner for debugging
 *
 * Refer to drm_sched_entity_push_job() documentation
 * for locking considerations.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int drm_sched_job_init(struct drm_sched_job *job,
                       struct drm_sched_entity *entity,
                       void *owner)
{
        struct drm_gpu_scheduler *sched;

        drm_sched_entity_select_rq(entity);
        if (!entity->rq)
                return -ENOENT;

        sched = entity->rq->sched;

        job->sched = sched;
        job->entity = entity;
        job->s_priority = entity->rq - sched->sched_rq;
        job->s_fence = drm_sched_fence_create(entity, owner);
        if (!job->s_fence)
                return -ENOMEM;
        job->id = atomic64_inc_return(&sched->job_id_count);

        INIT_LIST_HEAD(&job->node);

        return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);

/**
 * drm_sched_job_cleanup - clean up scheduler job resources
 *
 * @job: scheduler job to clean up
 */
void drm_sched_job_cleanup(struct drm_sched_job *job)
{
        dma_fence_put(&job->s_fence->finished);
        job->s_fence = NULL;
}
EXPORT_SYMBOL(drm_sched_job_cleanup);
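
/*
 * Illustrative submit path (not part of this file): after drm_sched_job_init()
 * succeeds the job owns a scheduler fence, so a driver that bails out before
 * pushing the job must call drm_sched_job_cleanup() itself; once the job has
 * been pushed, cleanup is normally done from the driver's free_job callback.
 * "foo_job" and foo_prepare_buffers() are hypothetical.
 *
 *      r = drm_sched_job_init(&foo_job->base, entity, file_priv);
 *      if (r)
 *              return r;
 *
 *      r = foo_prepare_buffers(foo_job);
 *      if (r) {
 *              drm_sched_job_cleanup(&foo_job->base);
 *              return r;
 *      }
 *
 *      drm_sched_entity_push_job(&foo_job->base, entity);
 */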

/**
 * drm_sched_ready - is the scheduler ready
 *
 * @sched: scheduler instance
 *
 * Return true if we can push more jobs to the hw, otherwise false.
 */
static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
{
        return atomic_read(&sched->hw_rq_count) <
                sched->hw_submission_limit;
}

/**
 * drm_sched_wakeup - Wake up the scheduler when it is ready
 *
 * @sched: scheduler instance
 *
 */
void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
{
        if (drm_sched_ready(sched))
                wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_select_entity - Select next entity to process
 *
 * @sched: scheduler instance
 *
 * Returns the entity to process or NULL if none are found.
 */
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_entity *entity;
        int i;

        if (!drm_sched_ready(sched))
                return NULL;

        /* Kernel run queue has higher priority than normal run queue */
        for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
                entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
                if (entity)
                        break;
        }

        return entity;
}

/**
 * drm_sched_process_job - process a job
 *
 * @f: fence
 * @cb: fence callbacks
 *
 * Called after the job has finished execution.
 */
static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
{
        struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
        struct drm_sched_fence *s_fence = s_job->s_fence;
        struct drm_gpu_scheduler *sched = s_fence->sched;

        atomic_dec(&sched->hw_rq_count);
        atomic_dec(&sched->num_jobs);

        trace_drm_sched_process_job(s_fence);

        dma_fence_get(&s_fence->finished);
        drm_sched_fence_finished(s_fence);
        dma_fence_put(&s_fence->finished);
        wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
 *
 * @sched: scheduler instance
 *
 * Returns the next finished job from the mirror list (if there is one)
 * ready for it to be destroyed.
 */
static struct drm_sched_job *
drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_job *job;

        /*
         * Don't destroy jobs while the timeout worker is running, or while the
         * thread is being parked and hence assumed to not touch
         * ring_mirror_list.
         */
        if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
            !cancel_delayed_work(&sched->work_tdr)) ||
            __kthread_should_park(sched->thread))
                return NULL;

        spin_lock(&sched->job_list_lock);

        job = list_first_entry_or_null(&sched->ring_mirror_list,
                                       struct drm_sched_job, node);

        if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
                /* remove job from ring_mirror_list */
                list_del_init(&job->node);
        } else {
                job = NULL;
                /* queue timeout for next job */
                drm_sched_start_timeout(sched);
        }

        spin_unlock(&sched->job_list_lock);

        return job;
}

/**
 * drm_sched_pick_best - Get a drm sched from a sched_list with the least load
 * @sched_list: list of drm_gpu_schedulers
 * @num_sched_list: number of drm_gpu_schedulers in the sched_list
 *
 * Returns a pointer to the sched with the least load or NULL if none of the
 * drm_gpu_schedulers are ready
 */
struct drm_gpu_scheduler *
drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
                     unsigned int num_sched_list)
{
        struct drm_gpu_scheduler *sched, *picked_sched = NULL;
        int i;
        unsigned int min_jobs = UINT_MAX, num_jobs;

        for (i = 0; i < num_sched_list; ++i) {
                sched = sched_list[i];

                if (!sched->ready) {
                        DRM_WARN("scheduler %s is not ready, skipping",
                                 sched->name);
                        continue;
                }

                num_jobs = atomic_read(&sched->num_jobs);
                if (num_jobs < min_jobs) {
                        min_jobs = num_jobs;
                        picked_sched = sched;
                }
        }

        return picked_sched;
}
EXPORT_SYMBOL(drm_sched_pick_best);
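
/*
 * Illustrative use (not part of this file): a driver with several identical
 * hardware rings can balance new contexts by picking the least-loaded
 * scheduler before binding an entity to it. "foo_dev" is hypothetical and
 * FOO_NUM_RINGS is assumed to be the size of the array.
 *
 *      struct drm_gpu_scheduler *sched;
 *
 *      sched = drm_sched_pick_best(foo_dev->ring_scheds, FOO_NUM_RINGS);
 *      if (sched)
 *              r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
 *                                        &sched, 1, NULL);
 */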

/**
 * drm_sched_blocked - check if the scheduler is blocked
 *
 * @sched: scheduler instance
 *
 * Returns true if blocked, otherwise false.
 */
static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
{
        if (kthread_should_park()) {
                kthread_parkme();
                return true;
        }

        return false;
}

/**
 * drm_sched_main - main scheduler thread
 *
 * @param: scheduler instance
 *
 * Returns 0.
 */
static int drm_sched_main(void *param)
{
        struct sched_param sparam = {.sched_priority = 1};
        struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
        int r;

        sched_setscheduler(current, SCHED_FIFO, &sparam);

        while (!kthread_should_stop()) {
                struct drm_sched_entity *entity = NULL;
                struct drm_sched_fence *s_fence;
                struct drm_sched_job *sched_job;
                struct dma_fence *fence;
                struct drm_sched_job *cleanup_job = NULL;

                wait_event_interruptible(sched->wake_up_worker,
                                         (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
                                         (!drm_sched_blocked(sched) &&
                                          (entity = drm_sched_select_entity(sched))) ||
                                         kthread_should_stop());

                if (cleanup_job) {
                        sched->ops->free_job(cleanup_job);
                        /* queue timeout for next job */
                        drm_sched_start_timeout(sched);
                }

                if (!entity)
                        continue;

                sched_job = drm_sched_entity_pop_job(entity);

                complete(&entity->entity_idle);

                if (!sched_job)
                        continue;

                s_fence = sched_job->s_fence;

                atomic_inc(&sched->hw_rq_count);
                drm_sched_job_begin(sched_job);

                trace_drm_run_job(sched_job, entity);
                fence = sched->ops->run_job(sched_job);
                drm_sched_fence_scheduled(s_fence);

                if (!IS_ERR_OR_NULL(fence)) {
                        s_fence->parent = dma_fence_get(fence);
                        r = dma_fence_add_callback(fence, &sched_job->cb,
                                                   drm_sched_process_job);
                        if (r == -ENOENT)
                                drm_sched_process_job(fence, &sched_job->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                        dma_fence_put(fence);
                } else {
                        if (IS_ERR(fence))
                                dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

                        drm_sched_process_job(NULL, &sched_job->cb);
                }

                wake_up(&sched->job_scheduled);
        }
        return 0;
}

/**
 * drm_sched_init - Init a gpu scheduler instance
 *
 * @sched: scheduler instance
 * @ops: backend operations for this scheduler
 * @hw_submission: number of hw submissions that can be in flight
 * @hang_limit: number of times to allow a job to hang before dropping it
 * @timeout: timeout value in jiffies for the scheduler
 * @name: name used for debugging
 *
 * Return 0 on success, otherwise error code.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
                   const struct drm_sched_backend_ops *ops,
                   unsigned hw_submission,
                   unsigned hang_limit,
                   long timeout,
                   const char *name)
{
        int i, ret;

        sched->ops = ops;
        sched->hw_submission_limit = hw_submission;
        sched->name = name;
        sched->timeout = timeout;
        sched->hang_limit = hang_limit;
        for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
                drm_sched_rq_init(sched, &sched->sched_rq[i]);

        init_waitqueue_head(&sched->wake_up_worker);
        init_waitqueue_head(&sched->job_scheduled);
        INIT_LIST_HEAD(&sched->ring_mirror_list);
        spin_lock_init(&sched->job_list_lock);
        atomic_set(&sched->hw_rq_count, 0);
        INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
        atomic_set(&sched->num_jobs, 0);
        atomic64_set(&sched->job_id_count, 0);

        /* Each scheduler will run on a separate kernel thread */
        sched->thread = kthread_run(drm_sched_main, sched, sched->name);
        if (IS_ERR(sched->thread)) {
                ret = PTR_ERR(sched->thread);
                sched->thread = NULL;
                DRM_ERROR("Failed to create scheduler for %s.\n", name);
                return ret;
        }

        sched->ready = true;
        return 0;
}
EXPORT_SYMBOL(drm_sched_init);
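
/*
 * Illustrative setup (not part of this file): a driver fills in a
 * drm_sched_backend_ops table and creates one scheduler per hardware ring,
 * tearing it down again with drm_sched_fini(). The foo_* callbacks, the
 * submission depth of 64, the hang limit of 2 and the 5 second timeout are
 * hypothetical values.
 *
 *      static const struct drm_sched_backend_ops foo_sched_ops = {
 *              .dependency = foo_job_dependency,
 *              .run_job = foo_job_run,
 *              .timedout_job = foo_timedout_job,
 *              .free_job = foo_job_free,
 *      };
 *
 *      r = drm_sched_init(&foo_dev->ring_sched, &foo_sched_ops,
 *                         64, 2, msecs_to_jiffies(5000), "foo-ring0");
 *      if (r)
 *              return r;
 *
 *      ...
 *
 *      drm_sched_fini(&foo_dev->ring_sched);
 */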

/**
 * drm_sched_fini - Destroy a gpu scheduler
 *
 * @sched: scheduler instance
 *
 * Tears down and cleans up the scheduler.
 */
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
        if (sched->thread)
                kthread_stop(sched->thread);

        sched->ready = false;
}
EXPORT_SYMBOL(drm_sched_fini);