drm/amdgpu: add reference for **fence
drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
1 /*
2  * Copyright 2015 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  *
23  */
24 #include <linux/kthread.h>
25 #include <linux/wait.h>
26 #include <linux/sched.h>
27 #include <drm/drmP.h>
28 #include "gpu_scheduler.h"
29
30 /* Initialize a given run queue struct */
31 static void init_rq(struct amd_run_queue *rq)
32 {
33         INIT_LIST_HEAD(&rq->head.list);
34         rq->head.belongto_rq = rq;
35         mutex_init(&rq->lock);
36         atomic_set(&rq->nr_entity, 0);
37         rq->current_entity = &rq->head;
38 }
39
40 /* Note: the caller must hold the lock or be in an atomic context */
41 static void rq_remove_entity(struct amd_run_queue *rq,
42                              struct amd_sched_entity *entity)
43 {
44         if (rq->current_entity == entity)
45                 rq->current_entity = list_entry(entity->list.prev,
46                                                 typeof(*entity), list);
47         list_del_init(&entity->list);
48         atomic_dec(&rq->nr_entity);
49 }
50
51 static void rq_add_entity(struct amd_run_queue *rq,
52                           struct amd_sched_entity *entity)
53 {
54         list_add_tail(&entity->list, &rq->head.list);
55         atomic_inc(&rq->nr_entity);
56 }
57
58 /**
59  * Select the next entity from the specified run queue, round-robin.
60  * It may return the same entity as the current one if that is the only
61  * one available in the queue. Returns NULL if nothing is available.
62  */
63 static struct amd_sched_entity *rq_select_entity(struct amd_run_queue *rq)
64 {
65         struct amd_sched_entity *p = rq->current_entity;
66         int i = atomic_read(&rq->nr_entity) + 1; /* real count + dummy head */
67
68         while (i) {
69                 p = list_entry(p->list.next, typeof(*p), list);
70                 if (!rq->check_entity_status(p)) {
71                         rq->current_entity = p;
72                         break;
73                 }
74                 i--;
75         }
76         return i ? p : NULL;
77 }
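
/*
 * Worked example of the round-robin walk above (illustration only, not
 * part of the driver): with entities A, B and C queued and
 * current_entity == A, the loop visits B, C, the dummy head and finally
 * A again (nr_entity + 1 steps).  The first entity whose
 * check_entity_status() returns 0 becomes the new current_entity and is
 * returned; if every candidate is skipped, i reaches 0 and the function
 * returns NULL.
 */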
78
79 static bool context_entity_is_waiting(struct amd_sched_entity *entity)
80 {
81         /* TODO: sync obj for multi-ring synchronization */
82         return false;
83 }
84
85 static int gpu_entity_check_status(struct amd_sched_entity *entity)
86 {
87         if (entity == &entity->belongto_rq->head)
88                 return -1;
89
90         if (kfifo_is_empty(&entity->job_queue) ||
91             context_entity_is_waiting(entity))
92                 return -1;
93
94         return 0;
95 }
96
97 /**
98  * Note: this function should only be called from the scheduler main
99  * function for thread safety; there is no other protection here.
100  * Return true if the scheduler can accept more work (HW queue not full).
101  *
102  * For active_hw_rq there is only one producer (the scheduler thread) and
103  * one consumer (the ISR), so it is safe to use this function in the
104  * scheduler main thread to decide whether to continue emitting more IBs.
105  */
106 static bool is_scheduler_ready(struct amd_gpu_scheduler *sched)
107 {
108         unsigned long flags;
109         bool has_room;
110
111         spin_lock_irqsave(&sched->queue_lock, flags);
112         has_room = atomic64_read(&sched->hw_rq_count) <
113                 sched->hw_submission_limit;
114         spin_unlock_irqrestore(&sched->queue_lock, flags);
115
116         return has_room;
117 }
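
/*
 * Illustration (not part of the driver): with hw_submission_limit == 2
 * the scheduler is considered ready while hw_rq_count is 0 or 1; once
 * two jobs are in flight on the HW ring, select_context() returns NULL
 * until amd_sched_process_job() decrements hw_rq_count again.
 */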
118
119 /**
120  * Select the next entity from the kernel run queue; return NULL
121  * if none is available.
122  */
123 static struct amd_sched_entity *
124 kernel_rq_select_context(struct amd_gpu_scheduler *sched)
125 {
126         struct amd_sched_entity *sched_entity;
127         struct amd_run_queue *rq = &sched->kernel_rq;
128
129         mutex_lock(&rq->lock);
130         sched_entity = rq_select_entity(rq);
131         mutex_unlock(&rq->lock);
132         return sched_entity;
133 }
134
135 /**
136  * Select the next entity that has real IB submissions pending.
137  */
138 static struct amd_sched_entity *
139 select_context(struct amd_gpu_scheduler *sched)
140 {
141         struct amd_sched_entity *wake_entity = NULL;
142         struct amd_sched_entity *tmp;
143         struct amd_run_queue *rq;
144
145         if (!is_scheduler_ready(sched))
146                 return NULL;
147
148         /* The kernel run queue has higher priority than the normal run queue */
149         tmp = kernel_rq_select_context(sched);
150         if (tmp != NULL)
151                 goto exit;
152
153         rq = &sched->sched_rq;
154         mutex_lock(&rq->lock);
155         tmp = rq_select_entity(rq);
156         mutex_unlock(&rq->lock);
157 exit:
158         if (sched->current_entity && (sched->current_entity != tmp))
159                 wake_entity = sched->current_entity;
160         sched->current_entity = tmp;
161         if (wake_entity)
162                 wake_up(&wake_entity->wait_queue);
163         return tmp;
164 }
165
166 /**
167  * Init a context entity used by the scheduler when submitting to a
168  * HW ring.
169  *
170  * @sched       The pointer to the scheduler
171  * @entity      The pointer to a valid amd_sched_entity
172  * @rq          The run queue this entity belongs to
173  * @jobs        The max number of jobs in the job queue
174  *
175  * return 0 on success, negative error code on failure
176  */
177 int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
178                           struct amd_sched_entity *entity,
179                           struct amd_run_queue *rq,
180                           uint32_t jobs)
181 {
182         uint64_t seq_ring = 0;
183         char name[20];
184
185         if (!(sched && entity && rq))
186                 return -EINVAL;
187
188         memset(entity, 0, sizeof(struct amd_sched_entity));
189         seq_ring = ((uint64_t)sched->ring_id) << 60;
190         spin_lock_init(&entity->lock);
191         entity->belongto_rq = rq;
192         entity->scheduler = sched;
193         init_waitqueue_head(&entity->wait_queue);
194         init_waitqueue_head(&entity->wait_emit);
195         entity->fence_context = fence_context_alloc(1);
196         snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context);
197         memcpy(entity->name, name, 20);
198         if (kfifo_alloc(&entity->job_queue,
199                        jobs * sizeof(void *),
200                        GFP_KERNEL))
201                 return -EINVAL;
202
203         spin_lock_init(&entity->queue_lock);
204         atomic64_set(&entity->last_queued_v_seq, seq_ring);
205         atomic64_set(&entity->last_signaled_v_seq, seq_ring);
206
207         /* Add the entity to the run queue */
208         mutex_lock(&rq->lock);
209         rq_add_entity(rq, entity);
210         mutex_unlock(&rq->lock);
211         return 0;
212 }
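
/*
 * Illustrative caller sketch (hypothetical names, not part of this file):
 * a driver would typically embed an amd_sched_entity per context and
 * initialize it against the ring's scheduler, e.g.:
 *
 *	r = amd_sched_entity_init(ring->scheduler, &ctx->entity,
 *				  &ring->scheduler->sched_rq, 32);
 *	if (r)
 *		return r;
 *
 * "ring", "ctx" and the queue depth of 32 are placeholders; only sched_rq
 * (or kernel_rq for kernel work) comes from this file.
 */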
213
214 /**
215  * Query if entity is initialized
216  *
217  * @sched       Pointer to scheduler instance
218  * @entity      The pointer to a valid scheduler entity
219  *
220  * return true if entity is initialized, false otherwise
221  */
222 static bool is_context_entity_initialized(struct amd_gpu_scheduler *sched,
223                                           struct amd_sched_entity *entity)
224 {
225         return entity->scheduler == sched &&
226                 entity->belongto_rq != NULL;
227 }
228
229 static bool is_context_entity_idle(struct amd_gpu_scheduler *sched,
230                                    struct amd_sched_entity *entity)
231 {
232         /*
233          * Idle means no pending IBs, and the entity is not
234          * currently being used.
235          */
236         barrier();
237         if ((sched->current_entity != entity) &&
238             kfifo_is_empty(&entity->job_queue))
239                 return true;
240
241         return false;
242 }
243
244 /**
245  * Destroy a context entity
246  *
247  * @sched       Pointer to scheduler instance
248  * @entity      The pointer to a valid scheduler entity
249  *
250  * return 0 on success, negative error code on failure
251  */
252 int amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
253                             struct amd_sched_entity *entity)
254 {
255         int r = 0;
256         struct amd_run_queue *rq = entity->belongto_rq;
257
258         if (!is_context_entity_initialized(sched, entity))
259                 return 0;
260
261         /*
262          * The client will not queue more IBs during this fini; consume the
263          * existing queued IBs.
264          */
265         r = wait_event_timeout(
266                 entity->wait_queue,
267                 is_context_entity_idle(sched, entity),
268                 msecs_to_jiffies(AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS)
269                 ) ?  0 : -1;
270
271         if (r) {
272                 if (entity->is_pending)
273                         DRM_INFO("Entity %p is in waiting state during fini, "
274                                  "all pending IBs will be canceled.\n",
275                                  entity);
276         }
277
278         mutex_lock(&rq->lock);
279         rq_remove_entity(rq, entity);
280         mutex_unlock(&rq->lock);
281         kfifo_free(&entity->job_queue);
282         return r;
283 }
284
285 /**
286  * Submit a normal job to the job queue
287  *
288  * @sched       The pointer to the scheduler
289  * @c_entity    The pointer to amd_sched_entity
290  * @data        The pointer to the job data required to submit
291  * @fence       Where to store a reference to the job's scheduler fence
292  *
293  * return 0 on success, -EINVAL or -ENOMEM on failure. If the entity's
294  *        job queue is full, the call blocks until a slot becomes free.
295  */
296 int amd_sched_push_job(struct amd_gpu_scheduler *sched,
297                        struct amd_sched_entity *c_entity,
298                        void *data,
299                        struct amd_sched_fence **fence)
300 {
301         struct amd_sched_job *job;
302
303         if (!fence)
304                 return -EINVAL;
305         job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL);
306         if (!job)
307                 return -ENOMEM;
308         job->sched = sched;
309         job->s_entity = c_entity;
310         job->data = data;
311         *fence = amd_sched_fence_create(c_entity);
312         if ((*fence) == NULL) {
313                 kfree(job);
314                 return -EINVAL;
315         }
316         fence_get(&(*fence)->base);
317         job->s_fence = *fence;
318         while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *),
319                                    &c_entity->queue_lock) != sizeof(void *)) {
320                 /*
321                  * The current context has used up all its IB slots;
322                  * wait here, or check whether the GPU is hung.
323                  */
324                 schedule();
325         }
326
327         wake_up_interruptible(&sched->wait_queue);
328         return 0;
329 }
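
/*
 * Illustrative caller sketch (hypothetical names, not part of this file):
 * the caller gets an extra reference on the returned scheduler fence and
 * is responsible for dropping it, e.g.:
 *
 *	struct amd_sched_fence *fence;
 *
 *	r = amd_sched_push_job(sched, &ctx->entity, job_data, &fence);
 *	if (r)
 *		return r;
 *	fence_wait(&fence->base, false);
 *	fence_put(&fence->base);
 *
 * "ctx" and "job_data" are placeholders; fence_wait()/fence_put() are the
 * generic helpers from <linux/fence.h>.
 */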
330
331 static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
332 {
333         struct amd_sched_job *sched_job =
334                 container_of(cb, struct amd_sched_job, cb);
335         struct amd_gpu_scheduler *sched;
336         unsigned long flags;
337
338         sched = sched_job->sched;
339         atomic64_set(&sched_job->s_entity->last_signaled_v_seq,
340                      sched_job->s_fence->v_seq);
341         amd_sched_fence_signal(sched_job->s_fence);
342         spin_lock_irqsave(&sched->queue_lock, flags);
343         list_del(&sched_job->list);
344         atomic64_dec(&sched->hw_rq_count);
345         spin_unlock_irqrestore(&sched->queue_lock, flags);
346
347         sched->ops->process_job(sched, sched_job);
348         fence_put(&sched_job->s_fence->base);
349         kfree(sched_job);
350         wake_up_interruptible(&sched->wait_queue);
351 }
352
353 static int amd_sched_main(void *param)
354 {
355         int r;
356         struct amd_sched_job *job;
357         struct sched_param sparam = {.sched_priority = 1};
358         struct amd_sched_entity *c_entity = NULL;
359         struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
360
361         sched_setscheduler(current, SCHED_FIFO, &sparam);
362
363         while (!kthread_should_stop()) {
364                 struct fence *fence;
365
366                 wait_event_interruptible(sched->wait_queue,
367                                          is_scheduler_ready(sched) &&
368                                          (c_entity = select_context(sched)));
369                 r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *));
370                 if (r != sizeof(void *))
371                         continue;
372                 r = sched->ops->prepare_job(sched, c_entity, job);
373                 if (!r) {
374                         unsigned long flags;
375                         spin_lock_irqsave(&sched->queue_lock, flags);
376                         list_add_tail(&job->list, &sched->active_hw_rq);
377                         atomic64_inc(&sched->hw_rq_count);
378                         spin_unlock_irqrestore(&sched->queue_lock, flags);
379                 }
380                 mutex_lock(&sched->sched_lock);
381                 fence = sched->ops->run_job(sched, c_entity, job);
382                 if (fence) {
383                         r = fence_add_callback(fence, &job->cb,
384                                                amd_sched_process_job);
385                         if (r == -ENOENT)
386                                 amd_sched_process_job(fence, &job->cb);
387                         else if (r)
388                                 DRM_ERROR("fence add callback failed (%d)\n", r);
389                         fence_put(fence);
390                 }
391                 mutex_unlock(&sched->sched_lock);
392         }
393         return 0;
394 }
395
396 /**
397  * Create a gpu scheduler
398  *
399  * @device      The device context for this scheduler
400  * @ops         The backend operations for this scheduler.
401  * @ring        The scheduler is per ring; this is the ring id.
402  * @granularity The minimum scheduling unit in ms.
403  * @preemption  Whether this ring supports preemption; 0 means no.
404  *
405  * return the pointer to the scheduler on success, otherwise return NULL
406  */
407 struct amd_gpu_scheduler *amd_sched_create(void *device,
408                                            struct amd_sched_backend_ops *ops,
409                                            unsigned ring,
410                                            unsigned granularity,
411                                            unsigned preemption,
412                                            unsigned hw_submission)
413 {
414         struct amd_gpu_scheduler *sched;
415         char name[20];
416
417         sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL);
418         if (!sched)
419                 return NULL;
420
421         sched->device = device;
422         sched->ops = ops;
423         sched->granularity = granularity;
424         sched->ring_id = ring;
425         sched->preemption = preemption;
426         sched->hw_submission_limit = hw_submission;
427         snprintf(name, sizeof(name), "gpu_sched[%d]", ring);
428         mutex_init(&sched->sched_lock);
429         spin_lock_init(&sched->queue_lock);
430         init_rq(&sched->sched_rq);
431         sched->sched_rq.check_entity_status = gpu_entity_check_status;
432
433         init_rq(&sched->kernel_rq);
434         sched->kernel_rq.check_entity_status = gpu_entity_check_status;
435
436         init_waitqueue_head(&sched->wait_queue);
437         INIT_LIST_HEAD(&sched->active_hw_rq);
438         atomic64_set(&sched->hw_rq_count, 0);
439         /* Each scheduler will run on a separate kernel thread */
440         sched->thread = kthread_create(amd_sched_main, sched, name);
441         if (!IS_ERR(sched->thread)) {
442                 wake_up_process(sched->thread);
443                 return sched;
444         }
445
446         DRM_ERROR("Failed to create scheduler for id %d.\n", ring);
447         kfree(sched);
448         return NULL;
449 }
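
/*
 * Illustrative driver-side sketch (hypothetical values, not part of this
 * file): one scheduler instance is created per ring, e.g.:
 *
 *	sched = amd_sched_create(adev, &my_backend_ops, ring->idx,
 *				 granularity, preemption, 16);
 *	if (!sched)
 *		return -ENOMEM;
 *
 * "adev", "my_backend_ops", "granularity", "preemption" and the
 * hw_submission depth of 16 are placeholders chosen for illustration.
 */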
450
451 /**
452  * Destroy a gpu scheduler
453  *
454  * @sched       The pointer to the scheduler
455  *
456  * return 0 on success.
457  */
458 int amd_sched_destroy(struct amd_gpu_scheduler *sched)
459 {
460         kthread_stop(sched->thread);
461         kfree(sched);
462         return 0;
463 }
464
465 /**
466  * Get next queued sequence number
467  *
468  * @c_entity    The context entity
469  *
470  * return the next queued sequence number
471  */
472 uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity)
473 {
474         return atomic64_read(&c_entity->last_queued_v_seq) + 1;
475 }