drm/amdgpu: add reference for **fence
drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
1 /*
2  * Copyright 2015 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  *
23  */
24 #include <linux/kthread.h>
25 #include <linux/wait.h>
26 #include <linux/sched.h>
27 #include <drm/drmP.h>
28 #include "gpu_scheduler.h"
29
30 /* Initialize a given run queue struct */
31 static void init_rq(struct amd_run_queue *rq)
32 {
33         INIT_LIST_HEAD(&rq->head.list);
34         rq->head.belongto_rq = rq;
35         mutex_init(&rq->lock);
36         atomic_set(&rq->nr_entity, 0);
37         rq->current_entity = &rq->head;
38 }
39
40 /* Note: the caller must hold the lock or be in an atomic context */
41 static void rq_remove_entity(struct amd_run_queue *rq,
42                              struct amd_sched_entity *entity)
43 {
44         if (rq->current_entity == entity)
45                 rq->current_entity = list_entry(entity->list.prev,
46                                                 typeof(*entity), list);
47         list_del_init(&entity->list);
48         atomic_dec(&rq->nr_entity);
49 }
50
51 static void rq_add_entity(struct amd_run_queue *rq,
52                           struct amd_sched_entity *entity)
53 {
54         list_add_tail(&entity->list, &rq->head.list);
55         atomic_inc(&rq->nr_entity);
56 }
57
58 /**
59  * Select the next entity from the specified run queue, round-robin.
60  * It may return the same entity as the current one if that is the only
61  * one available in the queue. Returns NULL if nothing is available.
62  */
63 static struct amd_sched_entity *rq_select_entity(struct amd_run_queue *rq)
64 {
65         struct amd_sched_entity *p = rq->current_entity;
66         int i = atomic_read(&rq->nr_entity) + 1; /* real count + dummy head */
67
68         while (i) {
69                 p = list_entry(p->list.next, typeof(*p), list);
70                 if (!rq->check_entity_status(p)) {
71                         rq->current_entity = p;
72                         break;
73                 }
74                 i--;
75         }
76         return i ? p : NULL;
77 }
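
/*
 * Worked example of the round-robin walk above (illustration only, not
 * part of the driver): with entities A, B and C queued and
 * current_entity == A, the loop visits B, C, the dummy head and finally
 * A again (nr_entity + 1 steps).  The first entity whose
 * check_entity_status() returns 0 becomes the new current_entity and is
 * returned; if every candidate is skipped, i reaches 0 and the function
 * returns NULL.
 */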
78
79 static bool context_entity_is_waiting(struct amd_sched_entity *entity)
80 {
81         /* TODO: sync obj for multi-ring synchronization */
82         return false;
83 }
84
85 static int gpu_entity_check_status(struct amd_sched_entity *entity)
86 {
87         if (entity == &entity->belongto_rq->head)
88                 return -1;
89
90         if (kfifo_is_empty(&entity->job_queue) ||
91             context_entity_is_waiting(entity))
92                 return -1;
93
94         return 0;
95 }
96
97 /**
98  * Note: this function should only be called from the scheduler main
99  * function for thread safety; there is no other protection here.
100  * Return true if the scheduler can accept more work (HW queue not full).
101  *
102  * For active_hw_rq there is only one producer (the scheduler thread) and
103  * one consumer (the ISR), so it is safe to use this function in the
104  * scheduler main thread to decide whether to continue emitting more IBs.
105  */
106 static bool is_scheduler_ready(struct amd_gpu_scheduler *sched)
107 {
108         unsigned long flags;
109         bool has_room;
110
111         spin_lock_irqsave(&sched->queue_lock, flags);
112         has_room = atomic64_read(&sched->hw_rq_count) <
113                 sched->hw_submission_limit;
114         spin_unlock_irqrestore(&sched->queue_lock, flags);
115
116         return has_room;
117 }
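
/*
 * Illustration (not part of the driver): with hw_submission_limit == 2
 * the scheduler is considered ready while hw_rq_count is 0 or 1; once
 * two jobs are in flight on the HW ring, select_context() returns NULL
 * until amd_sched_process_job() decrements hw_rq_count again.
 */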
118
119 /**
120  * Select the next entity from the kernel run queue; return NULL
121  * if none is available.
122  */
123 static struct amd_sched_entity *
124 kernel_rq_select_context(struct amd_gpu_scheduler *sched)
125 {
126         struct amd_sched_entity *sched_entity;
127         struct amd_run_queue *rq = &sched->kernel_rq;
128
129         mutex_lock(&rq->lock);
130         sched_entity = rq_select_entity(rq);
131         mutex_unlock(&rq->lock);
132         return sched_entity;
133 }
134
135 /**
136  * Select the next entity that has real IB submissions pending.
137  */
138 static struct amd_sched_entity *
139 select_context(struct amd_gpu_scheduler *sched)
140 {
141         struct amd_sched_entity *wake_entity = NULL;
142         struct amd_sched_entity *tmp;
143         struct amd_run_queue *rq;
144
145         if (!is_scheduler_ready(sched))
146                 return NULL;
147
148         /* The kernel run queue has higher priority than the normal run queue */
149         tmp = kernel_rq_select_context(sched);
150         if (tmp != NULL)
151                 goto exit;
152
153         rq = &sched->sched_rq;
154         mutex_lock(&rq->lock);
155         tmp = rq_select_entity(rq);
156         mutex_unlock(&rq->lock);
157 exit:
158         if (sched->current_entity && (sched->current_entity != tmp))
159                 wake_entity = sched->current_entity;
160         sched->current_entity = tmp;
161         if (wake_entity)
162                 wake_up(&wake_entity->wait_queue);
163         return tmp;
164 }
165
166 /**
167  * Init a context entity used by the scheduler when submitting to a
168  * HW ring.
169  *
170  * @sched       The pointer to the scheduler
171  * @entity      The pointer to a valid amd_sched_entity
172  * @rq          The run queue this entity belongs to
173  * @jobs        The max number of jobs in the job queue
174  *
175  * return 0 on success, negative error code on failure
176  */
177 int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
178                           struct amd_sched_entity *entity,
179                           struct amd_run_queue *rq,
180                           uint32_t jobs)
181 {
182         uint64_t seq_ring = 0;
183         char name[20];
184
185         if (!(sched && entity && rq))
186                 return -EINVAL;
187
188         memset(entity, 0, sizeof(struct amd_sched_entity));
189         seq_ring = ((uint64_t)sched->ring_id) << 60;
190         spin_lock_init(&entity->lock);
191         entity->belongto_rq = rq;
192         entity->scheduler = sched;
193         init_waitqueue_head(&entity->wait_queue);
194         init_waitqueue_head(&entity->wait_emit);
195         entity->fence_context = fence_context_alloc(1);
196         snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context);
197         memcpy(entity->name, name, 20);
198         if (kfifo_alloc(&entity->job_queue,
199                        jobs * sizeof(void *),
200                        GFP_KERNEL))
201                 return -EINVAL;
202
203         spin_lock_init(&entity->queue_lock);
204         atomic64_set(&entity->last_queued_v_seq, seq_ring);
205         atomic64_set(&entity->last_signaled_v_seq, seq_ring);
206
207         /* Add the entity to the run queue */
208         mutex_lock(&rq->lock);
209         rq_add_entity(rq, entity);
210         mutex_unlock(&rq->lock);
211         return 0;
212 }
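
/*
 * Illustrative caller sketch (hypothetical names, not part of this file):
 * a driver would typically embed an amd_sched_entity per context and
 * initialize it against the ring's scheduler, e.g.:
 *
 *	r = amd_sched_entity_init(ring->scheduler, &ctx->entity,
 *				  &ring->scheduler->sched_rq, 32);
 *	if (r)
 *		return r;
 *
 * "ring", "ctx" and the queue depth of 32 are placeholders; only sched_rq
 * (or kernel_rq for kernel work) comes from this file.
 */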
213
214 /**
215  * Query if entity is initialized
216  *
217  * @sched       Pointer to scheduler instance
218  * @entity      The pointer to a valid scheduler entity
219  *
220  * return true if entity is initialized, false otherwise
221  */
222 static bool is_context_entity_initialized(struct amd_gpu_scheduler *sched,
223                                           struct amd_sched_entity *entity)
224 {
225         return entity->scheduler == sched &&
226                 entity->belongto_rq != NULL;
227 }
228
229 static bool is_context_entity_idle(struct amd_gpu_scheduler *sched,
230                                    struct amd_sched_entity *entity)
231 {
232         /*
233          * Idle means no pending IBs, and the entity is not
234          * currently being used.
235          */
236         barrier();
237         if ((sched->current_entity != entity) &&
238             kfifo_is_empty(&entity->job_queue))
239                 return true;
240
241         return false;
242 }
243
244 /**
245  * Destroy a context entity
246  *
247  * @sched       Pointer to scheduler instance
248  * @entity      The pointer to a valid scheduler entity
249  *
250  * return 0 on success, negative error code on failure
251  */
252 int amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
253                             struct amd_sched_entity *entity)
254 {
255         int r = 0;
256         struct amd_run_queue *rq = entity->belongto_rq;
257
258         if (!is_context_entity_initialized(sched, entity))
259                 return 0;
260
261         /*
262          * The client will not queue more IBs during this fini; consume the
263          * existing queued IBs.
264          */
265         r = wait_event_timeout(
266                 entity->wait_queue,
267                 is_context_entity_idle(sched, entity),
268                 msecs_to_jiffies(AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS)
269                 ) ?  0 : -1;
270
271         if (r) {
272                 if (entity->is_pending)
273                         DRM_INFO("Entity %p is in waiting state during fini, "
274                                  "all pending IBs will be canceled.\n",
275                                  entity);
276         }
277
278         mutex_lock(&rq->lock);
279         rq_remove_entity(rq, entity);
280         mutex_unlock(&rq->lock);
281         kfifo_free(&entity->job_queue);
282         return r;
283 }
284
285 /**
286  * Submit a normal job to the job queue
287  *
288  * @sched       The pointer to the scheduler
289  * @c_entity    The pointer to amd_sched_entity
290  * @data        The pointer to the job data required to submit
291  * @fence       Where to store a reference to the job's scheduler fence
292  *
293  * return 0 on success, -EINVAL or -ENOMEM on failure. If the entity's
294  *        job queue is full, the call blocks until a slot becomes free.
295  */
296 int amd_sched_push_job(struct amd_gpu_scheduler *sched,
297                        struct amd_sched_entity *c_entity,
298                        void *data,
299                        struct amd_sched_fence **fence)
300 {
301         struct amd_sched_job *job;
302
303         if (!fence)
304                 return -EINVAL;
305         job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL);
306         if (!job)
307                 return -ENOMEM;
308         job->sched = sched;
309         job->s_entity = c_entity;
310         job->data = data;
311         *fence = amd_sched_fence_create(c_entity);
312         if ((*fence) == NULL) {
313                 kfree(job);
314                 return -EINVAL;
315         }
316         fence_get(&(*fence)->base);
317         job->s_fence = *fence;
318         while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *),
319                                    &c_entity->queue_lock) != sizeof(void *)) {
320                 /*
321                  * The current context has used up all its IB slots;
322                  * wait here, or check whether the GPU is hung.
323                  */
324                 schedule();
325         }
326
327         wake_up_interruptible(&sched->wait_queue);
328         return 0;
329 }
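
/*
 * Illustrative caller sketch (hypothetical names, not part of this file):
 * the caller gets an extra reference on the returned scheduler fence and
 * is responsible for dropping it, e.g.:
 *
 *	struct amd_sched_fence *fence;
 *
 *	r = amd_sched_push_job(sched, &ctx->entity, job_data, &fence);
 *	if (r)
 *		return r;
 *	fence_wait(&fence->base, false);
 *	fence_put(&fence->base);
 *
 * "ctx" and "job_data" are placeholders; fence_wait()/fence_put() are the
 * generic helpers from <linux/fence.h>.
 */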
330
331 static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
332 {
333         struct amd_sched_job *sched_job =
334                 container_of(cb, struct amd_sched_job, cb);
335         struct amd_gpu_scheduler *sched;
336         unsigned long flags;
337
338         sched = sched_job->sched;
339         atomic64_set(&sched_job->s_entity->last_signaled_v_seq,
340                      sched_job->s_fence->v_seq);
341         amd_sched_fence_signal(sched_job->s_fence);
342         spin_lock_irqsave(&sched->queue_lock, flags);
343         list_del(&sched_job->list);
344         atomic64_dec(&sched->hw_rq_count);
345         spin_unlock_irqrestore(&sched->queue_lock, flags);
346
347         sched->ops->process_job(sched, sched_job);
348         fence_put(&sched_job->s_fence->base);
349         kfree(sched_job);
350         wake_up_interruptible(&sched->wait_queue);
351 }
352
353 static int amd_sched_main(void *param)
354 {
355         int r;
356         struct amd_sched_job *job;
357         struct sched_param sparam = {.sched_priority = 1};
358         struct amd_sched_entity *c_entity = NULL;
359         struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
360
361         sched_setscheduler(current, SCHED_FIFO, &sparam);
362
363         while (!kthread_should_stop()) {
364                 struct fence *fence;
365
366                 wait_event_interruptible(sched->wait_queue,
367                                          is_scheduler_ready(sched) &&
368                                          (c_entity = select_context(sched)));
369                 r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *));
370                 if (r != sizeof(void *))
371                         continue;
372                 r = sched->ops->prepare_job(sched, c_entity, job);
373                 if (!r) {
374                         unsigned long flags;
375                         spin_lock_irqsave(&sched->queue_lock, flags);
376                         list_add_tail(&job->list, &sched->active_hw_rq);
377                         atomic64_inc(&sched->hw_rq_count);
378                         spin_unlock_irqrestore(&sched->queue_lock, flags);
379                 }
380                 mutex_lock(&sched->sched_lock);
381                 fence = sched->ops->run_job(sched, c_entity, job);
382                 if (fence) {
383                         r = fence_add_callback(fence, &job->cb,
384                                                amd_sched_process_job);
385                         if (r == -ENOENT)
386                                 amd_sched_process_job(fence, &job->cb);
387                         else if (r)
388                                 DRM_ERROR("fence add callback failed (%d)\n", r);
389                         fence_put(fence);
390                 }
391                 mutex_unlock(&sched->sched_lock);
392         }
393         return 0;
394 }
395
396 /**
397  * Create a gpu scheduler
398  *
399  * @device      The device context for this scheduler
400  * @ops         The backend operations for this scheduler.
401  * @ring        The scheduler is per ring; this is the ring id.
402  * @granularity The minimum scheduling unit in ms.
403  * @preemption  Whether this ring supports preemption; 0 means no.
404  *
405  * return the pointer to the scheduler on success, otherwise return NULL
406  */
407 struct amd_gpu_scheduler *amd_sched_create(void *device,
408                                            struct amd_sched_backend_ops *ops,
409                                            unsigned ring,
410                                            unsigned granularity,
411                                            unsigned preemption,
412                                            unsigned hw_submission)
413 {
414         struct amd_gpu_scheduler *sched;
415         char name[20];
416
417         sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL);
418         if (!sched)
419                 return NULL;
420
421         sched->device = device;
422         sched->ops = ops;
423         sched->granularity = granularity;
424         sched->ring_id = ring;
425         sched->preemption = preemption;
426         sched->hw_submission_limit = hw_submission;
427         snprintf(name, sizeof(name), "gpu_sched[%d]", ring);
428         mutex_init(&sched->sched_lock);
429         spin_lock_init(&sched->queue_lock);
430         init_rq(&sched->sched_rq);
431         sched->sched_rq.check_entity_status = gpu_entity_check_status;
432
433         init_rq(&sched->kernel_rq);
434         sched->kernel_rq.check_entity_status = gpu_entity_check_status;
435
436         init_waitqueue_head(&sched->wait_queue);
437         INIT_LIST_HEAD(&sched->active_hw_rq);
438         atomic64_set(&sched->hw_rq_count, 0);
439         /* Each scheduler will run on a separate kernel thread */
440         sched->thread = kthread_create(amd_sched_main, sched, name);
441         if (!IS_ERR(sched->thread)) {
442                 wake_up_process(sched->thread);
443                 return sched;
444         }
445
446         DRM_ERROR("Failed to create scheduler for id %d.\n", ring);
447         kfree(sched);
448         return NULL;
449 }
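
/*
 * Illustrative driver-side sketch (hypothetical values, not part of this
 * file): one scheduler instance is created per ring, e.g.:
 *
 *	sched = amd_sched_create(adev, &my_backend_ops, ring->idx,
 *				 granularity, preemption, 16);
 *	if (!sched)
 *		return -ENOMEM;
 *
 * "adev", "my_backend_ops", "granularity", "preemption" and the
 * hw_submission depth of 16 are placeholders chosen for illustration.
 */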
450
451 /**
452  * Destroy a gpu scheduler
453  *
454  * @sched       The pointer to the scheduler
455  *
456  * return 0 on success.
457  */
458 int amd_sched_destroy(struct amd_gpu_scheduler *sched)
459 {
460         kthread_stop(sched->thread);
461         kfree(sched);
462         return 0;
463 }
464
465 /**
466  * Get next queued sequence number
467  *
468  * @c_entity    The context entity
469  *
470  * return the next queued sequence number
471  */
472 uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity)
473 {
474         return atomic64_read(&c_entity->last_queued_v_seq) + 1;
475 }