drm/i915/uapi: reject set_domain for discrete
[linux-2.6-microblaze.git] / drivers / gpu / drm / i915 / i915_scheduler.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2018 Intel Corporation
5  */
6
7 #include <linux/mutex.h>
8
9 #include "i915_drv.h"
10 #include "i915_globals.h"
11 #include "i915_request.h"
12 #include "i915_scheduler.h"
13
14 static struct i915_global_scheduler {
15         struct i915_global base;
16         struct kmem_cache *slab_dependencies;
17         struct kmem_cache *slab_priorities;
18 } global;
19
20 static DEFINE_SPINLOCK(schedule_lock);
21
22 static const struct i915_request *
23 node_to_request(const struct i915_sched_node *node)
24 {
25         return container_of(node, const struct i915_request, sched);
26 }
27
28 static inline bool node_started(const struct i915_sched_node *node)
29 {
30         return i915_request_started(node_to_request(node));
31 }
32
33 static inline bool node_signaled(const struct i915_sched_node *node)
34 {
35         return i915_request_completed(node_to_request(node));
36 }
37
38 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
39 {
40         return rb_entry(rb, struct i915_priolist, node);
41 }
42
43 static void assert_priolists(struct i915_sched_engine * const sched_engine)
44 {
45         struct rb_node *rb;
46         long last_prio;
47
48         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
49                 return;
50
51         GEM_BUG_ON(rb_first_cached(&sched_engine->queue) !=
52                    rb_first(&sched_engine->queue.rb_root));
53
54         last_prio = INT_MAX;
55         for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
56                 const struct i915_priolist *p = to_priolist(rb);
57
58                 GEM_BUG_ON(p->priority > last_prio);
59                 last_prio = p->priority;
60         }
61 }
62
63 struct list_head *
64 i915_sched_lookup_priolist(struct i915_sched_engine *sched_engine, int prio)
65 {
66         struct i915_priolist *p;
67         struct rb_node **parent, *rb;
68         bool first = true;
69
70         lockdep_assert_held(&sched_engine->lock);
71         assert_priolists(sched_engine);
72
73         if (unlikely(sched_engine->no_priolist))
74                 prio = I915_PRIORITY_NORMAL;
75
76 find_priolist:
77         /* most positive priority is scheduled first, equal priorities fifo */
78         rb = NULL;
79         parent = &sched_engine->queue.rb_root.rb_node;
80         while (*parent) {
81                 rb = *parent;
82                 p = to_priolist(rb);
83                 if (prio > p->priority) {
84                         parent = &rb->rb_left;
85                 } else if (prio < p->priority) {
86                         parent = &rb->rb_right;
87                         first = false;
88                 } else {
89                         return &p->requests;
90                 }
91         }
92
93         if (prio == I915_PRIORITY_NORMAL) {
94                 p = &sched_engine->default_priolist;
95         } else {
96                 p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
97                 /* Convert an allocation failure to a priority bump */
98                 if (unlikely(!p)) {
99                         prio = I915_PRIORITY_NORMAL; /* recurses just once */
100
101                         /* To maintain ordering with all rendering, after an
102                          * allocation failure we have to disable all scheduling.
103                          * Requests will then be executed in fifo, and schedule
104                          * will ensure that dependencies are emitted in fifo.
105                          * There will be still some reordering with existing
106                          * requests, so if userspace lied about their
107                          * dependencies that reordering may be visible.
108                          */
109                         sched_engine->no_priolist = true;
110                         goto find_priolist;
111                 }
112         }
113
114         p->priority = prio;
115         INIT_LIST_HEAD(&p->requests);
116
117         rb_link_node(&p->node, rb, parent);
118         rb_insert_color_cached(&p->node, &sched_engine->queue, first);
119
120         return &p->requests;
121 }
122
123 void __i915_priolist_free(struct i915_priolist *p)
124 {
125         kmem_cache_free(global.slab_priorities, p);
126 }
127
/*
 * Scratch state carried through one __i915_schedule() walk: remembers the
 * priolist already looked up for the current engine so it is not searched
 * for again. Zeroed whenever the walk switches to another engine lock.
 */
struct sched_cache {
	struct list_head *priolist;
};
131
132 static struct i915_sched_engine *
133 lock_sched_engine(struct i915_sched_node *node,
134                   struct i915_sched_engine *locked,
135                   struct sched_cache *cache)
136 {
137         const struct i915_request *rq = node_to_request(node);
138         struct i915_sched_engine *sched_engine;
139
140         GEM_BUG_ON(!locked);
141
142         /*
143          * Virtual engines complicate acquiring the engine timeline lock,
144          * as their rq->engine pointer is not stable until under that
145          * engine lock. The simple ploy we use is to take the lock then
146          * check that the rq still belongs to the newly locked engine.
147          */
148         while (locked != (sched_engine = READ_ONCE(rq->engine)->sched_engine)) {
149                 spin_unlock(&locked->lock);
150                 memset(cache, 0, sizeof(*cache));
151                 spin_lock(&sched_engine->lock);
152                 locked = sched_engine;
153         }
154
155         GEM_BUG_ON(locked != sched_engine);
156         return locked;
157 }
158
159 static void __i915_schedule(struct i915_sched_node *node,
160                             const struct i915_sched_attr *attr)
161 {
162         const int prio = max(attr->priority, node->attr.priority);
163         struct i915_sched_engine *sched_engine;
164         struct i915_dependency *dep, *p;
165         struct i915_dependency stack;
166         struct sched_cache cache;
167         LIST_HEAD(dfs);
168
169         /* Needed in order to use the temporary link inside i915_dependency */
170         lockdep_assert_held(&schedule_lock);
171         GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
172
173         if (node_signaled(node))
174                 return;
175
176         stack.signaler = node;
177         list_add(&stack.dfs_link, &dfs);
178
179         /*
180          * Recursively bump all dependent priorities to match the new request.
181          *
182          * A naive approach would be to use recursion:
183          * static void update_priorities(struct i915_sched_node *node, prio) {
184          *      list_for_each_entry(dep, &node->signalers_list, signal_link)
185          *              update_priorities(dep->signal, prio)
186          *      queue_request(node);
187          * }
188          * but that may have unlimited recursion depth and so runs a very
189          * real risk of overunning the kernel stack. Instead, we build
190          * a flat list of all dependencies starting with the current request.
191          * As we walk the list of dependencies, we add all of its dependencies
192          * to the end of the list (this may include an already visited
193          * request) and continue to walk onwards onto the new dependencies. The
194          * end result is a topological list of requests in reverse order, the
195          * last element in the list is the request we must execute first.
196          */
197         list_for_each_entry(dep, &dfs, dfs_link) {
198                 struct i915_sched_node *node = dep->signaler;
199
200                 /* If we are already flying, we know we have no signalers */
201                 if (node_started(node))
202                         continue;
203
204                 /*
205                  * Within an engine, there can be no cycle, but we may
206                  * refer to the same dependency chain multiple times
207                  * (redundant dependencies are not eliminated) and across
208                  * engines.
209                  */
210                 list_for_each_entry(p, &node->signalers_list, signal_link) {
211                         GEM_BUG_ON(p == dep); /* no cycles! */
212
213                         if (node_signaled(p->signaler))
214                                 continue;
215
216                         if (prio > READ_ONCE(p->signaler->attr.priority))
217                                 list_move_tail(&p->dfs_link, &dfs);
218                 }
219         }
220
221         /*
222          * If we didn't need to bump any existing priorities, and we haven't
223          * yet submitted this request (i.e. there is no potential race with
224          * execlists_submit_request()), we can set our own priority and skip
225          * acquiring the engine locks.
226          */
227         if (node->attr.priority == I915_PRIORITY_INVALID) {
228                 GEM_BUG_ON(!list_empty(&node->link));
229                 node->attr = *attr;
230
231                 if (stack.dfs_link.next == stack.dfs_link.prev)
232                         return;
233
234                 __list_del_entry(&stack.dfs_link);
235         }
236
237         memset(&cache, 0, sizeof(cache));
238         sched_engine = node_to_request(node)->engine->sched_engine;
239         spin_lock(&sched_engine->lock);
240
241         /* Fifo and depth-first replacement ensure our deps execute before us */
242         sched_engine = lock_sched_engine(node, sched_engine, &cache);
243         list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
244                 INIT_LIST_HEAD(&dep->dfs_link);
245
246                 node = dep->signaler;
247                 sched_engine = lock_sched_engine(node, sched_engine, &cache);
248                 lockdep_assert_held(&sched_engine->lock);
249
250                 /* Recheck after acquiring the engine->timeline.lock */
251                 if (prio <= node->attr.priority || node_signaled(node))
252                         continue;
253
254                 GEM_BUG_ON(node_to_request(node)->engine->sched_engine !=
255                            sched_engine);
256
257                 WRITE_ONCE(node->attr.priority, prio);
258
259                 /*
260                  * Once the request is ready, it will be placed into the
261                  * priority lists and then onto the HW runlist. Before the
262                  * request is ready, it does not contribute to our preemption
263                  * decisions and we can safely ignore it, as it will, and
264                  * any preemption required, be dealt with upon submission.
265                  * See engine->submit_request()
266                  */
267                 if (list_empty(&node->link))
268                         continue;
269
270                 if (i915_request_in_priority_queue(node_to_request(node))) {
271                         if (!cache.priolist)
272                                 cache.priolist =
273                                         i915_sched_lookup_priolist(sched_engine,
274                                                                    prio);
275                         list_move_tail(&node->link, cache.priolist);
276                 }
277
278                 /* Defer (tasklet) submission until after all of our updates. */
279                 if (sched_engine->kick_backend)
280                         sched_engine->kick_backend(node_to_request(node), prio);
281         }
282
283         spin_unlock(&sched_engine->lock);
284 }
285
286 void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
287 {
288         spin_lock_irq(&schedule_lock);
289         __i915_schedule(&rq->sched, attr);
290         spin_unlock_irq(&schedule_lock);
291 }
292
293 void i915_sched_node_init(struct i915_sched_node *node)
294 {
295         INIT_LIST_HEAD(&node->signalers_list);
296         INIT_LIST_HEAD(&node->waiters_list);
297         INIT_LIST_HEAD(&node->link);
298
299         i915_sched_node_reinit(node);
300 }
301
302 void i915_sched_node_reinit(struct i915_sched_node *node)
303 {
304         node->attr.priority = I915_PRIORITY_INVALID;
305         node->semaphores = 0;
306         node->flags = 0;
307
308         GEM_BUG_ON(!list_empty(&node->signalers_list));
309         GEM_BUG_ON(!list_empty(&node->waiters_list));
310         GEM_BUG_ON(!list_empty(&node->link));
311 }
312
313 static struct i915_dependency *
314 i915_dependency_alloc(void)
315 {
316         return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
317 }
318
319 static void
320 i915_dependency_free(struct i915_dependency *dep)
321 {
322         kmem_cache_free(global.slab_dependencies, dep);
323 }
324
325 bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
326                                       struct i915_sched_node *signal,
327                                       struct i915_dependency *dep,
328                                       unsigned long flags)
329 {
330         bool ret = false;
331
332         spin_lock_irq(&schedule_lock);
333
334         if (!node_signaled(signal)) {
335                 INIT_LIST_HEAD(&dep->dfs_link);
336                 dep->signaler = signal;
337                 dep->waiter = node;
338                 dep->flags = flags;
339
340                 /* All set, now publish. Beware the lockless walkers. */
341                 list_add_rcu(&dep->signal_link, &node->signalers_list);
342                 list_add_rcu(&dep->wait_link, &signal->waiters_list);
343
344                 /* Propagate the chains */
345                 node->flags |= signal->flags;
346                 ret = true;
347         }
348
349         spin_unlock_irq(&schedule_lock);
350
351         return ret;
352 }
353
354 int i915_sched_node_add_dependency(struct i915_sched_node *node,
355                                    struct i915_sched_node *signal,
356                                    unsigned long flags)
357 {
358         struct i915_dependency *dep;
359
360         dep = i915_dependency_alloc();
361         if (!dep)
362                 return -ENOMEM;
363
364         if (!__i915_sched_node_add_dependency(node, signal, dep,
365                                               flags | I915_DEPENDENCY_ALLOC))
366                 i915_dependency_free(dep);
367
368         return 0;
369 }
370
371 void i915_sched_node_fini(struct i915_sched_node *node)
372 {
373         struct i915_dependency *dep, *tmp;
374
375         spin_lock_irq(&schedule_lock);
376
377         /*
378          * Everyone we depended upon (the fences we wait to be signaled)
379          * should retire before us and remove themselves from our list.
380          * However, retirement is run independently on each timeline and
381          * so we may be called out-of-order.
382          */
383         list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
384                 GEM_BUG_ON(!list_empty(&dep->dfs_link));
385
386                 list_del_rcu(&dep->wait_link);
387                 if (dep->flags & I915_DEPENDENCY_ALLOC)
388                         i915_dependency_free(dep);
389         }
390         INIT_LIST_HEAD(&node->signalers_list);
391
392         /* Remove ourselves from everyone who depends upon us */
393         list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
394                 GEM_BUG_ON(dep->signaler != node);
395                 GEM_BUG_ON(!list_empty(&dep->dfs_link));
396
397                 list_del_rcu(&dep->signal_link);
398                 if (dep->flags & I915_DEPENDENCY_ALLOC)
399                         i915_dependency_free(dep);
400         }
401         INIT_LIST_HEAD(&node->waiters_list);
402
403         spin_unlock_irq(&schedule_lock);
404 }
405
406 void i915_request_show_with_schedule(struct drm_printer *m,
407                                      const struct i915_request *rq,
408                                      const char *prefix,
409                                      int indent)
410 {
411         struct i915_dependency *dep;
412
413         i915_request_show(m, rq, prefix, indent);
414         if (i915_request_completed(rq))
415                 return;
416
417         rcu_read_lock();
418         for_each_signaler(dep, rq) {
419                 const struct i915_request *signaler =
420                         node_to_request(dep->signaler);
421
422                 /* Dependencies along the same timeline are expected. */
423                 if (signaler->timeline == rq->timeline)
424                         continue;
425
426                 if (__i915_request_is_complete(signaler))
427                         continue;
428
429                 i915_request_show(m, signaler, prefix, indent + 2);
430         }
431         rcu_read_unlock();
432 }
433
434 void i915_sched_engine_free(struct kref *kref)
435 {
436         struct i915_sched_engine *sched_engine =
437                 container_of(kref, typeof(*sched_engine), ref);
438
439         tasklet_kill(&sched_engine->tasklet); /* flush the callback */
440         kfree(sched_engine);
441 }
442
443 struct i915_sched_engine *
444 i915_sched_engine_create(unsigned int subclass)
445 {
446         struct i915_sched_engine *sched_engine;
447
448         sched_engine = kzalloc(sizeof(*sched_engine), GFP_KERNEL);
449         if (!sched_engine)
450                 return NULL;
451
452         kref_init(&sched_engine->ref);
453
454         sched_engine->queue = RB_ROOT_CACHED;
455         sched_engine->queue_priority_hint = INT_MIN;
456
457         INIT_LIST_HEAD(&sched_engine->requests);
458         INIT_LIST_HEAD(&sched_engine->hold);
459
460         spin_lock_init(&sched_engine->lock);
461         lockdep_set_subclass(&sched_engine->lock, subclass);
462
463         /*
464          * Due to an interesting quirk in lockdep's internal debug tracking,
465          * after setting a subclass we must ensure the lock is used. Otherwise,
466          * nr_unused_locks is incremented once too often.
467          */
468 #ifdef CONFIG_DEBUG_LOCK_ALLOC
469         local_irq_disable();
470         lock_map_acquire(&sched_engine->lock.dep_map);
471         lock_map_release(&sched_engine->lock.dep_map);
472         local_irq_enable();
473 #endif
474
475         return sched_engine;
476 }
477
478 static void i915_global_scheduler_exit(void)
479 {
480         kmem_cache_destroy(global.slab_dependencies);
481         kmem_cache_destroy(global.slab_priorities);
482 }
483
484 static struct i915_global_scheduler global = { {
485         .exit = i915_global_scheduler_exit,
486 } };
487
488 int __init i915_global_scheduler_init(void)
489 {
490         global.slab_dependencies = KMEM_CACHE(i915_dependency,
491                                               SLAB_HWCACHE_ALIGN |
492                                               SLAB_TYPESAFE_BY_RCU);
493         if (!global.slab_dependencies)
494                 return -ENOMEM;
495
496         global.slab_priorities = KMEM_CACHE(i915_priolist, 0);
497         if (!global.slab_priorities)
498                 goto err_priorities;
499
500         i915_global_register(&global.base);
501         return 0;
502
503 err_priorities:
504         kmem_cache_destroy(global.slab_priorities);
505         return -ENOMEM;
506 }