drm/i915: Keep the most recently used active-fence upon discard
drivers/gpu/drm/i915/i915_active.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2019 Intel Corporation
5  */
6
7 #include <linux/debugobjects.h>
8
9 #include "gt/intel_context.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_engine_pm.h"
12 #include "gt/intel_ring.h"
13
14 #include "i915_drv.h"
15 #include "i915_active.h"
16 #include "i915_globals.h"
17
18 /*
19  * Active refs memory management
20  *
21  * To be more economical with memory, we reap all the i915_active trees as
22  * they idle (when we know the active requests are inactive) and allocate the
23  * nodes from a local slab cache, in the hope of reducing fragmentation.
24  */
25 static struct i915_global_active {
26         struct i915_global base;
27         struct kmem_cache *slab_cache;
28 } global;
29
30 struct active_node {
31         struct rb_node node;
32         struct i915_active_fence base;
33         struct i915_active *ref;
34         u64 timeline;
35 };
36
37 #define fetch_node(x) rb_entry(READ_ONCE(x), typeof(struct active_node), node)
38
39 static inline struct active_node *
40 node_from_active(struct i915_active_fence *active)
41 {
42         return container_of(active, struct active_node, base);
43 }
44
45 #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)
46
47 static inline bool is_barrier(const struct i915_active_fence *active)
48 {
49         return IS_ERR(rcu_access_pointer(active->fence));
50 }
51
52 static inline struct llist_node *barrier_to_ll(struct active_node *node)
53 {
54         GEM_BUG_ON(!is_barrier(&node->base));
55         return (struct llist_node *)&node->base.cb.node;
56 }
57
58 static inline struct intel_engine_cs *
59 __barrier_to_engine(struct active_node *node)
60 {
61         return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
62 }
63
64 static inline struct intel_engine_cs *
65 barrier_to_engine(struct active_node *node)
66 {
67         GEM_BUG_ON(!is_barrier(&node->base));
68         return __barrier_to_engine(node);
69 }
70
71 static inline struct active_node *barrier_from_ll(struct llist_node *x)
72 {
73         return container_of((struct list_head *)x,
74                             struct active_node, base.cb.node);
75 }
76
77 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)
78
79 static void *active_debug_hint(void *addr)
80 {
81         struct i915_active *ref = addr;
82
83         return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
84 }
85
86 static struct debug_obj_descr active_debug_desc = {
87         .name = "i915_active",
88         .debug_hint = active_debug_hint,
89 };
90
91 static void debug_active_init(struct i915_active *ref)
92 {
93         debug_object_init(ref, &active_debug_desc);
94 }
95
96 static void debug_active_activate(struct i915_active *ref)
97 {
98         lockdep_assert_held(&ref->tree_lock);
99         if (!atomic_read(&ref->count)) /* before the first inc */
100                 debug_object_activate(ref, &active_debug_desc);
101 }
102
103 static void debug_active_deactivate(struct i915_active *ref)
104 {
105         lockdep_assert_held(&ref->tree_lock);
106         if (!atomic_read(&ref->count)) /* after the last dec */
107                 debug_object_deactivate(ref, &active_debug_desc);
108 }
109
110 static void debug_active_fini(struct i915_active *ref)
111 {
112         debug_object_free(ref, &active_debug_desc);
113 }
114
115 static void debug_active_assert(struct i915_active *ref)
116 {
117         debug_object_assert_init(ref, &active_debug_desc);
118 }
119
120 #else
121
122 static inline void debug_active_init(struct i915_active *ref) { }
123 static inline void debug_active_activate(struct i915_active *ref) { }
124 static inline void debug_active_deactivate(struct i915_active *ref) { }
125 static inline void debug_active_fini(struct i915_active *ref) { }
126 static inline void debug_active_assert(struct i915_active *ref) { }
127
128 #endif
129
130 static void
131 __active_retire(struct i915_active *ref)
132 {
133         struct rb_root root = RB_ROOT;
134         struct active_node *it, *n;
135         unsigned long flags;
136
137         GEM_BUG_ON(i915_active_is_idle(ref));
138
139         /* return the unused nodes to our slabcache -- flushing the allocator */
140         if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
141                 return;
142
143         GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
144         debug_active_deactivate(ref);
145
146         /* Even if we have not used the cache, we may still have a barrier */
147         if (!ref->cache)
148                 ref->cache = fetch_node(ref->tree.rb_node);
149
150         /* Keep the MRU cached node for reuse */
151         if (ref->cache) {
152                 /* Discard all other nodes in the tree */
153                 rb_erase(&ref->cache->node, &ref->tree);
154                 root = ref->tree;
155
156                 /* Rebuild the tree with only the cached node */
157                 rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node);
158                 rb_insert_color(&ref->cache->node, &ref->tree);
159                 GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node);
160         }
161
162         spin_unlock_irqrestore(&ref->tree_lock, flags);
163
164         /* After the final retire, the entire struct may be freed */
165         if (ref->retire)
166                 ref->retire(ref);
167
168         /* ... except if you wait on it, you must manage your own references! */
169         wake_up_var(ref);
170
171         /* Finally free the discarded timeline tree */
172         rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
173                 GEM_BUG_ON(i915_active_fence_isset(&it->base));
174                 kmem_cache_free(global.slab_cache, it);
175         }
176 }
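
/*
 * Illustration of the retire path above (the point of keeping ref->cache):
 * only the most recently used node survives a retire cycle. If the tree
 * held nodes for timelines {1, 2, 3} and ref->cache pointed at timeline 2,
 * the tree is rebuilt to contain just that node, ready for immediate reuse,
 * while the nodes for timelines 1 and 3 are returned to the slab cache.
 */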
177
178 static void
179 active_work(struct work_struct *wrk)
180 {
181         struct i915_active *ref = container_of(wrk, typeof(*ref), work);
182
183         GEM_BUG_ON(!atomic_read(&ref->count));
184         if (atomic_add_unless(&ref->count, -1, 1))
185                 return;
186
187         __active_retire(ref);
188 }
189
190 static void
191 active_retire(struct i915_active *ref)
192 {
193         GEM_BUG_ON(!atomic_read(&ref->count));
194         if (atomic_add_unless(&ref->count, -1, 1))
195                 return;
196
197         if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
198                 queue_work(system_unbound_wq, &ref->work);
199                 return;
200         }
201
202         __active_retire(ref);
203 }
204
205 static inline struct dma_fence **
206 __active_fence_slot(struct i915_active_fence *active)
207 {
208         return (struct dma_fence ** __force)&active->fence;
209 }
210
211 static inline bool
212 active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
213 {
214         struct i915_active_fence *active =
215                 container_of(cb, typeof(*active), cb);
216
217         return cmpxchg(__active_fence_slot(active), fence, NULL) == fence;
218 }
219
220 static void
221 node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
222 {
223         if (active_fence_cb(fence, cb))
224                 active_retire(container_of(cb, struct active_node, base.cb)->ref);
225 }
226
227 static void
228 excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
229 {
230         if (active_fence_cb(fence, cb))
231                 active_retire(container_of(cb, struct i915_active, excl.cb));
232 }
233
234 static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
235 {
236         struct active_node *it;
237
238         /*
239          * We track the most recently used timeline to skip an rbtree search
240          * for the common case; under typical loads we never need the rbtree
241          * at all. We can reuse the last slot if it is empty, that is,
242          * after the previous activity has been retired, or if it matches the
243          * current timeline.
244          */
245         it = READ_ONCE(ref->cache);
246         if (it && it->timeline == idx)
247                 return it;
248
249         BUILD_BUG_ON(offsetof(typeof(*it), node));
250
251         /* While active, the tree can only be built; not destroyed */
252         GEM_BUG_ON(i915_active_is_idle(ref));
253
254         it = fetch_node(ref->tree.rb_node);
255         while (it) {
256                 if (it->timeline < idx) {
257                         it = fetch_node(it->node.rb_right);
258                 } else if (it->timeline > idx) {
259                         it = fetch_node(it->node.rb_left);
260                 } else {
261                         WRITE_ONCE(ref->cache, it);
262                         break;
263                 }
264         }
265
266         /* NB: If the tree rotated beneath us, we may miss our target. */
267         return it;
268 }
269
270 static struct i915_active_fence *
271 active_instance(struct i915_active *ref, u64 idx)
272 {
273         struct active_node *node, *prealloc;
274         struct rb_node **p, *parent;
275
276         node = __active_lookup(ref, idx);
277         if (likely(node))
278                 return &node->base;
279
280         /* Preallocate a replacement, just in case */
281         prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
282         if (!prealloc)
283                 return NULL;
284
285         spin_lock_irq(&ref->tree_lock);
286         GEM_BUG_ON(i915_active_is_idle(ref));
287
288         parent = NULL;
289         p = &ref->tree.rb_node;
290         while (*p) {
291                 parent = *p;
292
293                 node = rb_entry(parent, struct active_node, node);
294                 if (node->timeline == idx) {
295                         kmem_cache_free(global.slab_cache, prealloc);
296                         goto out;
297                 }
298
299                 if (node->timeline < idx)
300                         p = &parent->rb_right;
301                 else
302                         p = &parent->rb_left;
303         }
304
305         node = prealloc;
306         __i915_active_fence_init(&node->base, NULL, node_retire);
307         node->ref = ref;
308         node->timeline = idx;
309
310         rb_link_node(&node->node, parent, p);
311         rb_insert_color(&node->node, &ref->tree);
312
313 out:
314         WRITE_ONCE(ref->cache, node);
315         spin_unlock_irq(&ref->tree_lock);
316
317         return &node->base;
318 }
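
/*
 * Note on active_instance() above: the replacement node is allocated with
 * GFP_KERNEL before taking ref->tree_lock, as that spinlock is held with
 * interrupts disabled and no sleeping allocation may occur beneath it. If
 * another thread wins the race and inserts the same timeline first, the
 * preallocation is simply handed back to the slab.
 */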
319
320 void __i915_active_init(struct i915_active *ref,
321                         int (*active)(struct i915_active *ref),
322                         void (*retire)(struct i915_active *ref),
323                         struct lock_class_key *mkey,
324                         struct lock_class_key *wkey)
325 {
326         unsigned long bits;
327
328         debug_active_init(ref);
329
330         ref->flags = 0;
331         ref->active = active;
332         ref->retire = ptr_unpack_bits(retire, &bits, 2);
333         if (bits & I915_ACTIVE_MAY_SLEEP)
334                 ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
335
336         spin_lock_init(&ref->tree_lock);
337         ref->tree = RB_ROOT;
338         ref->cache = NULL;
339
340         init_llist_head(&ref->preallocated_barriers);
341         atomic_set(&ref->count, 0);
342         __mutex_init(&ref->mutex, "i915_active", mkey);
343         __i915_active_fence_init(&ref->excl, NULL, excl_retire);
344         INIT_WORK(&ref->work, active_work);
345 #if IS_ENABLED(CONFIG_LOCKDEP)
346         lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
347 #endif
348 }
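
/*
 * A minimal lifecycle sketch for an i915_active tracker. The names obj,
 * my_retire and rq below are hypothetical caller-side examples, and the
 * i915_active_init() convenience wrapper lives in i915_active.h:
 *
 *	i915_active_init(&obj->active, NULL, my_retire);
 *
 *	err = i915_active_acquire(&obj->active);
 *	if (err == 0) {
 *		err = i915_active_ref(&obj->active,
 *				      rq->fence.context, &rq->fence);
 *		i915_active_release(&obj->active);
 *	}
 *
 * (Callers typically use the i915_active_add_request() helper from
 * i915_active.h for the middle step.) Once every tracked fence has
 * signalled, my_retire() is invoked and the tracker idles, after which
 * i915_active_fini() may tear it down.
 */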
349
350 static bool ____active_del_barrier(struct i915_active *ref,
351                                    struct active_node *node,
352                                    struct intel_engine_cs *engine)
353
354 {
355         struct llist_node *head = NULL, *tail = NULL;
356         struct llist_node *pos, *next;
357
358         GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);
359
360         /*
361          * Rebuild the llist excluding our node. We may perform this
362          * outside of the kernel_context timeline mutex and so someone
363          * else may be manipulating the engine->barrier_tasks, in
364          * which case either we or they will be upset :)
365          *
366          * A second __active_del_barrier() will report failure to claim
367          * the active_node and the caller will just shrug and know not to
368          * claim ownership of its node.
369          *
370          * A concurrent i915_request_add_active_barriers() will miss adding
371          * any of the tasks, but we will try again on the next -- and since
372          * we are actively using the barrier, we know that there will be
373          * at least another opportunity when we idle.
374          */
375         llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
376                 if (node == barrier_from_ll(pos)) {
377                         node = NULL;
378                         continue;
379                 }
380
381                 pos->next = head;
382                 head = pos;
383                 if (!tail)
384                         tail = pos;
385         }
386         if (head)
387                 llist_add_batch(head, tail, &engine->barrier_tasks);
388
389         return !node;
390 }
391
392 static bool
393 __active_del_barrier(struct i915_active *ref, struct active_node *node)
394 {
395         return ____active_del_barrier(ref, node, barrier_to_engine(node));
396 }
397
398 static bool
399 replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
400 {
401         if (!is_barrier(active)) /* proto-node used by our idle barrier? */
402                 return false;
403
404         /*
405          * This request is on the kernel_context timeline, and so
406          * we can use it to substitute for the pending idle-barrier
407          * request that we want to emit on the kernel_context.
408          */
409         __active_del_barrier(ref, node_from_active(active));
410         return true;
411 }
412
413 int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
414 {
415         struct i915_active_fence *active;
416         int err;
417
418         /* Prevent reaping in case we malloc/wait while building the tree */
419         err = i915_active_acquire(ref);
420         if (err)
421                 return err;
422
423         active = active_instance(ref, idx);
424         if (!active) {
425                 err = -ENOMEM;
426                 goto out;
427         }
428
429         if (replace_barrier(ref, active)) {
430                 RCU_INIT_POINTER(active->fence, NULL);
431                 atomic_dec(&ref->count);
432         }
433         if (!__i915_active_fence_set(active, fence))
434                 __i915_active_acquire(ref);
435
436 out:
437         i915_active_release(ref);
438         return err;
439 }
440
441 static struct dma_fence *
442 __i915_active_set_fence(struct i915_active *ref,
443                         struct i915_active_fence *active,
444                         struct dma_fence *fence)
445 {
446         struct dma_fence *prev;
447
448         if (replace_barrier(ref, active)) {
449                 RCU_INIT_POINTER(active->fence, fence);
450                 return NULL;
451         }
452
453         rcu_read_lock();
454         prev = __i915_active_fence_set(active, fence);
455         if (prev)
456                 prev = dma_fence_get_rcu(prev);
457         else
458                 __i915_active_acquire(ref);
459         rcu_read_unlock();
460
461         return prev;
462 }
463
464 static struct i915_active_fence *
465 __active_fence(struct i915_active *ref, u64 idx)
466 {
467         struct active_node *it;
468
469         it = __active_lookup(ref, idx);
470         if (unlikely(!it)) { /* Contention with parallel tree builders! */
471                 spin_lock_irq(&ref->tree_lock);
472                 it = __active_lookup(ref, idx);
473                 spin_unlock_irq(&ref->tree_lock);
474         }
475         GEM_BUG_ON(!it); /* slot must be preallocated */
476
477         return &it->base;
478 }
479
480 struct dma_fence *
481 __i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
482 {
483         /* Only valid while active, see i915_active_acquire_for_context() */
484         return __i915_active_set_fence(ref, __active_fence(ref, idx), fence);
485 }
486
487 struct dma_fence *
488 i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
489 {
490         /* We expect the caller to manage the exclusive timeline ordering */
491         return __i915_active_set_fence(ref, &ref->excl, f);
492 }
493
494 bool i915_active_acquire_if_busy(struct i915_active *ref)
495 {
496         debug_active_assert(ref);
497         return atomic_add_unless(&ref->count, 1, 0);
498 }
499
500 static void __i915_active_activate(struct i915_active *ref)
501 {
502         spin_lock_irq(&ref->tree_lock); /* __active_retire() */
503         if (!atomic_fetch_inc(&ref->count))
504                 debug_active_activate(ref);
505         spin_unlock_irq(&ref->tree_lock);
506 }
507
508 int i915_active_acquire(struct i915_active *ref)
509 {
510         int err;
511
512         if (i915_active_acquire_if_busy(ref))
513                 return 0;
514
515         if (!ref->active) {
516                 __i915_active_activate(ref);
517                 return 0;
518         }
519
520         err = mutex_lock_interruptible(&ref->mutex);
521         if (err)
522                 return err;
523
524         if (likely(!i915_active_acquire_if_busy(ref))) {
525                 err = ref->active(ref);
526                 if (!err)
527                         __i915_active_activate(ref);
528         }
529
530         mutex_unlock(&ref->mutex);
531
532         return err;
533 }
534
535 int i915_active_acquire_for_context(struct i915_active *ref, u64 idx)
536 {
537         struct i915_active_fence *active;
538         int err;
539
540         err = i915_active_acquire(ref);
541         if (err)
542                 return err;
543
544         active = active_instance(ref, idx);
545         if (!active) {
546                 i915_active_release(ref);
547                 return -ENOMEM;
548         }
549
550         return 0; /* return with active ref */
551 }
552
553 void i915_active_release(struct i915_active *ref)
554 {
555         debug_active_assert(ref);
556         active_retire(ref);
557 }
558
559 static void enable_signaling(struct i915_active_fence *active)
560 {
561         struct dma_fence *fence;
562
563         if (unlikely(is_barrier(active)))
564                 return;
565
566         fence = i915_active_fence_get(active);
567         if (!fence)
568                 return;
569
570         dma_fence_enable_sw_signaling(fence);
571         dma_fence_put(fence);
572 }
573
574 static int flush_barrier(struct active_node *it)
575 {
576         struct intel_engine_cs *engine;
577
578         if (likely(!is_barrier(&it->base)))
579                 return 0;
580
581         engine = __barrier_to_engine(it);
582         smp_rmb(); /* serialise with add_active_barriers */
583         if (!is_barrier(&it->base))
584                 return 0;
585
586         return intel_engine_flush_barriers(engine);
587 }
588
589 static int flush_lazy_signals(struct i915_active *ref)
590 {
591         struct active_node *it, *n;
592         int err = 0;
593
594         enable_signaling(&ref->excl);
595         rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
596                 err = flush_barrier(it); /* unconnected idle barrier? */
597                 if (err)
598                         break;
599
600                 enable_signaling(&it->base);
601         }
602
603         return err;
604 }
605
606 int __i915_active_wait(struct i915_active *ref, int state)
607 {
608         int err;
609
610         might_sleep();
611
612         if (!i915_active_acquire_if_busy(ref))
613                 return 0;
614
615         /* Any fence added after the wait begins will not be auto-signaled */
616         err = flush_lazy_signals(ref);
617         i915_active_release(ref);
618         if (err)
619                 return err;
620
621         if (!i915_active_is_idle(ref) &&
622             ___wait_var_event(ref, i915_active_is_idle(ref),
623                               state, 0, 0, schedule()))
624                 return -EINTR;
625
626         flush_work(&ref->work);
627         return 0;
628 }
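
/*
 * Callers normally reach the wait above through the i915_active_wait()
 * wrapper in i915_active.h, which passes TASK_INTERRUPTIBLE as @state;
 * TASK_UNINTERRUPTIBLE or TASK_KILLABLE may be used for stricter waits.
 */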
629
630 static int __await_active(struct i915_active_fence *active,
631                           int (*fn)(void *arg, struct dma_fence *fence),
632                           void *arg)
633 {
634         struct dma_fence *fence;
635
636         if (is_barrier(active)) /* XXX flush the barrier? */
637                 return 0;
638
639         fence = i915_active_fence_get(active);
640         if (fence) {
641                 int err;
642
643                 err = fn(arg, fence);
644                 dma_fence_put(fence);
645                 if (err < 0)
646                         return err;
647         }
648
649         return 0;
650 }
651
652 struct wait_barrier {
653         struct wait_queue_entry base;
654         struct i915_active *ref;
655 };
656
657 static int
658 barrier_wake(wait_queue_entry_t *wq, unsigned int mode, int flags, void *key)
659 {
660         struct wait_barrier *wb = container_of(wq, typeof(*wb), base);
661
662         if (i915_active_is_idle(wb->ref)) {
663                 list_del(&wq->entry);
664                 i915_sw_fence_complete(wq->private);
665                 kfree(wq);
666         }
667
668         return 0;
669 }
670
671 static int __await_barrier(struct i915_active *ref, struct i915_sw_fence *fence)
672 {
673         struct wait_barrier *wb;
674
675         wb = kmalloc(sizeof(*wb), GFP_KERNEL);
676         if (unlikely(!wb))
677                 return -ENOMEM;
678
679         GEM_BUG_ON(i915_active_is_idle(ref));
680         if (!i915_sw_fence_await(fence)) {
681                 kfree(wb);
682                 return -EINVAL;
683         }
684
685         wb->base.flags = 0;
686         wb->base.func = barrier_wake;
687         wb->base.private = fence;
688         wb->ref = ref;
689
690         add_wait_queue(__var_waitqueue(ref), &wb->base);
691         return 0;
692 }
693
694 static int await_active(struct i915_active *ref,
695                         unsigned int flags,
696                         int (*fn)(void *arg, struct dma_fence *fence),
697                         void *arg, struct i915_sw_fence *barrier)
698 {
699         int err = 0;
700
701         if (!i915_active_acquire_if_busy(ref))
702                 return 0;
703
704         if (flags & I915_ACTIVE_AWAIT_EXCL &&
705             rcu_access_pointer(ref->excl.fence)) {
706                 err = __await_active(&ref->excl, fn, arg);
707                 if (err)
708                         goto out;
709         }
710
711         if (flags & I915_ACTIVE_AWAIT_ACTIVE) {
712                 struct active_node *it, *n;
713
714                 rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
715                         err = __await_active(&it->base, fn, arg);
716                         if (err)
717                                 goto out;
718                 }
719         }
720
721         if (flags & I915_ACTIVE_AWAIT_BARRIER) {
722                 err = flush_lazy_signals(ref);
723                 if (err)
724                         goto out;
725
726                 err = __await_barrier(ref, barrier);
727                 if (err)
728                         goto out;
729         }
730
731 out:
732         i915_active_release(ref);
733         return err;
734 }
735
736 static int rq_await_fence(void *arg, struct dma_fence *fence)
737 {
738         return i915_request_await_dma_fence(arg, fence);
739 }
740
741 int i915_request_await_active(struct i915_request *rq,
742                               struct i915_active *ref,
743                               unsigned int flags)
744 {
745         return await_active(ref, flags, rq_await_fence, rq, &rq->submit);
746 }
747
748 static int sw_await_fence(void *arg, struct dma_fence *fence)
749 {
750         return i915_sw_fence_await_dma_fence(arg, fence, 0,
751                                              GFP_NOWAIT | __GFP_NOWARN);
752 }
753
754 int i915_sw_fence_await_active(struct i915_sw_fence *fence,
755                                struct i915_active *ref,
756                                unsigned int flags)
757 {
758         return await_active(ref, flags, sw_await_fence, fence, fence);
759 }
760
761 void i915_active_fini(struct i915_active *ref)
762 {
763         debug_active_fini(ref);
764         GEM_BUG_ON(atomic_read(&ref->count));
765         GEM_BUG_ON(work_pending(&ref->work));
766         mutex_destroy(&ref->mutex);
767
768         if (ref->cache)
769                 kmem_cache_free(global.slab_cache, ref->cache);
770 }
771
772 static inline bool is_idle_barrier(struct active_node *node, u64 idx)
773 {
774         return node->timeline == idx && !i915_active_fence_isset(&node->base);
775 }
776
777 static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
778 {
779         struct rb_node *prev, *p;
780
781         if (RB_EMPTY_ROOT(&ref->tree))
782                 return NULL;
783
784         spin_lock_irq(&ref->tree_lock);
785         GEM_BUG_ON(i915_active_is_idle(ref));
786
787         /*
788          * Try to reuse any existing barrier nodes already allocated for this
789          * i915_active, due to overlapping active phases there is likely a
790          * node kept alive (as we reuse before parking). We prefer to reuse
791          * completely idle barriers (less hassle in manipulating the llists),
792          * but otherwise any will do.
793          */
794         if (ref->cache && is_idle_barrier(ref->cache, idx)) {
795                 p = &ref->cache->node;
796                 goto match;
797         }
798
799         prev = NULL;
800         p = ref->tree.rb_node;
801         while (p) {
802                 struct active_node *node =
803                         rb_entry(p, struct active_node, node);
804
805                 if (is_idle_barrier(node, idx))
806                         goto match;
807
808                 prev = p;
809                 if (node->timeline < idx)
810                         p = p->rb_right;
811                 else
812                         p = p->rb_left;
813         }
814
815         /*
816          * No quick match, but we did find the leftmost rb_node for the
817          * kernel_context. Walk the rb_tree in-order to see if there were
818          * any idle-barriers on this timeline that we missed, or just use
819          * the first pending barrier.
820          */
821         for (p = prev; p; p = rb_next(p)) {
822                 struct active_node *node =
823                         rb_entry(p, struct active_node, node);
824                 struct intel_engine_cs *engine;
825
826                 if (node->timeline > idx)
827                         break;
828
829                 if (node->timeline < idx)
830                         continue;
831
832                 if (is_idle_barrier(node, idx))
833                         goto match;
834
835                 /*
836                  * The list of pending barriers is protected by the
837                  * kernel_context timeline, which notably we do not hold
838                  * here. i915_request_add_active_barriers() may consume
839                  * the barrier before we claim it, so we have to check
840                  * for success.
841                  */
842                 engine = __barrier_to_engine(node);
843                 smp_rmb(); /* serialise with add_active_barriers */
844                 if (is_barrier(&node->base) &&
845                     ____active_del_barrier(ref, node, engine))
846                         goto match;
847         }
848
849         spin_unlock_irq(&ref->tree_lock);
850
851         return NULL;
852
853 match:
854         rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
855         if (p == &ref->cache->node)
856                 WRITE_ONCE(ref->cache, NULL);
857         spin_unlock_irq(&ref->tree_lock);
858
859         return rb_entry(p, struct active_node, node);
860 }
861
862 int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
863                                             struct intel_engine_cs *engine)
864 {
865         intel_engine_mask_t tmp, mask = engine->mask;
866         struct llist_node *first = NULL, *last = NULL;
867         struct intel_gt *gt = engine->gt;
868
869         GEM_BUG_ON(i915_active_is_idle(ref));
870
871         /* Wait until the previous preallocation is completed */
872         while (!llist_empty(&ref->preallocated_barriers))
873                 cond_resched();
874
875         /*
876          * Preallocate a node for each physical engine supporting the target
877          * engine (remember virtual engines have more than one sibling).
878          * We can then use the preallocated nodes in
879          * i915_active_acquire_barrier().
880          */
881         GEM_BUG_ON(!mask);
882         for_each_engine_masked(engine, gt, mask, tmp) {
883                 u64 idx = engine->kernel_context->timeline->fence_context;
884                 struct llist_node *prev = first;
885                 struct active_node *node;
886
887                 node = reuse_idle_barrier(ref, idx);
888                 if (!node) {
889                         node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
890                         if (!node)
891                                 goto unwind;
892
893                         RCU_INIT_POINTER(node->base.fence, NULL);
894                         node->base.cb.func = node_retire;
895                         node->timeline = idx;
896                         node->ref = ref;
897                 }
898
899                 if (!i915_active_fence_isset(&node->base)) {
900                         /*
901                          * Mark this as being *our* unconnected proto-node.
902                          *
903                          * Since this node is not in any list, and we have
904                          * decoupled it from the rbtree, we can reuse the
905                          * request to indicate this is an idle-barrier node
906                          * and then we can use the rb_node and list pointers
907                          * for our tracking of the pending barrier.
908                          */
909                         RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
910                         node->base.cb.node.prev = (void *)engine;
911                         __i915_active_acquire(ref);
912                 }
913                 GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));
914
915                 GEM_BUG_ON(barrier_to_engine(node) != engine);
916                 first = barrier_to_ll(node);
917                 first->next = prev;
918                 if (!last)
919                         last = first;
920                 intel_engine_pm_get(engine);
921         }
922
923         GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
924         llist_add_batch(first, last, &ref->preallocated_barriers);
925
926         return 0;
927
928 unwind:
929         while (first) {
930                 struct active_node *node = barrier_from_ll(first);
931
932                 first = first->next;
933
934                 atomic_dec(&ref->count);
935                 intel_engine_pm_put(barrier_to_engine(node));
936
937                 kmem_cache_free(global.slab_cache, node);
938         }
939         return -ENOMEM;
940 }
941
942 void i915_active_acquire_barrier(struct i915_active *ref)
943 {
944         struct llist_node *pos, *next;
945         unsigned long flags;
946
947         GEM_BUG_ON(i915_active_is_idle(ref));
948
949         /*
950          * Transfer the list of preallocated barriers into the
951          * i915_active rbtree, but only as proto-nodes. They will be
952          * populated by i915_request_add_active_barriers() to point to the
953          * request that will eventually release them.
954          */
955         llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
956                 struct active_node *node = barrier_from_ll(pos);
957                 struct intel_engine_cs *engine = barrier_to_engine(node);
958                 struct rb_node **p, *parent;
959
960                 spin_lock_irqsave_nested(&ref->tree_lock, flags,
961                                          SINGLE_DEPTH_NESTING);
962                 parent = NULL;
963                 p = &ref->tree.rb_node;
964                 while (*p) {
965                         struct active_node *it;
966
967                         parent = *p;
968
969                         it = rb_entry(parent, struct active_node, node);
970                         if (it->timeline < node->timeline)
971                                 p = &parent->rb_right;
972                         else
973                                 p = &parent->rb_left;
974                 }
975                 rb_link_node(&node->node, parent, p);
976                 rb_insert_color(&node->node, &ref->tree);
977                 spin_unlock_irqrestore(&ref->tree_lock, flags);
978
979                 GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
980                 llist_add(barrier_to_ll(node), &engine->barrier_tasks);
981                 intel_engine_pm_put_delay(engine, 1);
982         }
983 }
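
/*
 * Barrier lifecycle, in brief: i915_active_acquire_preallocate_barrier()
 * reserves one proto-node per physical engine, i915_active_acquire_barrier()
 * above publishes those nodes into the rbtree and onto the engines'
 * barrier_tasks lists, and i915_request_add_active_barriers() below binds
 * them to a kernel_context request whose retirement finally releases the
 * i915_active.
 */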
984
985 static struct dma_fence **ll_to_fence_slot(struct llist_node *node)
986 {
987         return __active_fence_slot(&barrier_from_ll(node)->base);
988 }
989
990 void i915_request_add_active_barriers(struct i915_request *rq)
991 {
992         struct intel_engine_cs *engine = rq->engine;
993         struct llist_node *node, *next;
994         unsigned long flags;
995
996         GEM_BUG_ON(!intel_context_is_barrier(rq->context));
997         GEM_BUG_ON(intel_engine_is_virtual(engine));
998         GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
999
1000         node = llist_del_all(&engine->barrier_tasks);
1001         if (!node)
1002                 return;
1003         /*
1004          * Attach the list of proto-fences to the in-flight request such
1005          * that the parent i915_active will be released when this request
1006          * is retired.
1007          */
1008         spin_lock_irqsave(&rq->lock, flags);
1009         llist_for_each_safe(node, next, node) {
1010                 /* serialise with reuse_idle_barrier */
1011                 smp_store_mb(*ll_to_fence_slot(node), &rq->fence);
1012                 list_add_tail((struct list_head *)node, &rq->fence.cb_list);
1013         }
1014         spin_unlock_irqrestore(&rq->lock, flags);
1015 }
1016
1017 /*
1018  * __i915_active_fence_set: Update the last active fence along its timeline
1019  * @active: the active tracker
1020  * @fence: the new fence (under construction)
1021  *
1022  * Records the new @fence as the last active fence along its timeline in
1023  * this active tracker, moving the tracking callbacks from the previous
1024  * fence onto this one. Returns the previous fence (if not already completed),
1025  * which the caller must ensure is executed before the new fence. To keep
1026  * the ordering of fences along the timeline of the i915_active_fence well
1027  * defined, the caller should serialise (lock) updates to this tracker.
1028  */
1029 struct dma_fence *
1030 __i915_active_fence_set(struct i915_active_fence *active,
1031                         struct dma_fence *fence)
1032 {
1033         struct dma_fence *prev;
1034         unsigned long flags;
1035
1036         if (fence == rcu_access_pointer(active->fence))
1037                 return fence;
1038
1039         GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
1040
1041         /*
1042          * Consider that we have two threads arriving (A and B), with
1043          * C already resident as the active->fence.
1044          *
1045          * A does the xchg first, and so it sees C or NULL depending
1046          * on the timing of the interrupt handler. If it is NULL, the
1047          * previous fence must have been signaled and we know that
1048          * we are first on the timeline. If it is still present,
1049          * we acquire the lock on that fence and serialise with the interrupt
1050          * handler, in the process removing it from any future interrupt
1051          * callback. A will then wait on C before executing (if present).
1052          *
1053          * As B is second, it sees A as the previous fence and so waits for
1054          * it to complete its transition and takes over the occupancy for
1055          * itself -- remembering that it needs to wait on A before executing.
1056          *
1057          * Note the strong ordering of the timeline also provides consistent
1058          * nesting rules for the fence->lock; the inner lock is always the
1059          * older lock.
1060          */
1061         spin_lock_irqsave(fence->lock, flags);
1062         prev = xchg(__active_fence_slot(active), fence);
1063         if (prev) {
1064                 GEM_BUG_ON(prev == fence);
1065                 spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
1066                 __list_del_entry(&active->cb.node);
1067                 spin_unlock(prev->lock); /* serialise with prev->cb_list */
1068         }
1069         list_add_tail(&active->cb.node, &fence->cb_list);
1070         spin_unlock_irqrestore(fence->lock, flags);
1071
1072         return prev;
1073 }
1074
1075 int i915_active_fence_set(struct i915_active_fence *active,
1076                           struct i915_request *rq)
1077 {
1078         struct dma_fence *fence;
1079         int err = 0;
1080
1081         /* Must maintain timeline ordering wrt previous active requests */
1082         rcu_read_lock();
1083         fence = __i915_active_fence_set(active, &rq->fence);
1084         if (fence) /* but the previous fence may not belong to that timeline! */
1085                 fence = dma_fence_get_rcu(fence);
1086         rcu_read_unlock();
1087         if (fence) {
1088                 err = i915_request_await_dma_fence(rq, fence);
1089                 dma_fence_put(fence);
1090         }
1091
1092         return err;
1093 }
1094
1095 void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
1096 {
1097         active_fence_cb(fence, cb);
1098 }
1099
1100 struct auto_active {
1101         struct i915_active base;
1102         struct kref ref;
1103 };
1104
1105 struct i915_active *i915_active_get(struct i915_active *ref)
1106 {
1107         struct auto_active *aa = container_of(ref, typeof(*aa), base);
1108
1109         kref_get(&aa->ref);
1110         return &aa->base;
1111 }
1112
1113 static void auto_release(struct kref *ref)
1114 {
1115         struct auto_active *aa = container_of(ref, typeof(*aa), ref);
1116
1117         i915_active_fini(&aa->base);
1118         kfree(aa);
1119 }
1120
1121 void i915_active_put(struct i915_active *ref)
1122 {
1123         struct auto_active *aa = container_of(ref, typeof(*aa), base);
1124
1125         kref_put(&aa->ref, auto_release);
1126 }
1127
1128 static int auto_active(struct i915_active *ref)
1129 {
1130         i915_active_get(ref);
1131         return 0;
1132 }
1133
1134 static void auto_retire(struct i915_active *ref)
1135 {
1136         i915_active_put(ref);
1137 }
1138
1139 struct i915_active *i915_active_create(void)
1140 {
1141         struct auto_active *aa;
1142
1143         aa = kmalloc(sizeof(*aa), GFP_KERNEL);
1144         if (!aa)
1145                 return NULL;
1146
1147         kref_init(&aa->ref);
1148         i915_active_init(&aa->base, auto_active, auto_retire);
1149
1150         return &aa->base;
1151 }
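
/*
 * Illustrative use of the reference-counted wrapper above (a sketch only,
 * with error handling elided):
 *
 *	struct i915_active *ref = i915_active_create();
 *
 *	err = i915_active_acquire(ref);
 *	...
 *	i915_active_release(ref);
 *
 *	i915_active_put(ref);
 *
 * Each idle-to-busy transition takes an extra kref via auto_active() and
 * auto_retire() drops it when the tracker idles again; the final
 * i915_active_put() runs i915_active_fini() and frees the allocation.
 */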
1152
1153 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1154 #include "selftests/i915_active.c"
1155 #endif
1156
1157 static void i915_global_active_shrink(void)
1158 {
1159         kmem_cache_shrink(global.slab_cache);
1160 }
1161
1162 static void i915_global_active_exit(void)
1163 {
1164         kmem_cache_destroy(global.slab_cache);
1165 }
1166
1167 static struct i915_global_active global = { {
1168         .shrink = i915_global_active_shrink,
1169         .exit = i915_global_active_exit,
1170 } };
1171
1172 int __init i915_global_active_init(void)
1173 {
1174         global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
1175         if (!global.slab_cache)
1176                 return -ENOMEM;
1177
1178         i915_global_register(&global.base);
1179         return 0;
1180 }