/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include "gt/intel_engine_pm.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

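/*
 * The "BKL" here is i915's device-global struct_mutex; everything tracked in
 * this file is currently serialised by it (see the
 * lockdep_assert_held(BKL(ref)) checks below).
 */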
#define BKL(ref) (&(ref)->i915->drm.struct_mutex)

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
        struct i915_global base;
        struct kmem_cache *slab_cache;
} global;

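/*
 * Each active_node tracks the most recent request submitted along a single
 * timeline, keyed by that timeline's fence context, and is linked into the
 * owning i915_active's rbtree.
 */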
struct active_node {
        struct i915_active_request base;
        struct i915_active *ref;
        struct rb_node node;
        u64 timeline;
};

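/*
 * Called once the tracker is idle: every node's request has been retired,
 * so the nodes can all be returned to the slab cache and the tree reset.
 */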
static void
__active_park(struct i915_active *ref)
{
        struct active_node *it, *n;

        rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
                GEM_BUG_ON(i915_active_request_isset(&it->base));
                kmem_cache_free(global.slab_cache, it);
        }
        ref->tree = RB_ROOT;
}

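/*
 * Drop one tracking reference. When the final reference is retired, park
 * the tree and notify the owner through its retire() callback.
 */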
static void
__active_retire(struct i915_active *ref)
{
        GEM_BUG_ON(!ref->count);
        if (--ref->count)
                return;

        /* return the unused nodes to our slabcache */
        __active_park(ref);

        ref->retire(ref);
}

static void
node_retire(struct i915_active_request *base, struct i915_request *rq)
{
        __active_retire(container_of(base, struct active_node, base)->ref);
}

static void
last_retire(struct i915_active_request *base, struct i915_request *rq)
{
        __active_retire(container_of(base, struct i915_active, last));
}

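/*
 * Find or create the tracking slot for the timeline identified by @idx.
 * The most recently used timeline is cached in ref->last; all other
 * timelines are kept in the rbtree. Returns the slot to use (always
 * &ref->last, after evicting any other timeline into the tree), or
 * ERR_PTR(-ENOMEM) if a new node could not be allocated.
 */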
static struct i915_active_request *
active_instance(struct i915_active *ref, u64 idx)
{
        struct active_node *node;
        struct rb_node **p, *parent;
        struct i915_request *old;

        /*
         * We track the most recently used timeline to skip an rbtree search
         * for the common case; under typical loads we never need the rbtree
         * at all. We can reuse the last slot if it is empty, that is
         * after the previous activity has been retired, or if it matches the
         * current timeline.
         *
         * Note that we allow the timeline to be active simultaneously in
         * the rbtree and the last cache. We do this to avoid having
         * to search and replace the rbtree element for a new timeline, with
         * the cost being that we must be aware that the ref may be retired
         * twice for the same timeline (as the older rbtree element will be
         * retired before the new request that was added to last).
         */
        old = i915_active_request_raw(&ref->last, BKL(ref));
        if (!old || old->fence.context == idx)
                goto out;

        /* Move the currently active fence into the rbtree */
        idx = old->fence.context;

        parent = NULL;
        p = &ref->tree.rb_node;
        while (*p) {
                parent = *p;

                node = rb_entry(parent, struct active_node, node);
                if (node->timeline == idx)
                        goto replace;

                if (node->timeline < idx)
                        p = &parent->rb_right;
                else
                        p = &parent->rb_left;
        }

        node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);

        /* kmalloc may retire the ref->last (thanks shrinker)! */
        if (unlikely(!i915_active_request_raw(&ref->last, BKL(ref)))) {
                kmem_cache_free(global.slab_cache, node);
                goto out;
        }

        if (unlikely(!node))
                return ERR_PTR(-ENOMEM);

        i915_active_request_init(&node->base, NULL, node_retire);
        node->ref = ref;
        node->timeline = idx;

        rb_link_node(&node->node, parent, p);
        rb_insert_color(&node->node, &ref->tree);

replace:
        /*
         * Overwrite the previous active slot in the rbtree with last,
         * leaving last zeroed. If the previous slot is still active,
         * we must be careful as we now only expect to receive one retire
         * callback not two, and so must undo the active counting for the
         * overwritten slot.
         */
        if (i915_active_request_isset(&node->base)) {
                /* Retire ourselves from the old rq->active_list */
                __list_del_entry(&node->base.link);
                ref->count--;
                GEM_BUG_ON(!ref->count);
        }
        GEM_BUG_ON(list_empty(&ref->last.link));
        list_replace_init(&ref->last.link, &node->base.link);
        node->base.request = fetch_and_zero(&ref->last.request);

out:
        return &ref->last;
}

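/*
 * i915_active_init - initialise an i915_active tracker
 *
 * Prepare @ref to track activity across multiple timelines. @retire is
 * called once every tracked request has been retired and the tracker is
 * idle again.
 *
 * Illustrative sketch of typical use (names are placeholders):
 *
 *      i915_active_init(i915, &foo->active, foo_retire);
 *      err = i915_active_ref(&foo->active, rq->fence.context, rq);
 *      ...
 *      err = i915_active_wait(&foo->active);
 *      i915_active_fini(&foo->active);
 */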
void i915_active_init(struct drm_i915_private *i915,
                      struct i915_active *ref,
                      void (*retire)(struct i915_active *ref))
{
        ref->i915 = i915;
        ref->retire = retire;
        ref->tree = RB_ROOT;
        i915_active_request_init(&ref->last, NULL, last_retire);
        init_llist_head(&ref->barriers);
        ref->count = 0;
}

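/*
 * i915_active_ref - track the most recent request along a timeline
 *
 * Record @rq as the current activity on @timeline, replacing whichever
 * request was previously tracked there. The caller must hold struct_mutex.
 * Returns 0 on success or a negative error code.
 */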
int i915_active_ref(struct i915_active *ref,
                    u64 timeline,
                    struct i915_request *rq)
{
        struct i915_active_request *active;
        int err = 0;

        /* Prevent reaping in case we malloc/wait while building the tree */
        i915_active_acquire(ref);

        active = active_instance(ref, timeline);
        if (IS_ERR(active)) {
                err = PTR_ERR(active);
                goto out;
        }

        if (!i915_active_request_isset(active))
                ref->count++;
        __i915_active_request_set(active, rq);

        GEM_BUG_ON(!ref->count);
out:
        i915_active_release(ref);
        return err;
}

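/*
 * Take a temporary reference on the tracker to prevent it from being
 * reaped while the caller builds or inspects the tree. Returns true if the
 * tracker was previously idle.
 */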
bool i915_active_acquire(struct i915_active *ref)
{
        lockdep_assert_held(BKL(ref));
        return !ref->count++;
}

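/*
 * Drop the reference taken by i915_active_acquire(); if it was the last
 * one, the tree is parked and the retire() callback invoked.
 */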
void i915_active_release(struct i915_active *ref)
{
        lockdep_assert_held(BKL(ref));
        __active_retire(ref);
}

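/*
 * i915_active_wait - flush the tracker
 *
 * Wait until every request tracked by @ref has been retired, leaving the
 * tracker idle. Returns 0 on success or a negative error code.
 */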
int i915_active_wait(struct i915_active *ref)
{
        struct active_node *it, *n;
        int ret = 0;

        if (i915_active_acquire(ref))
                goto out_release;

        ret = i915_active_request_retire(&ref->last, BKL(ref));
        if (ret)
                goto out_release;

        rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
                ret = i915_active_request_retire(&it->base, BKL(ref));
                if (ret)
                        break;
        }

out_release:
        i915_active_release(ref);
        return ret;
}

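/*
 * Queue @rq to wait for the completion of the request (if any) currently
 * tracked by @active before @rq itself is executed.
 */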
int i915_request_await_active_request(struct i915_request *rq,
                                      struct i915_active_request *active)
{
        struct i915_request *barrier =
                i915_active_request_raw(active, &rq->i915->drm.struct_mutex);

        return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
}

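/*
 * Queue @rq to wait for every request tracked by @ref, across all
 * timelines.
 */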
int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
{
        struct active_node *it, *n;
        int err = 0;

        /* await allocates and so we need to avoid hitting the shrinker */
        if (i915_active_acquire(ref))
                goto out; /* was idle */

        err = i915_request_await_active_request(rq, &ref->last);
        if (err)
                goto out;

        rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
                err = i915_request_await_active_request(rq, &it->base);
                if (err)
                        goto out;
        }

out:
        i915_active_release(ref);
        return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
        GEM_BUG_ON(i915_active_request_isset(&ref->last));
        GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
        GEM_BUG_ON(ref->count);
}
#endif

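/*
 * Preallocate one barrier node for each engine in @engine->mask, using the
 * engine's kernel context timeline, so that i915_active_acquire_barrier()
 * can later insert them without having to allocate. On failure, any nodes
 * already allocated are unwound and freed.
 */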
int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
                                            struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct llist_node *pos, *next;
        unsigned long tmp;
        int err;

        GEM_BUG_ON(!engine->mask);
        for_each_engine_masked(engine, i915, engine->mask, tmp) {
                struct intel_context *kctx = engine->kernel_context;
                struct active_node *node;

                node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
                if (unlikely(!node)) {
                        err = -ENOMEM;
                        goto unwind;
                }

                i915_active_request_init(&node->base,
                                         (void *)engine, node_retire);
                node->timeline = kctx->ring->timeline->fence_context;
                node->ref = ref;
                ref->count++;

                intel_engine_pm_get(engine);
                llist_add((struct llist_node *)&node->base.link,
                          &ref->barriers);
        }

        return 0;

unwind:
        llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
                struct active_node *node;

                node = container_of((struct list_head *)pos,
                                    typeof(*node), base.link);
                engine = (void *)rcu_access_pointer(node->base.request);

                intel_engine_pm_put(engine);
                kmem_cache_free(global.slab_cache, node);
        }
        return err;
}

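/*
 * Move the preallocated barrier nodes into the tracker's rbtree and hand
 * them over to each engine's barrier_tasks list; the nodes are then retired
 * along with the next request that calls i915_request_add_barriers() on
 * that engine.
 */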
void i915_active_acquire_barrier(struct i915_active *ref)
{
        struct llist_node *pos, *next;

        i915_active_acquire(ref);

        llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
                struct intel_engine_cs *engine;
                struct active_node *node;
                struct rb_node **p, *parent;

                node = container_of((struct list_head *)pos,
                                    typeof(*node), base.link);

                engine = (void *)rcu_access_pointer(node->base.request);
                RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));

                parent = NULL;
                p = &ref->tree.rb_node;
                while (*p) {
                        parent = *p;
                        if (rb_entry(parent,
                                     struct active_node,
                                     node)->timeline < node->timeline)
                                p = &parent->rb_right;
                        else
                                p = &parent->rb_left;
                }
                rb_link_node(&node->node, parent, p);
                rb_insert_color(&node->node, &ref->tree);

                llist_add((struct llist_node *)&node->base.link,
                          &engine->barrier_tasks);
                intel_engine_pm_put(engine);
        }
        i915_active_release(ref);
}

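/*
 * Splice the engine's pending barrier tasks onto @rq's active list so that
 * their retirement callbacks run when @rq itself is retired.
 */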
void i915_request_add_barriers(struct i915_request *rq)
{
        struct intel_engine_cs *engine = rq->engine;
        struct llist_node *node, *next;

        llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
                list_add_tail((struct list_head *)node, &rq->active_list);
}

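/*
 * i915_active_request_set - update the tracked request
 *
 * Replace the request tracked by @active with @rq, first ordering @rq
 * after the previously tracked request so that activity is retired in
 * order.
 */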
int i915_active_request_set(struct i915_active_request *active,
                            struct i915_request *rq)
{
        int err;

        /* Must maintain ordering wrt previous active requests */
        err = i915_request_await_active_request(rq, active);
        if (err)
                return err;

        __i915_active_request_set(active, rq);
        return 0;
}

void i915_active_retire_noop(struct i915_active_request *active,
                             struct i915_request *request)
{
        /* Space left intentionally blank */
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

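/*
 * Hooks registered with the i915 globals infrastructure (i915_globals.h) so
 * that the active_node slab cache can be trimmed and finally destroyed
 * alongside the other driver-global caches.
 */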
static void i915_global_active_shrink(void)
{
        kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
        kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
        .shrink = i915_global_active_shrink,
        .exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
        global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
        if (!global.slab_cache)
                return -ENOMEM;

        i915_global_register(&global.base);
        return 0;
}