drivers/gpu/drm/i915/gt/selftest_timeline.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "../selftests/igt_flush_test.h"
#include "../selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

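/* Return the struct page backing the timeline's HWSP GGTT object. */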
static struct page *hwsp_page(struct intel_timeline *tl)
{
        struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
        return sg_page(obj->mm.pages->sgl);
}

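/*
 * Map a timeline's HWSP slot to a global cacheline index, used below as a
 * key to detect two timelines being assigned the same cacheline.
 */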
static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
        unsigned long address = (unsigned long)page_address(hwsp_page(tl));

        return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

struct mock_hwsp_freelist {
        struct intel_gt *gt;
        struct radix_tree_root cachelines;
        struct intel_timeline **history;
        unsigned long count, max;
        struct rnd_state prng;
};

enum {
        SHUFFLE = BIT(0),
};

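/*
 * Swap @tl into the history slot at @idx, dropping the cacheline bookkeeping
 * and the reference for whichever timeline previously occupied that slot.
 */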
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
                               unsigned int idx,
                               struct intel_timeline *tl)
{
        tl = xchg(&state->history[idx], tl);
        if (tl) {
                radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
                intel_timeline_put(tl);
        }
}

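/*
 * Allocate @count new timelines, verifying that no two of them share a HWSP
 * cacheline, then (optionally after shuffling the history) release a random
 * number of them so that later allocations can reuse the freed slots.
 */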
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
                                unsigned int count,
                                unsigned int flags)
{
        struct intel_timeline *tl;
        unsigned int idx;

        while (count--) {
                unsigned long cacheline;
                int err;

                tl = intel_timeline_create(state->gt, NULL);
                if (IS_ERR(tl))
                        return PTR_ERR(tl);

                cacheline = hwsp_cacheline(tl);
                err = radix_tree_insert(&state->cachelines, cacheline, tl);
                if (err) {
                        if (err == -EEXIST) {
                                pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
                                       cacheline);
                        }
                        intel_timeline_put(tl);
                        return err;
                }

                idx = state->count++ % state->max;
                __mock_hwsp_record(state, idx, tl);
        }

        if (flags & SHUFFLE)
                i915_prandom_shuffle(state->history,
                                     sizeof(*state->history),
                                     min(state->count, state->max),
                                     &state->prng);

        count = i915_prandom_u32_max_state(min(state->count, state->max),
                                           &state->prng);
        while (count--) {
                idx = --state->count % state->max;
                __mock_hwsp_record(state, idx, NULL);
        }

        return 0;
}

static int mock_hwsp_freelist(void *arg)
{
        struct mock_hwsp_freelist state;
        struct drm_i915_private *i915;
        const struct {
                const char *name;
                unsigned int flags;
        } phases[] = {
                { "linear", 0 },
                { "shuffled", SHUFFLE },
                { },
        }, *p;
        unsigned int na;
        int err = 0;

        i915 = mock_gem_device();
        if (!i915)
                return -ENOMEM;

        INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
        state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

        state.gt = &i915->gt;

        /*
         * Create a bunch of timelines and check that their HWSPs do not
         * overlap. Free some, and try again.
         */

        state.max = PAGE_SIZE / sizeof(*state.history);
        state.count = 0;
        state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
        if (!state.history) {
                err = -ENOMEM;
                goto err_put;
        }

        for (p = phases; p->name; p++) {
                pr_debug("%s(%s)\n", __func__, p->name);
                for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
                        err = __mock_hwsp_timeline(&state, na, p->flags);
                        if (err)
                                goto out;
                }
        }

out:
        for (na = 0; na < state.max; na++)
                __mock_hwsp_record(&state, na, NULL);
        kfree(state.history);
err_put:
        drm_dev_put(&i915->drm);
        return err;
}

struct __igt_sync {
        const char *name;
        u32 seqno;
        bool expected;
        bool set;
};

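/*
 * Apply one step of the igt_sync table: check that the (ctx, seqno) query
 * matches the expected verdict, and optionally record the pair as completed.
 */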
static int __igt_sync(struct intel_timeline *tl,
                      u64 ctx,
                      const struct __igt_sync *p,
                      const char *name)
{
        int ret;

        if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
                pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
                       name, p->name, ctx, p->seqno, yesno(p->expected));
                return -EINVAL;
        }

        if (p->set) {
                ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
                if (ret)
                        return ret;
        }

        return 0;
}

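/*
 * Run the table of seqno transitions (including wraparound) against context
 * ids spread across every bit order, with both loop nestings, to exercise
 * the timeline's sync map.
 */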
static int igt_sync(void *arg)
{
        const struct __igt_sync pass[] = {
                { "unset", 0, false, false },
                { "new", 0, false, true },
                { "0a", 0, true, true },
                { "1a", 1, false, true },
                { "1b", 1, true, true },
                { "0b", 0, true, false },
                { "2a", 2, false, true },
                { "4", 4, false, true },
                { "INT_MAX", INT_MAX, false, true },
                { "INT_MAX-1", INT_MAX-1, true, false },
                { "INT_MAX+1", (u32)INT_MAX+1, false, true },
                { "INT_MAX", INT_MAX, true, false },
                { "UINT_MAX", UINT_MAX, false, true },
                { "wrap", 0, false, true },
                { "unwrap", UINT_MAX, true, false },
                {},
        }, *p;
        struct intel_timeline tl;
        int order, offset;
        int ret = -ENODEV;

        mock_timeline_init(&tl, 0);
        for (p = pass; p->name; p++) {
                for (order = 1; order < 64; order++) {
                        for (offset = -1; offset <= (order > 1); offset++) {
                                u64 ctx = BIT_ULL(order) + offset;

                                ret = __igt_sync(&tl, ctx, p, "1");
                                if (ret)
                                        goto out;
                        }
                }
        }
        mock_timeline_fini(&tl);

        mock_timeline_init(&tl, 0);
        for (order = 1; order < 64; order++) {
                for (offset = -1; offset <= (order > 1); offset++) {
                        u64 ctx = BIT_ULL(order) + offset;

                        for (p = pass; p->name; p++) {
                                ret = __igt_sync(&tl, ctx, p, "2");
                                if (ret)
                                        goto out;
                        }
                }
        }

out:
        mock_timeline_fini(&tl);
        return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
        return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

static int bench_sync(void *arg)
{
        struct rnd_state prng;
        struct intel_timeline tl;
        unsigned long end_time, count;
        u64 prng32_1M;
        ktime_t kt;
        int order, last_order;

        mock_timeline_init(&tl, 0);

        /* Lookups from cache are very fast and so the random number generation
         * and the loop itself become a significant factor in the per-iteration
         * timings. We try to compensate for this by measuring the overhead of
         * the prng and subtracting it from the reported results.
         */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u32 x;

                /* Make sure the compiler doesn't optimise away the prng call */
                WRITE_ONCE(x, prandom_u32_state(&prng));

                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        pr_debug("%s: %lu random evaluations, %lluns/prng\n",
                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
        prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

        /* Benchmark (only) setting random context ids */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u64 id = i915_prandom_u64_state(&prng);

                __intel_timeline_sync_set(&tl, id, 0);
                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu random insertions, %lluns/insert\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        /* Benchmark looking up the exact same context ids as we just set */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        end_time = count;
        kt = ktime_get();
        while (end_time--) {
                u64 id = i915_prandom_u64_state(&prng);

                if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
                        mock_timeline_fini(&tl);
                        pr_err("Lookup of %llu failed\n", id);
                        return -EINVAL;
                }
        }
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu random lookups, %lluns/lookup\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        mock_timeline_fini(&tl);
        cond_resched();

        mock_timeline_init(&tl, 0);

        /* Benchmark setting the first N (in order) contexts */
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                __intel_timeline_sync_set(&tl, count++, 0);
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        pr_info("%s: %lu in-order insertions, %lluns/insert\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        /* Benchmark looking up the exact same context ids as we just set */
        end_time = count;
        kt = ktime_get();
        while (end_time--) {
                if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
                        pr_err("Lookup of %lu failed\n", end_time);
                        mock_timeline_fini(&tl);
                        return -EINVAL;
                }
        }
        kt = ktime_sub(ktime_get(), kt);
        pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        mock_timeline_fini(&tl);
        cond_resched();

        mock_timeline_init(&tl, 0);

        /* Benchmark searching for a random context id and maybe changing it */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u32 id = random_engine(&prng);
                u32 seqno = prandom_u32_state(&prng);

                if (!__intel_timeline_sync_is_later(&tl, id, seqno))
                        __intel_timeline_sync_set(&tl, id, seqno);

                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
        mock_timeline_fini(&tl);
        cond_resched();

        /* Benchmark searching for a known context id and changing the seqno */
        for (last_order = 1, order = 1; order < 32;
             ({ int tmp = last_order; last_order = order; order += tmp; })) {
                unsigned int mask = BIT(order) - 1;

                mock_timeline_init(&tl, 0);

                count = 0;
                kt = ktime_get();
                end_time = jiffies + HZ/10;
                do {
                        /* Without assuming too many details of the underlying
                         * implementation, try to identify its phase-changes
                         * (if any)!
                         */
                        u64 id = (u64)(count & mask) << order;

                        __intel_timeline_sync_is_later(&tl, id, 0);
                        __intel_timeline_sync_set(&tl, id, 0);

                        count++;
                } while (!time_after(jiffies, end_time));
                kt = ktime_sub(ktime_get(), kt);
                pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
                        __func__, count, order,
                        (long long)div64_ul(ktime_to_ns(kt), count));
                mock_timeline_fini(&tl);
                cond_resched();
        }

        return 0;
}

int intel_timeline_mock_selftests(void)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(mock_hwsp_freelist),
                SUBTEST(igt_sync),
                SUBTEST(bench_sync),
        };

        return i915_subtests(tests, NULL);
}

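/*
 * Emit an MI_STORE_DWORD_IMM into the request's ring to write @value to the
 * GGTT address @addr, using the command encoding for the current gen.
 */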
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
        u32 *cs;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        if (INTEL_GEN(rq->i915) >= 8) {
                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
                *cs++ = addr;
                *cs++ = 0;
                *cs++ = value;
        } else if (INTEL_GEN(rq->i915) >= 4) {
                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
                *cs++ = 0;
                *cs++ = addr;
                *cs++ = value;
        } else {
                *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
                *cs++ = addr;
                *cs++ = value;
                *cs++ = MI_NOOP;
        }

        intel_ring_advance(rq, cs);

        return 0;
}

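/*
 * Submit a kernel request on @engine that writes @value into the timeline's
 * HWSP slot; the returned request holds a reference for the caller to wait on.
 */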
static struct i915_request *
tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
        struct i915_request *rq;
        int err;

        err = intel_timeline_pin(tl);
        if (err) {
                rq = ERR_PTR(err);
                goto out;
        }

        rq = intel_engine_create_kernel_request(engine);
        if (IS_ERR(rq))
                goto out_unpin;

        i915_request_get(rq);

        err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
        i915_request_add(rq);
        if (err) {
                i915_request_put(rq);
                rq = ERR_PTR(err);
        }

out_unpin:
        intel_timeline_unpin(tl);
out:
        if (IS_ERR(rq))
                pr_err("Failed to write to timeline!\n");
        return rq;
}

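/*
 * Create a timeline and check that its breadcrumb slot was initialised to
 * the timeline's current seqno.
 */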
static struct intel_timeline *
checked_intel_timeline_create(struct intel_gt *gt)
{
        struct intel_timeline *tl;

        tl = intel_timeline_create(gt, NULL);
        if (IS_ERR(tl))
                return tl;

        if (*tl->hwsp_seqno != tl->seqno) {
                pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
                       *tl->hwsp_seqno, tl->seqno);
                intel_timeline_put(tl);
                return ERR_PTR(-EINVAL);
        }

        return tl;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
        struct intel_gt *gt = arg;
        struct intel_timeline **timelines;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned long count, n;
        int err = 0;

        /*
         * Create a bunch of timelines and check we can write
         * independently to each of their breadcrumb slots.
         */

        timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
                                   sizeof(*timelines),
                                   GFP_KERNEL);
        if (!timelines)
                return -ENOMEM;

        count = 0;
        for_each_engine(engine, gt, id) {
                if (!intel_engine_can_store_dword(engine))
                        continue;

                intel_engine_pm_get(engine);

                for (n = 0; n < NUM_TIMELINES; n++) {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        tl = checked_intel_timeline_create(gt);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                break;
                        }

                        rq = tl_write(tl, engine, count);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                break;
                        }

                        timelines[count++] = tl;
                        i915_request_put(rq);
                }

                intel_engine_pm_put(engine);
                if (err)
                        break;
        }

        if (igt_flush_test(gt->i915))
                err = -EIO;

        for (n = 0; n < count; n++) {
                struct intel_timeline *tl = timelines[n];

                if (!err && *tl->hwsp_seqno != n) {
                        pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                               n, *tl->hwsp_seqno);
                        err = -EINVAL;
                }
                intel_timeline_put(tl);
        }

        kvfree(timelines);
        return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
        struct intel_gt *gt = arg;
        struct intel_timeline **timelines;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned long count, n;
        int err = 0;

        /*
         * Create a bunch of timelines and check we can write
         * independently to each of their breadcrumb slots with adjacent
         * engines.
         */

        timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
                                   sizeof(*timelines),
                                   GFP_KERNEL);
        if (!timelines)
                return -ENOMEM;

        count = 0;
        for (n = 0; n < NUM_TIMELINES; n++) {
                for_each_engine(engine, gt, id) {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        if (!intel_engine_can_store_dword(engine))
                                continue;

                        tl = checked_intel_timeline_create(gt);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                goto out;
                        }

                        intel_engine_pm_get(engine);
                        rq = tl_write(tl, engine, count);
                        intel_engine_pm_put(engine);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                goto out;
                        }

                        timelines[count++] = tl;
                        i915_request_put(rq);
                }
        }

out:
        if (igt_flush_test(gt->i915))
                err = -EIO;

        for (n = 0; n < count; n++) {
                struct intel_timeline *tl = timelines[n];

                if (!err && *tl->hwsp_seqno != n) {
                        pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                               n, *tl->hwsp_seqno);
                        err = -EINVAL;
                }
                intel_timeline_put(tl);
        }

        kvfree(timelines);
        return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        struct intel_timeline *tl;
        enum intel_engine_id id;
        int err = 0;

        /*
         * Across a seqno wrap, we need to keep the old cacheline alive for
         * foreign GPU references.
         */

        tl = intel_timeline_create(gt, NULL);
        if (IS_ERR(tl))
                return PTR_ERR(tl);

        if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
                goto out_free;

        err = intel_timeline_pin(tl);
        if (err)
                goto out_free;

        for_each_engine(engine, gt, id) {
                const u32 *hwsp_seqno[2];
                struct i915_request *rq;
                u32 seqno[2];

                if (!intel_engine_can_store_dword(engine))
                        continue;

                rq = intel_engine_create_kernel_request(engine);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto out;
                }

                tl->seqno = -4u;

                mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
                err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
                mutex_unlock(&tl->mutex);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
                         seqno[0], tl->hwsp_offset);

                err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                hwsp_seqno[0] = tl->hwsp_seqno;

                mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
                err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
                mutex_unlock(&tl->mutex);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
                         seqno[1], tl->hwsp_offset);

                err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                hwsp_seqno[1] = tl->hwsp_seqno;

                /* With wrap should come a new hwsp */
                GEM_BUG_ON(seqno[1] >= seqno[0]);
                GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

                i915_request_add(rq);

                if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                        pr_err("Wait for timeline writes timed out!\n");
                        err = -EIO;
                        goto out;
                }

                if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
                        pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
                               *hwsp_seqno[0], *hwsp_seqno[1],
                               seqno[0], seqno[1]);
                        err = -EINVAL;
                        goto out;
                }

                intel_gt_retire_requests(gt); /* recycle HWSP */
        }

out:
        if (igt_flush_test(gt->i915))
                err = -EIO;

        intel_timeline_unpin(tl);
out_free:
        intel_timeline_put(tl);
        return err;
}

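/*
 * Park the heartbeat (while holding an engine-pm wakeref) so that no
 * background kernel requests are emitted while we tamper with the kernel
 * context's timeline; engine_heartbeat_enable() undoes both steps.
 */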
static void engine_heartbeat_disable(struct intel_engine_cs *engine)
{
        engine->props.heartbeat_interval_ms = 0;

        intel_engine_pm_get(engine);
        intel_engine_park_heartbeat(engine);
}

static void engine_heartbeat_enable(struct intel_engine_cs *engine)
{
        intel_engine_pm_put(engine);

        engine->props.heartbeat_interval_ms =
                engine->defaults.heartbeat_interval_ms;
}

static int live_hwsp_rollover_kernel(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err = 0;

        /*
         * Run the host for long enough, and even the kernel context will
         * see a seqno rollover.
         */

        for_each_engine(engine, gt, id) {
                struct intel_context *ce = engine->kernel_context;
                struct intel_timeline *tl = ce->timeline;
                struct i915_request *rq[3] = {};
                int i;

                engine_heartbeat_disable(engine);
                if (intel_gt_wait_for_idle(gt, HZ / 2)) {
                        err = -EIO;
                        goto out;
                }

                GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
                tl->seqno = 0;
                timeline_rollback(tl);
                timeline_rollback(tl);
                WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

                for (i = 0; i < ARRAY_SIZE(rq); i++) {
                        struct i915_request *this;

                        this = i915_request_create(ce);
                        if (IS_ERR(this)) {
                                err = PTR_ERR(this);
                                goto out;
                        }

                        pr_debug("%s: create fence.seqno:%d\n",
                                 engine->name,
                                 lower_32_bits(this->fence.seqno));

                        GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

                        rq[i] = i915_request_get(this);
                        i915_request_add(this);
                }

                /* We expected a wrap! */
                GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

                if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
                        pr_err("Wait for timeline wrap timed out!\n");
                        err = -EIO;
                        goto out;
                }

                for (i = 0; i < ARRAY_SIZE(rq); i++) {
                        if (!i915_request_completed(rq[i])) {
                                pr_err("Pre-wrap request not completed!\n");
                                err = -EINVAL;
                                goto out;
                        }
                }

out:
                for (i = 0; i < ARRAY_SIZE(rq); i++)
                        i915_request_put(rq[i]);
                engine_heartbeat_enable(engine);
                if (err)
                        break;
        }

        if (igt_flush_test(gt->i915))
                err = -EIO;

        return err;
}

static int live_hwsp_rollover_user(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err = 0;

        /*
         * Simulate a long running user context, and force the seqno wrap
         * on the user's timeline.
         */

        for_each_engine(engine, gt, id) {
                struct i915_request *rq[3] = {};
                struct intel_timeline *tl;
                struct intel_context *ce;
                int i;

                ce = intel_context_create(engine);
                if (IS_ERR(ce))
                        return PTR_ERR(ce);

                err = intel_context_alloc_state(ce);
                if (err)
                        goto out;

                tl = ce->timeline;
                if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
                        goto out;

                timeline_rollback(tl);
                timeline_rollback(tl);
                WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

                for (i = 0; i < ARRAY_SIZE(rq); i++) {
                        struct i915_request *this;

                        this = intel_context_create_request(ce);
                        if (IS_ERR(this)) {
                                err = PTR_ERR(this);
                                goto out;
                        }

                        pr_debug("%s: create fence.seqno:%d\n",
                                 engine->name,
                                 lower_32_bits(this->fence.seqno));

                        GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

                        rq[i] = i915_request_get(this);
                        i915_request_add(this);
                }

                /* We expected a wrap! */
                GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

                if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
                        pr_err("Wait for timeline wrap timed out!\n");
                        err = -EIO;
                        goto out;
                }

                for (i = 0; i < ARRAY_SIZE(rq); i++) {
                        if (!i915_request_completed(rq[i])) {
                                pr_err("Pre-wrap request not completed!\n");
                                err = -EINVAL;
                                goto out;
                        }
                }

out:
                for (i = 0; i < ARRAY_SIZE(rq); i++)
                        i915_request_put(rq[i]);
                intel_context_put(ce);
                if (err)
                        break;
        }

        if (igt_flush_test(gt->i915))
                err = -EIO;

        return err;
}

static int live_hwsp_recycle(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned long count;
        int err = 0;

        /*
         * Check seqno writes into one timeline at a time. We expect to
         * recycle the breadcrumb slot between iterations and want to confuse
         * neither ourselves nor the GPU.
         */

        count = 0;
        for_each_engine(engine, gt, id) {
                IGT_TIMEOUT(end_time);

                if (!intel_engine_can_store_dword(engine))
                        continue;

                intel_engine_pm_get(engine);

                do {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        tl = checked_intel_timeline_create(gt);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                break;
                        }

                        rq = tl_write(tl, engine, count);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                break;
                        }

                        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                                pr_err("Wait for timeline writes timed out!\n");
                                i915_request_put(rq);
                                intel_timeline_put(tl);
                                err = -EIO;
                                break;
                        }

                        if (*tl->hwsp_seqno != count) {
                                pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                                       count, *tl->hwsp_seqno);
                                err = -EINVAL;
                        }

                        i915_request_put(rq);
                        intel_timeline_put(tl);
                        count++;

                        if (err)
                                break;
                } while (!__igt_timeout(end_time, NULL));

                intel_engine_pm_put(engine);
                if (err)
                        break;
        }

        return err;
}

int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(live_hwsp_recycle),
                SUBTEST(live_hwsp_engine),
                SUBTEST(live_hwsp_alternate),
                SUBTEST(live_hwsp_wrap),
                SUBTEST(live_hwsp_rollover_kernel),
                SUBTEST(live_hwsp_rollover_user),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return intel_gt_live_subtests(tests, &i915->gt);
}