drivers/gpu/drm/i915/gt/selftest_timeline.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "../selftests/igt_flush_test.h"
#include "../selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

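/* Resolve the backing page of the timeline's pinned HWSP GGTT object. */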
static struct page *hwsp_page(struct intel_timeline *tl)
{
        struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
        return sg_page(obj->mm.pages->sgl);
}

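/*
 * Compute a globally unique cacheline index for the timeline's HWSP slot;
 * the mock selftest below uses this as a key to detect duplicate allocations.
 */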
static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
        unsigned long address = (unsigned long)page_address(hwsp_page(tl));

        return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

struct mock_hwsp_freelist {
        struct intel_gt *gt;
        struct radix_tree_root cachelines;
        struct intel_timeline **history;
        unsigned long count, max;
        struct rnd_state prng;
};

enum {
        SHUFFLE = BIT(0),
};

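/*
 * Swap @tl into history slot @idx, dropping the tracking entry and the
 * reference held for whichever timeline previously occupied that slot.
 */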
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
                               unsigned int idx,
                               struct intel_timeline *tl)
{
        tl = xchg(&state->history[idx], tl);
        if (tl) {
                radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
                intel_timeline_put(tl);
        }
}

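/*
 * Create @count timelines, checking that each receives a HWSP cacheline
 * not already in use, then release a random number of the most recent
 * entries so their slots may be recycled on the next pass.
 */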
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
                                unsigned int count,
                                unsigned int flags)
{
        struct intel_timeline *tl;
        unsigned int idx;

        while (count--) {
                unsigned long cacheline;
                int err;

                tl = intel_timeline_create(state->gt, NULL);
                if (IS_ERR(tl))
                        return PTR_ERR(tl);

                cacheline = hwsp_cacheline(tl);
                err = radix_tree_insert(&state->cachelines, cacheline, tl);
                if (err) {
                        if (err == -EEXIST) {
                                pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
                                       cacheline);
                        }
                        intel_timeline_put(tl);
                        return err;
                }

                idx = state->count++ % state->max;
                __mock_hwsp_record(state, idx, tl);
        }

        if (flags & SHUFFLE)
                i915_prandom_shuffle(state->history,
                                     sizeof(*state->history),
                                     min(state->count, state->max),
                                     &state->prng);

        count = i915_prandom_u32_max_state(min(state->count, state->max),
                                           &state->prng);
        while (count--) {
                idx = --state->count % state->max;
                __mock_hwsp_record(state, idx, NULL);
        }

        return 0;
}

static int mock_hwsp_freelist(void *arg)
{
        struct mock_hwsp_freelist state;
        struct drm_i915_private *i915;
        const struct {
                const char *name;
                unsigned int flags;
        } phases[] = {
                { "linear", 0 },
                { "shuffled", SHUFFLE },
                { },
        }, *p;
        unsigned int na;
        int err = 0;

        i915 = mock_gem_device();
        if (!i915)
                return -ENOMEM;

        INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
        state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

        state.gt = &i915->gt;

        /*
         * Create a bunch of timelines and check that their HWSPs do not
         * overlap. Free some, and try again.
         */

        state.max = PAGE_SIZE / sizeof(*state.history);
        state.count = 0;
        state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
        if (!state.history) {
                err = -ENOMEM;
                goto err_put;
        }

        for (p = phases; p->name; p++) {
                pr_debug("%s(%s)\n", __func__, p->name);
                for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
                        err = __mock_hwsp_timeline(&state, na, p->flags);
                        if (err)
                                goto out;
                }
        }

out:
        for (na = 0; na < state.max; na++)
                __mock_hwsp_record(&state, na, NULL);
        kfree(state.history);
err_put:
        drm_dev_put(&i915->drm);
        return err;
}

struct __igt_sync {
        const char *name;
        u32 seqno;
        bool expected;
        bool set;
};

static int __igt_sync(struct intel_timeline *tl,
                      u64 ctx,
                      const struct __igt_sync *p,
                      const char *name)
{
        int ret;

        if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
                pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
                       name, p->name, ctx, p->seqno, yesno(p->expected));
                return -EINVAL;
        }

        if (p->set) {
                ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
                if (ret)
                        return ret;
        }

        return 0;
}

static int igt_sync(void *arg)
{
        const struct __igt_sync pass[] = {
                { "unset", 0, false, false },
                { "new", 0, false, true },
                { "0a", 0, true, true },
                { "1a", 1, false, true },
                { "1b", 1, true, true },
                { "0b", 0, true, false },
                { "2a", 2, false, true },
                { "4", 4, false, true },
                { "INT_MAX", INT_MAX, false, true },
                { "INT_MAX-1", INT_MAX-1, true, false },
                { "INT_MAX+1", (u32)INT_MAX+1, false, true },
                { "INT_MAX", INT_MAX, true, false },
                { "UINT_MAX", UINT_MAX, false, true },
                { "wrap", 0, false, true },
                { "unwrap", UINT_MAX, true, false },
                {},
        }, *p;
        struct intel_timeline tl;
        int order, offset;
        int ret = -ENODEV;

        mock_timeline_init(&tl, 0);
        for (p = pass; p->name; p++) {
                for (order = 1; order < 64; order++) {
                        for (offset = -1; offset <= (order > 1); offset++) {
                                u64 ctx = BIT_ULL(order) + offset;

                                ret = __igt_sync(&tl, ctx, p, "1");
                                if (ret)
                                        goto out;
                        }
                }
        }
        mock_timeline_fini(&tl);

        mock_timeline_init(&tl, 0);
        for (order = 1; order < 64; order++) {
                for (offset = -1; offset <= (order > 1); offset++) {
                        u64 ctx = BIT_ULL(order) + offset;

                        for (p = pass; p->name; p++) {
                                ret = __igt_sync(&tl, ctx, p, "2");
                                if (ret)
                                        goto out;
                        }
                }
        }

out:
        mock_timeline_fini(&tl);
        return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
        return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

static int bench_sync(void *arg)
{
        struct rnd_state prng;
        struct intel_timeline tl;
        unsigned long end_time, count;
        u64 prng32_1M;
        ktime_t kt;
        int order, last_order;

        mock_timeline_init(&tl, 0);

        /* Lookups from cache are very fast and so the random number generation
         * and the loop itself become significant factors in the per-iteration
         * timings. We try to compensate by measuring the overhead of the prng
         * and subtracting it from the reported results.
         */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u32 x;

                /* Make sure the compiler doesn't optimise away the prng call */
                WRITE_ONCE(x, prandom_u32_state(&prng));

                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        pr_debug("%s: %lu random evaluations, %lluns/prng\n",
                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
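        /* Mean cost of one prng call in ns, scaled by 2^20 for precision */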
        prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

        /* Benchmark (only) setting random context ids */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u64 id = i915_prandom_u64_state(&prng);

                __intel_timeline_sync_set(&tl, id, 0);
                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
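        /* Each 64b id consumed two 32b prng draws; subtract that overhead */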
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu random insertions, %lluns/insert\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        /* Benchmark looking up the exact same context ids as we just set */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        end_time = count;
        kt = ktime_get();
        while (end_time--) {
                u64 id = i915_prandom_u64_state(&prng);

                if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
                        mock_timeline_fini(&tl);
                        pr_err("Lookup of %llu failed\n", id);
                        return -EINVAL;
                }
        }
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu random lookups, %lluns/lookup\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        mock_timeline_fini(&tl);
        cond_resched();

        mock_timeline_init(&tl, 0);

        /* Benchmark setting the first N (in order) contexts */
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                __intel_timeline_sync_set(&tl, count++, 0);
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        pr_info("%s: %lu in-order insertions, %lluns/insert\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        /* Benchmark looking up the exact same context ids as we just set */
        end_time = count;
        kt = ktime_get();
        while (end_time--) {
                if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
                        pr_err("Lookup of %lu failed\n", end_time);
                        mock_timeline_fini(&tl);
                        return -EINVAL;
                }
        }
        kt = ktime_sub(ktime_get(), kt);
        pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        mock_timeline_fini(&tl);
        cond_resched();

        mock_timeline_init(&tl, 0);

        /* Benchmark searching for a random context id and maybe changing it */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u32 id = random_engine(&prng);
                u32 seqno = prandom_u32_state(&prng);

                if (!__intel_timeline_sync_is_later(&tl, id, seqno))
                        __intel_timeline_sync_set(&tl, id, seqno);

                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
        mock_timeline_fini(&tl);
        cond_resched();

        /* Benchmark searching for a known context id and changing the seqno */
        for (last_order = 1, order = 1; order < 32;
             ({ int tmp = last_order; last_order = order; order += tmp; })) {
                unsigned int mask = BIT(order) - 1;

                mock_timeline_init(&tl, 0);

                count = 0;
                kt = ktime_get();
                end_time = jiffies + HZ/10;
                do {
                        /* Without assuming too many details of the underlying
                         * implementation, try to identify its phase-changes
                         * (if any)!
                         */
                        u64 id = (u64)(count & mask) << order;

                        __intel_timeline_sync_is_later(&tl, id, 0);
                        __intel_timeline_sync_set(&tl, id, 0);

                        count++;
                } while (!time_after(jiffies, end_time));
                kt = ktime_sub(ktime_get(), kt);
                pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
                        __func__, count, order,
                        (long long)div64_ul(ktime_to_ns(kt), count));
                mock_timeline_fini(&tl);
                cond_resched();
        }

        return 0;
}

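/*
 * Mock (no hardware required) timeline selftests; typically run via the
 * i915.mock_selftests module parameter.
 */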
int intel_timeline_mock_selftests(void)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(mock_hwsp_freelist),
                SUBTEST(igt_sync),
                SUBTEST(bench_sync),
        };

        return i915_subtests(tests, NULL);
}

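/*
 * Emit an MI_STORE_DWORD_IMM into the ring to write @value to the GGTT
 * address @addr, using the command layout each generation expects:
 * gen8+ takes a 64b address, gen4+ a 32b address after a reserved dword,
 * and earlier platforms use the legacy MI_STORE_DWORD_IMM encoding.
 */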
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
        u32 *cs;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        if (INTEL_GEN(rq->i915) >= 8) {
                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
                *cs++ = addr;
                *cs++ = 0;
                *cs++ = value;
        } else if (INTEL_GEN(rq->i915) >= 4) {
                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
                *cs++ = 0;
                *cs++ = addr;
                *cs++ = value;
        } else {
                *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
                *cs++ = addr;
                *cs++ = value;
                *cs++ = MI_NOOP;
        }

        intel_ring_advance(rq, cs);

        return 0;
}

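/*
 * Pin @tl and submit a kernel request on @engine that writes @value into
 * the timeline's HWSP slot, returning the request (with a reference held)
 * so that the caller can wait for the write to land.
 */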
static struct i915_request *
tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
        struct i915_request *rq;
        int err;

        err = intel_timeline_pin(tl);
        if (err) {
                rq = ERR_PTR(err);
                goto out;
        }

        rq = intel_engine_create_kernel_request(engine);
        if (IS_ERR(rq))
                goto out_unpin;

        i915_request_get(rq);

        err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
        i915_request_add(rq);
        if (err) {
                i915_request_put(rq);
                rq = ERR_PTR(err);
        }

out_unpin:
        intel_timeline_unpin(tl);
out:
        if (IS_ERR(rq))
                pr_err("Failed to write to timeline!\n");
        return rq;
}

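/*
 * Create a timeline, sanity checking that its breadcrumb slot was
 * initialised to the timeline's current seqno.
 */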
static struct intel_timeline *
checked_intel_timeline_create(struct intel_gt *gt)
{
        struct intel_timeline *tl;

        tl = intel_timeline_create(gt, NULL);
        if (IS_ERR(tl))
                return tl;

        if (*tl->hwsp_seqno != tl->seqno) {
                pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
                       *tl->hwsp_seqno, tl->seqno);
                intel_timeline_put(tl);
                return ERR_PTR(-EINVAL);
        }

        return tl;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
        struct intel_gt *gt = arg;
        struct intel_timeline **timelines;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned long count, n;
        int err = 0;

        /*
         * Create a bunch of timelines and check we can write
         * independently to each of their breadcrumb slots.
         */

        timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
                                   sizeof(*timelines),
                                   GFP_KERNEL);
        if (!timelines)
                return -ENOMEM;

        count = 0;
        for_each_engine(engine, gt, id) {
                if (!intel_engine_can_store_dword(engine))
                        continue;

                intel_engine_pm_get(engine);

                for (n = 0; n < NUM_TIMELINES; n++) {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        tl = checked_intel_timeline_create(gt);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                break;
                        }

                        rq = tl_write(tl, engine, count);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                break;
                        }

                        timelines[count++] = tl;
                        i915_request_put(rq);
                }

                intel_engine_pm_put(engine);
                if (err)
                        break;
        }

        if (igt_flush_test(gt->i915))
                err = -EIO;

        for (n = 0; n < count; n++) {
                struct intel_timeline *tl = timelines[n];

                if (!err && *tl->hwsp_seqno != n) {
                        pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                               n, *tl->hwsp_seqno);
                        err = -EINVAL;
                }
                intel_timeline_put(tl);
        }

        kvfree(timelines);
        return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
        struct intel_gt *gt = arg;
        struct intel_timeline **timelines;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned long count, n;
        int err = 0;

        /*
         * Create a bunch of timelines and check that we can write
         * independently to each of their breadcrumb slots, alternating
         * between engines for adjacent timelines.
         */

        timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
                                   sizeof(*timelines),
                                   GFP_KERNEL);
        if (!timelines)
                return -ENOMEM;

        count = 0;
        for (n = 0; n < NUM_TIMELINES; n++) {
                for_each_engine(engine, gt, id) {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        if (!intel_engine_can_store_dword(engine))
                                continue;

                        tl = checked_intel_timeline_create(gt);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                goto out;
                        }

                        intel_engine_pm_get(engine);
                        rq = tl_write(tl, engine, count);
                        intel_engine_pm_put(engine);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                goto out;
                        }

                        timelines[count++] = tl;
                        i915_request_put(rq);
                }
        }

out:
        if (igt_flush_test(gt->i915))
                err = -EIO;

        for (n = 0; n < count; n++) {
                struct intel_timeline *tl = timelines[n];

                if (!err && *tl->hwsp_seqno != n) {
                        pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                               n, *tl->hwsp_seqno);
                        err = -EINVAL;
                }
                intel_timeline_put(tl);
        }

        kvfree(timelines);
        return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        struct intel_timeline *tl;
        enum intel_engine_id id;
        int err = 0;

        /*
         * Across a seqno wrap, we need to keep the old cacheline alive for
         * foreign GPU references.
         */

        tl = intel_timeline_create(gt, NULL);
        if (IS_ERR(tl))
                return PTR_ERR(tl);

        if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
                goto out_free;

        err = intel_timeline_pin(tl);
        if (err)
                goto out_free;

        for_each_engine(engine, gt, id) {
                const u32 *hwsp_seqno[2];
                struct i915_request *rq;
                u32 seqno[2];

                if (!intel_engine_can_store_dword(engine))
                        continue;

                rq = intel_engine_create_kernel_request(engine);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto out;
                }

                tl->seqno = -4u;

                mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
                err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
                mutex_unlock(&tl->mutex);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
                         seqno[0], tl->hwsp_offset);

                err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                hwsp_seqno[0] = tl->hwsp_seqno;

                mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
                err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
                mutex_unlock(&tl->mutex);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
                         seqno[1], tl->hwsp_offset);

                err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                hwsp_seqno[1] = tl->hwsp_seqno;

                /* With wrap should come a new hwsp */
                GEM_BUG_ON(seqno[1] >= seqno[0]);
                GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

                i915_request_add(rq);

                if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                        pr_err("Wait for timeline writes timed out!\n");
                        err = -EIO;
                        goto out;
                }

                if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
                        pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
                               *hwsp_seqno[0], *hwsp_seqno[1],
                               seqno[0], seqno[1]);
                        err = -EINVAL;
                        goto out;
                }

                intel_gt_retire_requests(gt); /* recycle HWSP */
        }

out:
        if (igt_flush_test(gt->i915))
                err = -EIO;

        intel_timeline_unpin(tl);
out_free:
        intel_timeline_put(tl);
        return err;
}

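/*
 * Park the heartbeat so no background kernel requests are submitted
 * while we tamper with a timeline's seqno.
 */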
static void engine_heartbeat_disable(struct intel_engine_cs *engine,
                                     unsigned long *saved)
{
        *saved = engine->props.heartbeat_interval_ms;
        engine->props.heartbeat_interval_ms = 0;

        intel_engine_pm_get(engine);
        intel_engine_park_heartbeat(engine);
}

static void engine_heartbeat_enable(struct intel_engine_cs *engine,
                                    unsigned long saved)
{
        intel_engine_pm_put(engine);

        engine->props.heartbeat_interval_ms = saved;
}

static int live_hwsp_rollover_kernel(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err = 0;

        /*
         * Run the host for long enough, and even the kernel context will
         * see a seqno rollover.
         */

        for_each_engine(engine, gt, id) {
                struct intel_context *ce = engine->kernel_context;
                struct intel_timeline *tl = ce->timeline;
                struct i915_request *rq[3] = {};
                unsigned long heartbeat;
                int i;

                engine_heartbeat_disable(engine, &heartbeat);
                if (intel_gt_wait_for_idle(gt, HZ / 2)) {
                        err = -EIO;
                        goto out;
                }

                GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
                tl->seqno = 0;
                timeline_rollback(tl);
                timeline_rollback(tl);
                WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

                for (i = 0; i < ARRAY_SIZE(rq); i++) {
                        struct i915_request *this;

                        this = i915_request_create(ce);
                        if (IS_ERR(this)) {
                                err = PTR_ERR(this);
                                goto out;
                        }

                        pr_debug("%s: create fence.seqno:%d\n",
                                 engine->name,
                                 lower_32_bits(this->fence.seqno));

                        GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

                        rq[i] = i915_request_get(this);
                        i915_request_add(this);
                }

                /* We expected a wrap! */
                GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

                if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
                        pr_err("Wait for timeline wrap timed out!\n");
                        err = -EIO;
                        goto out;
                }

                for (i = 0; i < ARRAY_SIZE(rq); i++) {
                        if (!i915_request_completed(rq[i])) {
                                pr_err("Pre-wrap request not completed!\n");
                                err = -EINVAL;
                                goto out;
                        }
                }

out:
                for (i = 0; i < ARRAY_SIZE(rq); i++)
                        i915_request_put(rq[i]);
                engine_heartbeat_enable(engine, heartbeat);
                if (err)
                        break;
        }

        if (igt_flush_test(gt->i915))
                err = -EIO;

        return err;
}

static int live_hwsp_rollover_user(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err = 0;

        /*
         * Simulate a long-running user context, and force the seqno wrap
         * on the user's timeline.
         */

        for_each_engine(engine, gt, id) {
                struct i915_request *rq[3] = {};
                struct intel_timeline *tl;
                struct intel_context *ce;
                int i;

                ce = intel_context_create(engine);
                if (IS_ERR(ce))
                        return PTR_ERR(ce);

                err = intel_context_alloc_state(ce);
                if (err)
                        goto out;

                tl = ce->timeline;
                if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
                        goto out;

                timeline_rollback(tl);
                timeline_rollback(tl);
                WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

                for (i = 0; i < ARRAY_SIZE(rq); i++) {
                        struct i915_request *this;

                        this = intel_context_create_request(ce);
                        if (IS_ERR(this)) {
                                err = PTR_ERR(this);
                                goto out;
                        }

                        pr_debug("%s: create fence.seqno:%d\n",
                                 engine->name,
                                 lower_32_bits(this->fence.seqno));

                        GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

                        rq[i] = i915_request_get(this);
                        i915_request_add(this);
                }

                /* We expected a wrap! */
                GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

                if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
                        pr_err("Wait for timeline wrap timed out!\n");
                        err = -EIO;
                        goto out;
                }

                for (i = 0; i < ARRAY_SIZE(rq); i++) {
                        if (!i915_request_completed(rq[i])) {
                                pr_err("Pre-wrap request not completed!\n");
                                err = -EINVAL;
                                goto out;
                        }
                }

out:
                for (i = 0; i < ARRAY_SIZE(rq); i++)
                        i915_request_put(rq[i]);
                intel_context_put(ce);
                if (err)
                        break;
        }

        if (igt_flush_test(gt->i915))
                err = -EIO;

        return err;
}

static int live_hwsp_recycle(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned long count;
        int err = 0;

        /*
         * Check seqno writes into one timeline at a time. We expect to
         * recycle the breadcrumb slot between iterations, without
         * confusing either ourselves or the GPU.
         */

        count = 0;
        for_each_engine(engine, gt, id) {
                IGT_TIMEOUT(end_time);

                if (!intel_engine_can_store_dword(engine))
                        continue;

                intel_engine_pm_get(engine);

                do {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        tl = checked_intel_timeline_create(gt);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                break;
                        }

                        rq = tl_write(tl, engine, count);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                break;
                        }

                        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                                pr_err("Wait for timeline writes timed out!\n");
                                i915_request_put(rq);
                                intel_timeline_put(tl);
                                err = -EIO;
                                break;
                        }

                        if (*tl->hwsp_seqno != count) {
                                pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                                       count, *tl->hwsp_seqno);
                                err = -EINVAL;
                        }

                        i915_request_put(rq);
                        intel_timeline_put(tl);
                        count++;

                        if (err)
                                break;
                } while (!__igt_timeout(end_time, NULL));

                intel_engine_pm_put(engine);
                if (err)
                        break;
        }

        return err;
}

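/*
 * Timeline selftests that exercise real hardware; typically run via the
 * i915.live_selftests module parameter.
 */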
int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(live_hwsp_recycle),
                SUBTEST(live_hwsp_engine),
                SUBTEST(live_hwsp_alternate),
                SUBTEST(live_hwsp_wrap),
                SUBTEST(live_hwsp_rollover_kernel),
                SUBTEST(live_hwsp_rollover_user),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return intel_gt_live_subtests(tests, &i915->gt);
}