drivers/gpu/drm/i915/gt/selftest_execlists.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2018 Intel Corporation
4  */
5
6 #include <linux/prime_numbers.h>
7
8 #include "gem/i915_gem_pm.h"
9 #include "gt/intel_engine_heartbeat.h"
10 #include "gt/intel_reset.h"
11 #include "gt/selftest_engine_heartbeat.h"
12
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
19
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
22
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR 16
25 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
26
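/*
 * A request counts as "active" for wait_for_submit() if it has been
 * submitted to the HW, parked on the hold list, or has already begun
 * execution (its initial breadcrumb has been seen).
 */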
27 static bool is_active(struct i915_request *rq)
28 {
29         if (i915_request_is_active(rq))
30                 return true;
31
32         if (i915_request_on_hold(rq))
33                 return true;
34
35         if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
36                 return true;
37
38         return false;
39 }
40
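/*
 * Poll until the HW has acknowledged the submission of @rq (no ELSP
 * write still pending and the request is active), the request has
 * already completed, or @timeout jiffies have elapsed (-ETIME).
 */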
41 static int wait_for_submit(struct intel_engine_cs *engine,
42                            struct i915_request *rq,
43                            unsigned long timeout)
44 {
45         /* Ignore our own attempts to suppress excess tasklets */
46         tasklet_hi_schedule(&engine->execlists.tasklet);
47
48         timeout += jiffies;
49         do {
50                 bool done = time_after(jiffies, timeout);
51
52                 if (i915_request_completed(rq)) /* that was quick! */
53                         return 0;
54
55                 /* Wait until the HW has acknowledged the submission (or err) */
56                 intel_engine_flush_submission(engine);
57                 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
58                         return 0;
59
60                 if (done)
61                         return -ETIME;
62
63                 cond_resched();
64         } while (1);
65 }
66
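/*
 * Wait for @rq to be cancelled by an engine reset: its fence should be
 * marked with -EIO, and the request should then complete shortly after
 * the reset worker has been flushed.
 */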
67 static int wait_for_reset(struct intel_engine_cs *engine,
68                           struct i915_request *rq,
69                           unsigned long timeout)
70 {
71         timeout += jiffies;
72
73         do {
74                 cond_resched();
75                 intel_engine_flush_submission(engine);
76
77                 if (READ_ONCE(engine->execlists.pending[0]))
78                         continue;
79
80                 if (i915_request_completed(rq))
81                         break;
82
83                 if (READ_ONCE(rq->fence.error))
84                         break;
85         } while (time_before(jiffies, timeout));
86
87         flush_scheduled_work();
88
89         if (rq->fence.error != -EIO) {
90                 pr_err("%s: hanging request %llx:%lld not reset\n",
91                        engine->name,
92                        rq->fence.context,
93                        rq->fence.seqno);
94                 return -EINVAL;
95         }
96
97         /* Give the request a jiffie to complete after flushing the worker */
98         if (i915_request_wait(rq, 0,
99                               max(0l, (long)(timeout - jiffies)) + 1) < 0) {
100                 pr_err("%s: hanging request %llx:%lld did not complete\n",
101                        engine->name,
102                        rq->fence.context,
103                        rq->fence.seqno);
104                 return -ETIME;
105         }
106
107         return 0;
108 }
109
110 static int live_sanitycheck(void *arg)
111 {
112         struct intel_gt *gt = arg;
113         struct intel_engine_cs *engine;
114         enum intel_engine_id id;
115         struct igt_spinner spin;
116         int err = 0;
117
118         if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
119                 return 0;
120
121         if (igt_spinner_init(&spin, gt))
122                 return -ENOMEM;
123
124         for_each_engine(engine, gt, id) {
125                 struct intel_context *ce;
126                 struct i915_request *rq;
127
128                 ce = intel_context_create(engine);
129                 if (IS_ERR(ce)) {
130                         err = PTR_ERR(ce);
131                         break;
132                 }
133
134                 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
135                 if (IS_ERR(rq)) {
136                         err = PTR_ERR(rq);
137                         goto out_ctx;
138                 }
139
140                 i915_request_add(rq);
141                 if (!igt_wait_for_spinner(&spin, rq)) {
142                         GEM_TRACE("spinner failed to start\n");
143                         GEM_TRACE_DUMP();
144                         intel_gt_set_wedged(gt);
145                         err = -EIO;
146                         goto out_ctx;
147                 }
148
149                 igt_spinner_end(&spin);
150                 if (igt_flush_test(gt->i915)) {
151                         err = -EIO;
152                         goto out_ctx;
153                 }
154
155 out_ctx:
156                 intel_context_put(ce);
157                 if (err)
158                         break;
159         }
160
161         igt_spinner_fini(&spin);
162         return err;
163 }
164
165 static int live_unlite_restore(struct intel_gt *gt, int prio)
166 {
167         struct intel_engine_cs *engine;
168         enum intel_engine_id id;
169         struct igt_spinner spin;
170         int err = -ENOMEM;
171
172         /*
173          * Check that we can correctly context switch between 2 instances
174          * on the same engine from the same parent context.
175          */
176
177         if (igt_spinner_init(&spin, gt))
178                 return err;
179
180         err = 0;
181         for_each_engine(engine, gt, id) {
182                 struct intel_context *ce[2] = {};
183                 struct i915_request *rq[2];
184                 struct igt_live_test t;
185                 int n;
186
187                 if (prio && !intel_engine_has_preemption(engine))
188                         continue;
189
190                 if (!intel_engine_can_store_dword(engine))
191                         continue;
192
193                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
194                         err = -EIO;
195                         break;
196                 }
197                 st_engine_heartbeat_disable(engine);
198
199                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
200                         struct intel_context *tmp;
201
202                         tmp = intel_context_create(engine);
203                         if (IS_ERR(tmp)) {
204                                 err = PTR_ERR(tmp);
205                                 goto err_ce;
206                         }
207
208                         err = intel_context_pin(tmp);
209                         if (err) {
210                                 intel_context_put(tmp);
211                                 goto err_ce;
212                         }
213
214                         /*
215                          * Set up the pair of contexts such that if we
216                          * lite-restore using the RING_TAIL from ce[1] it
217                          * will execute garbage from ce[0]->ring.
218                          */
219                         memset(tmp->ring->vaddr,
220                                POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
221                                tmp->ring->vma->size);
222
223                         ce[n] = tmp;
224                 }
225                 GEM_BUG_ON(!ce[1]->ring->size);
226                 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
227                 lrc_update_regs(ce[1], engine, ce[1]->ring->head);
228
229                 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
230                 if (IS_ERR(rq[0])) {
231                         err = PTR_ERR(rq[0]);
232                         goto err_ce;
233                 }
234
235                 i915_request_get(rq[0]);
236                 i915_request_add(rq[0]);
237                 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
238
239                 if (!igt_wait_for_spinner(&spin, rq[0])) {
240                         i915_request_put(rq[0]);
241                         goto err_ce;
242                 }
243
244                 rq[1] = i915_request_create(ce[1]);
245                 if (IS_ERR(rq[1])) {
246                         err = PTR_ERR(rq[1]);
247                         i915_request_put(rq[0]);
248                         goto err_ce;
249                 }
250
251                 if (!prio) {
252                         /*
253                          * Ensure we do the switch to ce[1] on completion.
254                          *
255                          * rq[0] is already submitted, so this should reduce
256                          * to a no-op (a wait on a request on the same engine
257                          * uses the submit fence, not the completion fence),
258                          * but it will install a dependency on rq[1] for rq[0]
259                          * that will prevent the pair being reordered by
260                          * timeslicing.
261                          */
262                         i915_request_await_dma_fence(rq[1], &rq[0]->fence);
263                 }
264
265                 i915_request_get(rq[1]);
266                 i915_request_add(rq[1]);
267                 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
268                 i915_request_put(rq[0]);
269
270                 if (prio) {
271                         struct i915_sched_attr attr = {
272                                 .priority = prio,
273                         };
274
275                         /* Alternatively preempt the spinner with ce[1] */
276                         engine->schedule(rq[1], &attr);
277                 }
278
279                 /* And switch back to ce[0] for good measure */
280                 rq[0] = i915_request_create(ce[0]);
281                 if (IS_ERR(rq[0])) {
282                         err = PTR_ERR(rq[0]);
283                         i915_request_put(rq[1]);
284                         goto err_ce;
285                 }
286
287                 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
288                 i915_request_get(rq[0]);
289                 i915_request_add(rq[0]);
290                 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
291                 i915_request_put(rq[1]);
292                 i915_request_put(rq[0]);
293
294 err_ce:
295                 intel_engine_flush_submission(engine);
296                 igt_spinner_end(&spin);
297                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
298                         if (IS_ERR_OR_NULL(ce[n]))
299                                 break;
300
301                         intel_context_unpin(ce[n]);
302                         intel_context_put(ce[n]);
303                 }
304
305                 st_engine_heartbeat_enable(engine);
306                 if (igt_live_test_end(&t))
307                         err = -EIO;
308                 if (err)
309                         break;
310         }
311
312         igt_spinner_fini(&spin);
313         return err;
314 }
315
316 static int live_unlite_switch(void *arg)
317 {
318         return live_unlite_restore(arg, 0);
319 }
320
321 static int live_unlite_preempt(void *arg)
322 {
323         return live_unlite_restore(arg, I915_PRIORITY_MAX);
324 }
325
326 static int live_unlite_ring(void *arg)
327 {
328         struct intel_gt *gt = arg;
329         struct intel_engine_cs *engine;
330         struct igt_spinner spin;
331         enum intel_engine_id id;
332         int err = 0;
333
334         /*
335          * Set up a preemption event that will cause almost the entire ring
336          * to be unwound, potentially fooling our intel_ring_direction()
337          * into emitting a forward lite-restore instead of the rollback.
338          */
339
340         if (igt_spinner_init(&spin, gt))
341                 return -ENOMEM;
342
343         for_each_engine(engine, gt, id) {
344                 struct intel_context *ce[2] = {};
345                 struct i915_request *rq;
346                 struct igt_live_test t;
347                 int n;
348
349                 if (!intel_engine_has_preemption(engine))
350                         continue;
351
352                 if (!intel_engine_can_store_dword(engine))
353                         continue;
354
355                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
356                         err = -EIO;
357                         break;
358                 }
359                 st_engine_heartbeat_disable(engine);
360
361                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
362                         struct intel_context *tmp;
363
364                         tmp = intel_context_create(engine);
365                         if (IS_ERR(tmp)) {
366                                 err = PTR_ERR(tmp);
367                                 goto err_ce;
368                         }
369
370                         err = intel_context_pin(tmp);
371                         if (err) {
372                                 intel_context_put(tmp);
373                                 goto err_ce;
374                         }
375
376                         memset32(tmp->ring->vaddr,
377                                  0xdeadbeef, /* trigger a hang if executed */
378                                  tmp->ring->vma->size / sizeof(u32));
379
380                         ce[n] = tmp;
381                 }
382
383                 /* Create max prio spinner, followed by N low prio nops */
384                 rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
385                 if (IS_ERR(rq)) {
386                         err = PTR_ERR(rq);
387                         goto err_ce;
388                 }
389
390                 i915_request_get(rq);
391                 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
392                 i915_request_add(rq);
393
394                 if (!igt_wait_for_spinner(&spin, rq)) {
395                         intel_gt_set_wedged(gt);
396                         i915_request_put(rq);
397                         err = -ETIME;
398                         goto err_ce;
399                 }
400
401                 /* Fill the ring until we cause a wrap */
402                 n = 0;
403                 while (intel_ring_direction(ce[0]->ring,
404                                             rq->wa_tail,
405                                             ce[0]->ring->tail) <= 0) {
406                         struct i915_request *tmp;
407
408                         tmp = intel_context_create_request(ce[0]);
409                         if (IS_ERR(tmp)) {
410                                 err = PTR_ERR(tmp);
411                                 i915_request_put(rq);
412                                 goto err_ce;
413                         }
414
415                         i915_request_add(tmp);
416                         intel_engine_flush_submission(engine);
417                         n++;
418                 }
419                 intel_engine_flush_submission(engine);
420                 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
421                          engine->name, n,
422                          ce[0]->ring->size,
423                          ce[0]->ring->tail,
424                          ce[0]->ring->emit,
425                          rq->tail);
426                 GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
427                                                 rq->tail,
428                                                 ce[0]->ring->tail) <= 0);
429                 i915_request_put(rq);
430
431                 /* Create a second ring to preempt the first ring after rq[0] */
432                 rq = intel_context_create_request(ce[1]);
433                 if (IS_ERR(rq)) {
434                         err = PTR_ERR(rq);
435                         goto err_ce;
436                 }
437
438                 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
439                 i915_request_get(rq);
440                 i915_request_add(rq);
441
442                 err = wait_for_submit(engine, rq, HZ / 2);
443                 i915_request_put(rq);
444                 if (err) {
445                         pr_err("%s: preemption request was not submitted\n",
446                                engine->name);
447                         err = -ETIME;
448                 }
449
450                 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
451                          engine->name,
452                          ce[0]->ring->tail, ce[0]->ring->emit,
453                          ce[1]->ring->tail, ce[1]->ring->emit);
454
455 err_ce:
456                 intel_engine_flush_submission(engine);
457                 igt_spinner_end(&spin);
458                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
459                         if (IS_ERR_OR_NULL(ce[n]))
460                                 break;
461
462                         intel_context_unpin(ce[n]);
463                         intel_context_put(ce[n]);
464                 }
465                 st_engine_heartbeat_enable(engine);
466                 if (igt_live_test_end(&t))
467                         err = -EIO;
468                 if (err)
469                         break;
470         }
471
472         igt_spinner_fini(&spin);
473         return err;
474 }
475
476 static int live_pin_rewind(void *arg)
477 {
478         struct intel_gt *gt = arg;
479         struct intel_engine_cs *engine;
480         enum intel_engine_id id;
481         int err = 0;
482
483         /*
484          * We have to be careful not to trust intel_ring too much; for example,
485          * ring->head is updated upon retire, which is out of sync with pinning
486          * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
487          * or else we risk writing an older, stale value.
488          *
489          * To simulate this, let's apply a bit of deliberate sabotage.
490          */
491
492         for_each_engine(engine, gt, id) {
493                 struct intel_context *ce;
494                 struct i915_request *rq;
495                 struct intel_ring *ring;
496                 struct igt_live_test t;
497
498                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
499                         err = -EIO;
500                         break;
501                 }
502
503                 ce = intel_context_create(engine);
504                 if (IS_ERR(ce)) {
505                         err = PTR_ERR(ce);
506                         break;
507                 }
508
509                 err = intel_context_pin(ce);
510                 if (err) {
511                         intel_context_put(ce);
512                         break;
513                 }
514
515                 /* Keep the context awake while we play games */
516                 err = i915_active_acquire(&ce->active);
517                 if (err) {
518                         intel_context_unpin(ce);
519                         intel_context_put(ce);
520                         break;
521                 }
522                 ring = ce->ring;
523
524                 /* Poison the ring, and offset the next request from HEAD */
525                 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
526                 ring->emit = ring->size / 2;
527                 ring->tail = ring->emit;
528                 GEM_BUG_ON(ring->head);
529
530                 intel_context_unpin(ce);
531
532                 /* Submit a simple nop request */
533                 GEM_BUG_ON(intel_context_is_pinned(ce));
534                 rq = intel_context_create_request(ce);
535                 i915_active_release(&ce->active); /* e.g. async retire */
536                 intel_context_put(ce);
537                 if (IS_ERR(rq)) {
538                         err = PTR_ERR(rq);
539                         break;
540                 }
541                 GEM_BUG_ON(!rq->head);
542                 i915_request_add(rq);
543
544                 /* Expect not to hang! */
545                 if (igt_live_test_end(&t)) {
546                         err = -EIO;
547                         break;
548                 }
549         }
550
551         return err;
552 }
553
554 static int live_hold_reset(void *arg)
555 {
556         struct intel_gt *gt = arg;
557         struct intel_engine_cs *engine;
558         enum intel_engine_id id;
559         struct igt_spinner spin;
560         int err = 0;
561
562         /*
563          * In order to support offline error capture for fast preempt reset,
564          * we need to decouple the guilty request and ensure that it and its
565          * descendants are not executed while the capture is in progress.
566          */
567
568         if (!intel_has_reset_engine(gt))
569                 return 0;
570
571         if (igt_spinner_init(&spin, gt))
572                 return -ENOMEM;
573
574         for_each_engine(engine, gt, id) {
575                 struct intel_context *ce;
576                 struct i915_request *rq;
577
578                 ce = intel_context_create(engine);
579                 if (IS_ERR(ce)) {
580                         err = PTR_ERR(ce);
581                         break;
582                 }
583
584                 st_engine_heartbeat_disable(engine);
585
586                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
587                 if (IS_ERR(rq)) {
588                         err = PTR_ERR(rq);
589                         goto out;
590                 }
591                 i915_request_add(rq);
592
593                 if (!igt_wait_for_spinner(&spin, rq)) {
594                         intel_gt_set_wedged(gt);
595                         err = -ETIME;
596                         goto out;
597                 }
598
599                 /* We have our request executing, now remove it and reset */
600                 /* We have our request executing; now remove it and reset */
601                 local_bh_disable();
602                 if (test_and_set_bit(I915_RESET_ENGINE + id,
603                                      &gt->reset.flags)) {
604                         local_bh_enable();
605                         intel_gt_set_wedged(gt);
606                         err = -EBUSY;
607                         goto out;
608                 }
609                 tasklet_disable(&engine->execlists.tasklet);
610
611                 engine->execlists.tasklet.callback(&engine->execlists.tasklet);
612                 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
613
614                 i915_request_get(rq);
615                 execlists_hold(engine, rq);
616                 GEM_BUG_ON(!i915_request_on_hold(rq));
617
618                 __intel_engine_reset_bh(engine, NULL);
619                 GEM_BUG_ON(rq->fence.error != -EIO);
620
621                 tasklet_enable(&engine->execlists.tasklet);
622                 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
623                                       &gt->reset.flags);
624                 local_bh_enable();
625
626                 /* Check that we do not resubmit the held request */
627                 if (!i915_request_wait(rq, 0, HZ / 5)) {
628                         pr_err("%s: on hold request completed!\n",
629                                engine->name);
630                         i915_request_put(rq);
631                         err = -EIO;
632                         goto out;
633                 }
634                 GEM_BUG_ON(!i915_request_on_hold(rq));
635
636                 /* But is resubmitted on release */
637                 execlists_unhold(engine, rq);
638                 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
639                         pr_err("%s: held request did not complete!\n",
640                                engine->name);
641                         intel_gt_set_wedged(gt);
642                         err = -ETIME;
643                 }
644                 i915_request_put(rq);
645
646 out:
647                 st_engine_heartbeat_enable(engine);
648                 intel_context_put(ce);
649                 if (err)
650                         break;
651         }
652
653         igt_spinner_fini(&spin);
654         return err;
655 }
656
657 static const char *error_repr(int err)
658 {
659         return err ? "bad" : "good";
660 }
661
662 static int live_error_interrupt(void *arg)
663 {
664         static const struct error_phase {
665                 enum { GOOD = 0, BAD = -EIO } error[2];
666         } phases[] = {
667                 { { BAD,  GOOD } },
668                 { { BAD,  BAD  } },
669                 { { BAD,  GOOD } },
670                 { { GOOD, GOOD } }, /* sentinel */
671         };
672         struct intel_gt *gt = arg;
673         struct intel_engine_cs *engine;
674         enum intel_engine_id id;
675
676         /*
677          * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
678          * of invalid commands in user batches that will cause a GPU hang.
679          * This is a faster mechanism than using hangcheck/heartbeats, but
680          * only detects problems the HW knows about -- it will not warn when
681          * we kill the HW!
682          *
683          * To verify our detection and reset, we throw some invalid commands
684          * at the HW and wait for the interrupt.
685          */
686
687         if (!intel_has_reset_engine(gt))
688                 return 0;
689
690         for_each_engine(engine, gt, id) {
691                 const struct error_phase *p;
692                 int err = 0;
693
694                 st_engine_heartbeat_disable(engine);
695
696                 for (p = phases; p->error[0] != GOOD; p++) {
697                         struct i915_request *client[ARRAY_SIZE(phases->error)];
698                         u32 *cs;
699                         int i;
700
701                         memset(client, 0, sizeof(client)); /* zero every client[] slot */
702                         for (i = 0; i < ARRAY_SIZE(client); i++) {
703                                 struct intel_context *ce;
704                                 struct i915_request *rq;
705
706                                 ce = intel_context_create(engine);
707                                 if (IS_ERR(ce)) {
708                                         err = PTR_ERR(ce);
709                                         goto out;
710                                 }
711
712                                 rq = intel_context_create_request(ce);
713                                 intel_context_put(ce);
714                                 if (IS_ERR(rq)) {
715                                         err = PTR_ERR(rq);
716                                         goto out;
717                                 }
718
719                                 if (rq->engine->emit_init_breadcrumb) {
720                                         err = rq->engine->emit_init_breadcrumb(rq);
721                                         if (err) {
722                                                 i915_request_add(rq);
723                                                 goto out;
724                                         }
725                                 }
726
727                                 cs = intel_ring_begin(rq, 2);
728                                 if (IS_ERR(cs)) {
729                                         i915_request_add(rq);
730                                         err = PTR_ERR(cs);
731                                         goto out;
732                                 }
733
734                                 if (p->error[i]) {
735                                         *cs++ = 0xdeadbeef;
736                                         *cs++ = 0xdeadbeef;
737                                 } else {
738                                         *cs++ = MI_NOOP;
739                                         *cs++ = MI_NOOP;
740                                 }
741
742                                 client[i] = i915_request_get(rq);
743                                 i915_request_add(rq);
744                         }
745
746                         err = wait_for_submit(engine, client[0], HZ / 2);
747                         if (err) {
748                                 pr_err("%s: first request did not start within time!\n",
749                                        engine->name);
750                                 err = -ETIME;
751                                 goto out;
752                         }
753
754                         for (i = 0; i < ARRAY_SIZE(client); i++) {
755                                 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
756                                         pr_debug("%s: %s request incomplete!\n",
757                                                  engine->name,
758                                                  error_repr(p->error[i]));
759
760                                 if (!i915_request_started(client[i])) {
761                                         pr_err("%s: %s request not started!\n",
762                                                engine->name,
763                                                error_repr(p->error[i]));
764                                         err = -ETIME;
765                                         goto out;
766                                 }
767
768                                 /* Kick the tasklet to process the error */
769                                 intel_engine_flush_submission(engine);
770                                 if (client[i]->fence.error != p->error[i]) {
771                                         pr_err("%s: %s request (%s) with wrong error code: %d\n",
772                                                engine->name,
773                                                error_repr(p->error[i]),
774                                                i915_request_completed(client[i]) ? "completed" : "running",
775                                                client[i]->fence.error);
776                                         err = -EINVAL;
777                                         goto out;
778                                 }
779                         }
780
781 out:
782                         for (i = 0; i < ARRAY_SIZE(client); i++)
783                                 if (client[i])
784                                         i915_request_put(client[i]);
785                         if (err) {
786                                 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
787                                        engine->name, p - phases,
788                                        p->error[0], p->error[1]);
789                                 break;
790                         }
791                 }
792
793                 st_engine_heartbeat_enable(engine);
794                 if (err) {
795                         intel_gt_set_wedged(gt);
796                         return err;
797                 }
798         }
799
800         return 0;
801 }
802
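/*
 * Emit one link of a semaphore chain: busy-wait (with arbitration
 * enabled) until dword @idx of @vma becomes non-zero, then release the
 * previous link by writing 1 into dword @idx - 1 (when @idx > 0).
 */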
803 static int
804 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
805 {
806         u32 *cs;
807
808         cs = intel_ring_begin(rq, 10);
809         if (IS_ERR(cs))
810                 return PTR_ERR(cs);
811
812         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
813
814         *cs++ = MI_SEMAPHORE_WAIT |
815                 MI_SEMAPHORE_GLOBAL_GTT |
816                 MI_SEMAPHORE_POLL |
817                 MI_SEMAPHORE_SAD_NEQ_SDD;
818         *cs++ = 0;
819         *cs++ = i915_ggtt_offset(vma) + 4 * idx;
820         *cs++ = 0;
821
822         if (idx > 0) {
823                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
824                 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
825                 *cs++ = 0;
826                 *cs++ = 1;
827         } else {
828                 *cs++ = MI_NOOP;
829                 *cs++ = MI_NOOP;
830                 *cs++ = MI_NOOP;
831                 *cs++ = MI_NOOP;
832         }
833
834         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
835
836         intel_ring_advance(rq, cs);
837         return 0;
838 }
839
840 static struct i915_request *
841 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
842 {
843         struct intel_context *ce;
844         struct i915_request *rq;
845         int err;
846
847         ce = intel_context_create(engine);
848         if (IS_ERR(ce))
849                 return ERR_CAST(ce);
850
851         rq = intel_context_create_request(ce);
852         if (IS_ERR(rq))
853                 goto out_ce;
854
855         err = 0;
856         if (rq->engine->emit_init_breadcrumb)
857                 err = rq->engine->emit_init_breadcrumb(rq);
858         if (err == 0)
859                 err = emit_semaphore_chain(rq, vma, idx);
860         if (err == 0)
861                 i915_request_get(rq);
862         i915_request_add(rq);
863         if (err)
864                 rq = ERR_PTR(err);
865
866 out_ce:
867         intel_context_put(ce);
868         return rq;
869 }
870
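/*
 * Submit a kernel request at @prio that writes 1 into dword @idx - 1 of
 * @vma, releasing the last link of the semaphore chain and so kicking
 * off the whole cascade.
 */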
871 static int
872 release_queue(struct intel_engine_cs *engine,
873               struct i915_vma *vma,
874               int idx, int prio)
875 {
876         struct i915_sched_attr attr = {
877                 .priority = prio,
878         };
879         struct i915_request *rq;
880         u32 *cs;
881
882         rq = intel_engine_create_kernel_request(engine);
883         if (IS_ERR(rq))
884                 return PTR_ERR(rq);
885
886         cs = intel_ring_begin(rq, 4);
887         if (IS_ERR(cs)) {
888                 i915_request_add(rq);
889                 return PTR_ERR(cs);
890         }
891
892         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
893         *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
894         *cs++ = 0;
895         *cs++ = 1;
896
897         intel_ring_advance(rq, cs);
898
899         i915_request_get(rq);
900         i915_request_add(rq);
901
902         local_bh_disable();
903         engine->schedule(rq, &attr);
904         local_bh_enable(); /* kick tasklet */
905
906         i915_request_put(rq);
907
908         return 0;
909 }
910
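/*
 * Build a long chain of semaphore busy-waits spread across all engines
 * and release it from the tail at maximum priority. The head request on
 * @outer can only complete if each engine timeslices between its
 * spinning links, allowing the later links (which release the earlier
 * ones) to run.
 */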
911 static int
912 slice_semaphore_queue(struct intel_engine_cs *outer,
913                       struct i915_vma *vma,
914                       int count)
915 {
916         struct intel_engine_cs *engine;
917         struct i915_request *head;
918         enum intel_engine_id id;
919         int err, i, n = 0;
920
921         head = semaphore_queue(outer, vma, n++);
922         if (IS_ERR(head))
923                 return PTR_ERR(head);
924
925         for_each_engine(engine, outer->gt, id) {
926                 if (!intel_engine_has_preemption(engine))
927                         continue;
928
929                 for (i = 0; i < count; i++) {
930                         struct i915_request *rq;
931
932                         rq = semaphore_queue(engine, vma, n++);
933                         if (IS_ERR(rq)) {
934                                 err = PTR_ERR(rq);
935                                 goto out;
936                         }
937
938                         i915_request_put(rq);
939                 }
940         }
941
942         err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
943         if (err)
944                 goto out;
945
946         if (i915_request_wait(head, 0,
947                               2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
948                 pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
949                        outer->name, count, n);
950                 GEM_TRACE_DUMP();
951                 intel_gt_set_wedged(outer->gt);
952                 err = -EIO;
953         }
954
955 out:
956         i915_request_put(head);
957         return err;
958 }
959
960 static int live_timeslice_preempt(void *arg)
961 {
962         struct intel_gt *gt = arg;
963         struct drm_i915_gem_object *obj;
964         struct intel_engine_cs *engine;
965         enum intel_engine_id id;
966         struct i915_vma *vma;
967         void *vaddr;
968         int err = 0;
969
970         /*
971          * If a request takes too long, we would like to give other users
972          * a fair go on the GPU. In particular, users may create batches
973          * that wait upon external input, where that input may even be
974          * supplied by another GPU job. To avoid blocking forever, we
975          * need to preempt the current task and replace it with another
976          * ready task.
977          */
978         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
979                 return 0;
980
981         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
982         if (IS_ERR(obj))
983                 return PTR_ERR(obj);
984
985         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
986         if (IS_ERR(vma)) {
987                 err = PTR_ERR(vma);
988                 goto err_obj;
989         }
990
991         vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
992         if (IS_ERR(vaddr)) {
993                 err = PTR_ERR(vaddr);
994                 goto err_obj;
995         }
996
997         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
998         if (err)
999                 goto err_map;
1000
1001         err = i915_vma_sync(vma);
1002         if (err)
1003                 goto err_pin;
1004
1005         for_each_engine(engine, gt, id) {
1006                 if (!intel_engine_has_preemption(engine))
1007                         continue;
1008
1009                 memset(vaddr, 0, PAGE_SIZE);
1010
1011                 st_engine_heartbeat_disable(engine);
1012                 err = slice_semaphore_queue(engine, vma, 5);
1013                 st_engine_heartbeat_enable(engine);
1014                 if (err)
1015                         goto err_pin;
1016
1017                 if (igt_flush_test(gt->i915)) {
1018                         err = -EIO;
1019                         goto err_pin;
1020                 }
1021         }
1022
1023 err_pin:
1024         i915_vma_unpin(vma);
1025 err_map:
1026         i915_gem_object_unpin_map(obj);
1027 err_obj:
1028         i915_gem_object_put(obj);
1029         return err;
1030 }
1031
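/*
 * Emit a request that, optionally after awaiting @wait, waits for the
 * status-page @slot to reach @idx, records RING_TIMESTAMP into
 * slot[@idx] and then bumps the slot to @idx + 1, so the order of
 * execution can be reconstructed from the captured timestamps.
 */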
1032 static struct i915_request *
1033 create_rewinder(struct intel_context *ce,
1034                 struct i915_request *wait,
1035                 void *slot, int idx)
1036 {
1037         const u32 offset =
1038                 i915_ggtt_offset(ce->engine->status_page.vma) +
1039                 offset_in_page(slot);
1040         struct i915_request *rq;
1041         u32 *cs;
1042         int err;
1043
1044         rq = intel_context_create_request(ce);
1045         if (IS_ERR(rq))
1046                 return rq;
1047
1048         if (wait) {
1049                 err = i915_request_await_dma_fence(rq, &wait->fence);
1050                 if (err)
1051                         goto err;
1052         }
1053
1054         cs = intel_ring_begin(rq, 14);
1055         if (IS_ERR(cs)) {
1056                 err = PTR_ERR(cs);
1057                 goto err;
1058         }
1059
1060         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1061         *cs++ = MI_NOOP;
1062
1063         *cs++ = MI_SEMAPHORE_WAIT |
1064                 MI_SEMAPHORE_GLOBAL_GTT |
1065                 MI_SEMAPHORE_POLL |
1066                 MI_SEMAPHORE_SAD_GTE_SDD;
1067         *cs++ = idx;
1068         *cs++ = offset;
1069         *cs++ = 0;
1070
1071         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1072         *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1073         *cs++ = offset + idx * sizeof(u32);
1074         *cs++ = 0;
1075
1076         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1077         *cs++ = offset;
1078         *cs++ = 0;
1079         *cs++ = idx + 1;
1080
1081         intel_ring_advance(rq, cs);
1082
1083         err = 0;
1084 err:
1085         i915_request_get(rq);
1086         i915_request_add(rq);
1087         if (err) {
1088                 i915_request_put(rq);
1089                 return ERR_PTR(err);
1090         }
1091
1092         return rq;
1093 }
1094
1095 static int live_timeslice_rewind(void *arg)
1096 {
1097         struct intel_gt *gt = arg;
1098         struct intel_engine_cs *engine;
1099         enum intel_engine_id id;
1100
1101         /*
1102          * The usual presumption on timeslice expiration is that we replace
1103          * the active context with another. However, given a chain of
1104          * dependencies we may end up replacing the context with itself,
1105          * but only a few of those requests, forcing us to rewind the
1106          * RING_TAIL of the original request.
1107          */
1108         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1109                 return 0;
1110
1111         for_each_engine(engine, gt, id) {
1112                 enum { A1, A2, B1 };
1113                 enum { X = 1, Z, Y };
1114                 struct i915_request *rq[3] = {};
1115                 struct intel_context *ce;
1116                 unsigned long timeslice;
1117                 int i, err = 0;
1118                 u32 *slot;
1119
1120                 if (!intel_engine_has_timeslices(engine))
1121                         continue;
1122
1123                 /*
1124                  * A:rq1 -- semaphore wait, timestamp X
1125                  * A:rq2 -- write timestamp Y
1126                  *
1127                  * B:rq1 [await A:rq1] -- write timestamp Z
1128                  *
1129                  * Force timeslice, release semaphore.
1130                  *
1131                  * Expect execution/evaluation order XZY
1132                  */
1133
1134                 st_engine_heartbeat_disable(engine);
1135                 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1136
1137                 slot = memset32(engine->status_page.addr + 1000, 0, 4);
1138
1139                 ce = intel_context_create(engine);
1140                 if (IS_ERR(ce)) {
1141                         err = PTR_ERR(ce);
1142                         goto err;
1143                 }
1144
1145                 rq[A1] = create_rewinder(ce, NULL, slot, X);
1146                 if (IS_ERR(rq[A1])) {
1147                         intel_context_put(ce);
1148                         goto err;
1149                 }
1150
1151                 rq[A2] = create_rewinder(ce, NULL, slot, Y);
1152                 intel_context_put(ce);
1153                 if (IS_ERR(rq[A2]))
1154                         goto err;
1155
1156                 err = wait_for_submit(engine, rq[A2], HZ / 2);
1157                 if (err) {
1158                         pr_err("%s: failed to submit first context\n",
1159                                engine->name);
1160                         goto err;
1161                 }
1162
1163                 ce = intel_context_create(engine);
1164                 if (IS_ERR(ce)) {
1165                         err = PTR_ERR(ce);
1166                         goto err;
1167                 }
1168
1169                 rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1170                 intel_context_put(ce);
1171                 if (IS_ERR(rq[B1]))
1172                         goto err;
1173
1174                 err = wait_for_submit(engine, rq[B1], HZ / 2);
1175                 if (err) {
1176                         pr_err("%s: failed to submit second context\n",
1177                                engine->name);
1178                         goto err;
1179                 }
1180
1181                 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1182                 ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1183                 while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1184                         /* Wait for the timeslice to kick in */
1185                         del_timer(&engine->execlists.timer);
1186                         tasklet_hi_schedule(&engine->execlists.tasklet);
1187                         intel_engine_flush_submission(engine);
1188                 }
1189                 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1190                 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1191                 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1192                 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1193
1194                 /* Release the hounds! */
1195                 slot[0] = 1;
1196                 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1197
1198                 for (i = 1; i <= 3; i++) {
1199                         unsigned long timeout = jiffies + HZ / 2;
1200
1201                         while (!READ_ONCE(slot[i]) &&
1202                                time_before(jiffies, timeout))
1203                                 ;
1204
1205                         if (!time_before(jiffies, timeout)) {
1206                                 pr_err("%s: rq[%d] timed out\n",
1207                                        engine->name, i - 1);
1208                                 err = -ETIME;
1209                                 goto err;
1210                         }
1211
1212                         pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1213                 }
1214
1215                 /* XZY: XZ < XY */
1216                 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1217                         pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1218                                engine->name,
1219                                slot[Z] - slot[X],
1220                                slot[Y] - slot[X]);
1221                         err = -EINVAL;
1222                 }
1223
1224 err:
1225                 memset32(&slot[0], -1, 4);
1226                 wmb();
1227
1228                 engine->props.timeslice_duration_ms = timeslice;
1229                 st_engine_heartbeat_enable(engine);
1230                 for (i = 0; i < 3; i++)
1231                         i915_request_put(rq[i]);
1232                 if (igt_flush_test(gt->i915))
1233                         err = -EIO;
1234                 if (err)
1235                         return err;
1236         }
1237
1238         return 0;
1239 }
1240
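/* Submit an empty kernel request, e.g. to occupy a second ELSP slot. */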
1241 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1242 {
1243         struct i915_request *rq;
1244
1245         rq = intel_engine_create_kernel_request(engine);
1246         if (IS_ERR(rq))
1247                 return rq;
1248
1249         i915_request_get(rq);
1250         i915_request_add(rq);
1251
1252         return rq;
1253 }
1254
1255 static long slice_timeout(struct intel_engine_cs *engine)
1256 {
1257         long timeout;
1258
1259         /* Enough time for a timeslice to kick in, and kick out */
1260         timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1261
1262         /* Enough time for the nop request to complete */
1263         timeout += HZ / 5;
1264
1265         return timeout + 1;
1266 }
1267
1268 static int live_timeslice_queue(void *arg)
1269 {
1270         struct intel_gt *gt = arg;
1271         struct drm_i915_gem_object *obj;
1272         struct intel_engine_cs *engine;
1273         enum intel_engine_id id;
1274         struct i915_vma *vma;
1275         void *vaddr;
1276         int err = 0;
1277
1278         /*
1279          * Make sure that even if ELSP[0] and ELSP[1] are filled, with
1280          * timeslicing between them disabled, we *do* enable timeslicing
1281          * if the queue demands it. (Normally, we do not submit if
1282          * ELSP[1] is already occupied, so must rely on timeslicing to
1283          * eject ELSP[0] in favour of the queue.)
1284          */
1285         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1286                 return 0;
1287
1288         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1289         if (IS_ERR(obj))
1290                 return PTR_ERR(obj);
1291
1292         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1293         if (IS_ERR(vma)) {
1294                 err = PTR_ERR(vma);
1295                 goto err_obj;
1296         }
1297
1298         vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1299         if (IS_ERR(vaddr)) {
1300                 err = PTR_ERR(vaddr);
1301                 goto err_obj;
1302         }
1303
1304         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1305         if (err)
1306                 goto err_map;
1307
1308         err = i915_vma_sync(vma);
1309         if (err)
1310                 goto err_pin;
1311
1312         for_each_engine(engine, gt, id) {
1313                 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
1314                 struct i915_request *rq, *nop;
1315
1316                 if (!intel_engine_has_preemption(engine))
1317                         continue;
1318
1319                 st_engine_heartbeat_disable(engine);
1320                 memset(vaddr, 0, PAGE_SIZE);
1321
1322                 /* ELSP[0]: semaphore wait */
1323                 rq = semaphore_queue(engine, vma, 0);
1324                 if (IS_ERR(rq)) {
1325                         err = PTR_ERR(rq);
1326                         goto err_heartbeat;
1327                 }
1328                 engine->schedule(rq, &attr);
1329                 err = wait_for_submit(engine, rq, HZ / 2);
1330                 if (err) {
1331                         pr_err("%s: Timed out trying to submit semaphores\n",
1332                                engine->name);
1333                         goto err_rq;
1334                 }
1335
1336                 /* ELSP[1]: nop request */
1337                 nop = nop_request(engine);
1338                 if (IS_ERR(nop)) {
1339                         err = PTR_ERR(nop);
1340                         goto err_rq;
1341                 }
1342                 err = wait_for_submit(engine, nop, HZ / 2);
1343                 i915_request_put(nop);
1344                 if (err) {
1345                         pr_err("%s: Timed out trying to submit nop\n",
1346                                engine->name);
1347                         goto err_rq;
1348                 }
1349
1350                 GEM_BUG_ON(i915_request_completed(rq));
1351                 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1352
1353                 /* Queue: semaphore signal, matching the semaphore's priority */
1354                 err = release_queue(engine, vma, 1, effective_prio(rq));
1355                 if (err)
1356                         goto err_rq;
1357
1358                 /* Wait until we ack the release_queue and start timeslicing */
1359                 do {
1360                         cond_resched();
1361                         intel_engine_flush_submission(engine);
1362                 } while (READ_ONCE(engine->execlists.pending[0]));
1363
1364                 /* Timeslice every jiffy, so within 2 we should signal */
1365                 if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1366                         struct drm_printer p =
1367                                 drm_info_printer(gt->i915->drm.dev);
1368
1369                         pr_err("%s: Failed to timeslice into queue\n",
1370                                engine->name);
1371                         intel_engine_dump(engine, &p,
1372                                           "%s\n", engine->name);
1373
1374                         memset(vaddr, 0xff, PAGE_SIZE);
1375                         err = -EIO;
1376                 }
1377 err_rq:
1378                 i915_request_put(rq);
1379 err_heartbeat:
1380                 st_engine_heartbeat_enable(engine);
1381                 if (err)
1382                         break;
1383         }
1384
1385 err_pin:
1386         i915_vma_unpin(vma);
1387 err_map:
1388         i915_gem_object_unpin_map(obj);
1389 err_obj:
1390         i915_gem_object_put(obj);
1391         return err;
1392 }
1393
1394 static int live_timeslice_nopreempt(void *arg)
1395 {
1396         struct intel_gt *gt = arg;
1397         struct intel_engine_cs *engine;
1398         enum intel_engine_id id;
1399         struct igt_spinner spin;
1400         int err = 0;
1401
1402         /*
1403          * We should not timeslice into a request that is marked with
1404          * I915_REQUEST_NOPREEMPT.
1405          */
1406         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1407                 return 0;
1408
1409         if (igt_spinner_init(&spin, gt))
1410                 return -ENOMEM;
1411
1412         for_each_engine(engine, gt, id) {
1413                 struct intel_context *ce;
1414                 struct i915_request *rq;
1415                 unsigned long timeslice;
1416
1417                 if (!intel_engine_has_preemption(engine))
1418                         continue;
1419
1420                 ce = intel_context_create(engine);
1421                 if (IS_ERR(ce)) {
1422                         err = PTR_ERR(ce);
1423                         break;
1424                 }
1425
1426                 st_engine_heartbeat_disable(engine);
1427                 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1428
1429                 /* Create an unpreemptible spinner */
1430
1431                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1432                 intel_context_put(ce);
1433                 if (IS_ERR(rq)) {
1434                         err = PTR_ERR(rq);
1435                         goto out_heartbeat;
1436                 }
1437
1438                 i915_request_get(rq);
1439                 i915_request_add(rq);
1440
1441                 if (!igt_wait_for_spinner(&spin, rq)) {
1442                         i915_request_put(rq);
1443                         err = -ETIME;
1444                         goto out_spin;
1445                 }
1446
1447                 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1448                 i915_request_put(rq);
1449
1450                 /* Followed by a maximum priority barrier (heartbeat) */
1451
1452                 ce = intel_context_create(engine);
1453                 if (IS_ERR(ce)) {
1454                         err = PTR_ERR(ce);
1455                         goto out_spin;
1456                 }
1457
1458                 rq = intel_context_create_request(ce);
1459                 intel_context_put(ce);
1460                 if (IS_ERR(rq)) {
1461                         err = PTR_ERR(rq);
1462                         goto out_spin;
1463                 }
1464
1465                 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1466                 i915_request_get(rq);
1467                 i915_request_add(rq);
1468
1469                 /*
1470                  * Wait until the barrier is in ELSP, and we know timeslicing
1471                  * will have been activated.
1472                  */
1473                 if (wait_for_submit(engine, rq, HZ / 2)) {
1474                         i915_request_put(rq);
1475                         err = -ETIME;
1476                         goto out_spin;
1477                 }
1478
1479                 /*
1480                  * Since the ELSP[0] request is unpreemptible, it should not
1481                  * allow the maximum priority barrier through. Wait long
1482                  * enough to see if it is timesliced in by mistake.
1483                  */
1484                 if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1485                         pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1486                                engine->name);
1487                         err = -EINVAL;
1488                 }
1489                 i915_request_put(rq);
1490
1491 out_spin:
1492                 igt_spinner_end(&spin);
1493 out_heartbeat:
1494                 xchg(&engine->props.timeslice_duration_ms, timeslice);
1495                 st_engine_heartbeat_enable(engine);
1496                 if (err)
1497                         break;
1498
1499                 if (igt_flush_test(gt->i915)) {
1500                         err = -EIO;
1501                         break;
1502                 }
1503         }
1504
1505         igt_spinner_fini(&spin);
1506         return err;
1507 }
1508
1509 static int live_busywait_preempt(void *arg)
1510 {
1511         struct intel_gt *gt = arg;
1512         struct i915_gem_context *ctx_hi, *ctx_lo;
1513         struct intel_engine_cs *engine;
1514         struct drm_i915_gem_object *obj;
1515         struct i915_vma *vma;
1516         enum intel_engine_id id;
1517         int err = -ENOMEM;
1518         u32 *map;
1519
1520         /*
1521          * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1522          * preempt the busywaits used to synchronise between rings.
1523          */
1524
1525         ctx_hi = kernel_context(gt->i915);
1526         if (!ctx_hi)
1527                 return -ENOMEM;
1528         ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1529
1530         ctx_lo = kernel_context(gt->i915);
1531         if (!ctx_lo)
1532                 goto err_ctx_hi;
1533         ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1534
1535         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1536         if (IS_ERR(obj)) {
1537                 err = PTR_ERR(obj);
1538                 goto err_ctx_lo;
1539         }
1540
1541         map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1542         if (IS_ERR(map)) {
1543                 err = PTR_ERR(map);
1544                 goto err_obj;
1545         }
1546
1547         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1548         if (IS_ERR(vma)) {
1549                 err = PTR_ERR(vma);
1550                 goto err_map;
1551         }
1552
1553         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1554         if (err)
1555                 goto err_map;
1556
1557         err = i915_vma_sync(vma);
1558         if (err)
1559                 goto err_vma;
1560
1561         for_each_engine(engine, gt, id) {
1562                 struct i915_request *lo, *hi;
1563                 struct igt_live_test t;
1564                 u32 *cs;
1565
1566                 if (!intel_engine_has_preemption(engine))
1567                         continue;
1568
1569                 if (!intel_engine_can_store_dword(engine))
1570                         continue;
1571
1572                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1573                         err = -EIO;
1574                         goto err_vma;
1575                 }
1576
1577                 /*
1578                  * We create two requests. The low priority request
1579                  * busywaits on a semaphore (inside the ringbuffer where
1580                  * it should be preemptible) and the high priority request
1581                  * uses a MI_STORE_DWORD_IMM to update the semaphore value
1582                  * allowing the first request to complete. If preemption
1583                  * fails, we hang instead.
1584                  */
1585
1586                 lo = igt_request_alloc(ctx_lo, engine);
1587                 if (IS_ERR(lo)) {
1588                         err = PTR_ERR(lo);
1589                         goto err_vma;
1590                 }
1591
1592                 cs = intel_ring_begin(lo, 8);
1593                 if (IS_ERR(cs)) {
1594                         err = PTR_ERR(cs);
1595                         i915_request_add(lo);
1596                         goto err_vma;
1597                 }
1598
1599                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1600                 *cs++ = i915_ggtt_offset(vma);
1601                 *cs++ = 0;
1602                 *cs++ = 1;
1603
1604                 /* XXX Do we need a flush + invalidate here? */
1605
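                /* Busywait: poll the semaphore dword until it reads back as zero */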
1606                 *cs++ = MI_SEMAPHORE_WAIT |
1607                         MI_SEMAPHORE_GLOBAL_GTT |
1608                         MI_SEMAPHORE_POLL |
1609                         MI_SEMAPHORE_SAD_EQ_SDD;
1610                 *cs++ = 0;
1611                 *cs++ = i915_ggtt_offset(vma);
1612                 *cs++ = 0;
1613
1614                 intel_ring_advance(lo, cs);
1615
1616                 i915_request_get(lo);
1617                 i915_request_add(lo);
1618
1619                 if (wait_for(READ_ONCE(*map), 10)) {
1620                         i915_request_put(lo);
1621                         err = -ETIMEDOUT;
1622                         goto err_vma;
1623                 }
1624
1625                 /* Low priority request should be busywaiting now */
1626                 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1627                         i915_request_put(lo);
1628                         pr_err("%s: Busywaiting request did not busywait!\n",
1629                                engine->name);
1630                         err = -EIO;
1631                         goto err_vma;
1632                 }
1633
1634                 hi = igt_request_alloc(ctx_hi, engine);
1635                 if (IS_ERR(hi)) {
1636                         err = PTR_ERR(hi);
1637                         i915_request_put(lo);
1638                         goto err_vma;
1639                 }
1640
1641                 cs = intel_ring_begin(hi, 4);
1642                 if (IS_ERR(cs)) {
1643                         err = PTR_ERR(cs);
1644                         i915_request_add(hi);
1645                         i915_request_put(lo);
1646                         goto err_vma;
1647                 }
1648
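                /* Clear the semaphore, releasing the low priority busywait */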
1649                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1650                 *cs++ = i915_ggtt_offset(vma);
1651                 *cs++ = 0;
1652                 *cs++ = 0;
1653
1654                 intel_ring_advance(hi, cs);
1655                 i915_request_add(hi);
1656
1657                 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1658                         struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1659
1660                         pr_err("%s: Failed to preempt semaphore busywait!\n",
1661                                engine->name);
1662
1663                         intel_engine_dump(engine, &p, "%s\n", engine->name);
1664                         GEM_TRACE_DUMP();
1665
1666                         i915_request_put(lo);
1667                         intel_gt_set_wedged(gt);
1668                         err = -EIO;
1669                         goto err_vma;
1670                 }
1671                 GEM_BUG_ON(READ_ONCE(*map));
1672                 i915_request_put(lo);
1673
1674                 if (igt_live_test_end(&t)) {
1675                         err = -EIO;
1676                         goto err_vma;
1677                 }
1678         }
1679
1680         err = 0;
1681 err_vma:
1682         i915_vma_unpin(vma);
1683 err_map:
1684         i915_gem_object_unpin_map(obj);
1685 err_obj:
1686         i915_gem_object_put(obj);
1687 err_ctx_lo:
1688         kernel_context_close(ctx_lo);
1689 err_ctx_hi:
1690         kernel_context_close(ctx_hi);
1691         return err;
1692 }
1693
1694 static struct i915_request *
1695 spinner_create_request(struct igt_spinner *spin,
1696                        struct i915_gem_context *ctx,
1697                        struct intel_engine_cs *engine,
1698                        u32 arb)
1699 {
1700         struct intel_context *ce;
1701         struct i915_request *rq;
1702
1703         ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1704         if (IS_ERR(ce))
1705                 return ERR_CAST(ce);
1706
1707         rq = igt_spinner_create_request(spin, ce, arb);
1708         intel_context_put(ce);
1709         return rq;
1710 }
1711
1712 static int live_preempt(void *arg)
1713 {
1714         struct intel_gt *gt = arg;
1715         struct i915_gem_context *ctx_hi, *ctx_lo;
1716         struct igt_spinner spin_hi, spin_lo;
1717         struct intel_engine_cs *engine;
1718         enum intel_engine_id id;
1719         int err = -ENOMEM;
1720
1721         if (igt_spinner_init(&spin_hi, gt))
1722                 return -ENOMEM;
1723
1724         if (igt_spinner_init(&spin_lo, gt))
1725                 goto err_spin_hi;
1726
1727         ctx_hi = kernel_context(gt->i915);
1728         if (!ctx_hi)
1729                 goto err_spin_lo;
1730         ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1731
1732         ctx_lo = kernel_context(gt->i915);
1733         if (!ctx_lo)
1734                 goto err_ctx_hi;
1735         ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1736
1737         for_each_engine(engine, gt, id) {
1738                 struct igt_live_test t;
1739                 struct i915_request *rq;
1740
1741                 if (!intel_engine_has_preemption(engine))
1742                         continue;
1743
1744                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1745                         err = -EIO;
1746                         goto err_ctx_lo;
1747                 }
1748
1749                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1750                                             MI_ARB_CHECK);
1751                 if (IS_ERR(rq)) {
1752                         err = PTR_ERR(rq);
1753                         goto err_ctx_lo;
1754                 }
1755
1756                 i915_request_add(rq);
1757                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1758                         GEM_TRACE("lo spinner failed to start\n");
1759                         GEM_TRACE_DUMP();
1760                         intel_gt_set_wedged(gt);
1761                         err = -EIO;
1762                         goto err_ctx_lo;
1763                 }
1764
1765                 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1766                                             MI_ARB_CHECK);
1767                 if (IS_ERR(rq)) {
1768                         igt_spinner_end(&spin_lo);
1769                         err = PTR_ERR(rq);
1770                         goto err_ctx_lo;
1771                 }
1772
1773                 i915_request_add(rq);
1774                 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1775                         GEM_TRACE("hi spinner failed to start\n");
1776                         GEM_TRACE_DUMP();
1777                         intel_gt_set_wedged(gt);
1778                         err = -EIO;
1779                         goto err_ctx_lo;
1780                 }
1781
1782                 igt_spinner_end(&spin_hi);
1783                 igt_spinner_end(&spin_lo);
1784
1785                 if (igt_live_test_end(&t)) {
1786                         err = -EIO;
1787                         goto err_ctx_lo;
1788                 }
1789         }
1790
1791         err = 0;
1792 err_ctx_lo:
1793         kernel_context_close(ctx_lo);
1794 err_ctx_hi:
1795         kernel_context_close(ctx_hi);
1796 err_spin_lo:
1797         igt_spinner_fini(&spin_lo);
1798 err_spin_hi:
1799         igt_spinner_fini(&spin_hi);
1800         return err;
1801 }
1802
1803 static int live_late_preempt(void *arg)
1804 {
1805         struct intel_gt *gt = arg;
1806         struct i915_gem_context *ctx_hi, *ctx_lo;
1807         struct igt_spinner spin_hi, spin_lo;
1808         struct intel_engine_cs *engine;
1809         struct i915_sched_attr attr = {};
1810         enum intel_engine_id id;
1811         int err = -ENOMEM;
1812
1813         if (igt_spinner_init(&spin_hi, gt))
1814                 return -ENOMEM;
1815
1816         if (igt_spinner_init(&spin_lo, gt))
1817                 goto err_spin_hi;
1818
1819         ctx_hi = kernel_context(gt->i915);
1820         if (!ctx_hi)
1821                 goto err_spin_lo;
1822
1823         ctx_lo = kernel_context(gt->i915);
1824         if (!ctx_lo)
1825                 goto err_ctx_hi;
1826
1827         /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1828         ctx_lo->sched.priority = 1;
1829
1830         for_each_engine(engine, gt, id) {
1831                 struct igt_live_test t;
1832                 struct i915_request *rq;
1833
1834                 if (!intel_engine_has_preemption(engine))
1835                         continue;
1836
1837                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1838                         err = -EIO;
1839                         goto err_ctx_lo;
1840                 }
1841
1842                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1843                                             MI_ARB_CHECK);
1844                 if (IS_ERR(rq)) {
1845                         err = PTR_ERR(rq);
1846                         goto err_ctx_lo;
1847                 }
1848
1849                 i915_request_add(rq);
1850                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1851                         pr_err("First context failed to start\n");
1852                         goto err_wedged;
1853                 }
1854
1855                 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1856                                             MI_NOOP);
1857                 if (IS_ERR(rq)) {
1858                         igt_spinner_end(&spin_lo);
1859                         err = PTR_ERR(rq);
1860                         goto err_ctx_lo;
1861                 }
1862
1863                 i915_request_add(rq);
1864                 if (igt_wait_for_spinner(&spin_hi, rq)) {
1865                         pr_err("Second context overtook first?\n");
1866                         goto err_wedged;
1867                 }
1868
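                /* Promote the waiting request to trigger the (late) preemption */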
1869                 attr.priority = I915_PRIORITY_MAX;
1870                 engine->schedule(rq, &attr);
1871
1872                 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1873                         pr_err("High priority context failed to preempt the low priority context\n");
1874                         GEM_TRACE_DUMP();
1875                         goto err_wedged;
1876                 }
1877
1878                 igt_spinner_end(&spin_hi);
1879                 igt_spinner_end(&spin_lo);
1880
1881                 if (igt_live_test_end(&t)) {
1882                         err = -EIO;
1883                         goto err_ctx_lo;
1884                 }
1885         }
1886
1887         err = 0;
1888 err_ctx_lo:
1889         kernel_context_close(ctx_lo);
1890 err_ctx_hi:
1891         kernel_context_close(ctx_hi);
1892 err_spin_lo:
1893         igt_spinner_fini(&spin_lo);
1894 err_spin_hi:
1895         igt_spinner_fini(&spin_hi);
1896         return err;
1897
1898 err_wedged:
1899         igt_spinner_end(&spin_hi);
1900         igt_spinner_end(&spin_lo);
1901         intel_gt_set_wedged(gt);
1902         err = -EIO;
1903         goto err_ctx_lo;
1904 }
1905
1906 struct preempt_client {
1907         struct igt_spinner spin;
1908         struct i915_gem_context *ctx;
1909 };
1910
1911 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1912 {
1913         c->ctx = kernel_context(gt->i915);
1914         if (!c->ctx)
1915                 return -ENOMEM;
1916
1917         if (igt_spinner_init(&c->spin, gt))
1918                 goto err_ctx;
1919
1920         return 0;
1921
1922 err_ctx:
1923         kernel_context_close(c->ctx);
1924         return -ENOMEM;
1925 }
1926
1927 static void preempt_client_fini(struct preempt_client *c)
1928 {
1929         igt_spinner_fini(&c->spin);
1930         kernel_context_close(c->ctx);
1931 }
1932
1933 static int live_nopreempt(void *arg)
1934 {
1935         struct intel_gt *gt = arg;
1936         struct intel_engine_cs *engine;
1937         struct preempt_client a, b;
1938         enum intel_engine_id id;
1939         int err = -ENOMEM;
1940
1941         /*
1942          * Verify that we can disable preemption for an individual request
1943          * that may be being observed and does not want to be interrupted.
1944          */
1945
1946         if (preempt_client_init(gt, &a))
1947                 return -ENOMEM;
1948         if (preempt_client_init(gt, &b))
1949                 goto err_client_a;
1950         b.ctx->sched.priority = I915_PRIORITY_MAX;
1951
1952         for_each_engine(engine, gt, id) {
1953                 struct i915_request *rq_a, *rq_b;
1954
1955                 if (!intel_engine_has_preemption(engine))
1956                         continue;
1957
1958                 engine->execlists.preempt_hang.count = 0;
1959
1960                 rq_a = spinner_create_request(&a.spin,
1961                                               a.ctx, engine,
1962                                               MI_ARB_CHECK);
1963                 if (IS_ERR(rq_a)) {
1964                         err = PTR_ERR(rq_a);
1965                         goto err_client_b;
1966                 }
1967
1968                 /* Low priority client, but unpreemptable! */
1969                 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1970
1971                 i915_request_add(rq_a);
1972                 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1973                         pr_err("First client failed to start\n");
1974                         goto err_wedged;
1975                 }
1976
1977                 rq_b = spinner_create_request(&b.spin,
1978                                               b.ctx, engine,
1979                                               MI_ARB_CHECK);
1980                 if (IS_ERR(rq_b)) {
1981                         err = PTR_ERR(rq_b);
1982                         goto err_client_b;
1983                 }
1984
1985                 i915_request_add(rq_b);
1986
1987                 /* B is much more important than A! (But A is unpreemptable.) */
1988                 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1989
1990                 /* Wait long enough for preemption and timeslicing */
1991                 if (igt_wait_for_spinner(&b.spin, rq_b)) {
1992                         pr_err("Second client started too early!\n");
1993                         goto err_wedged;
1994                 }
1995
1996                 igt_spinner_end(&a.spin);
1997
1998                 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1999                         pr_err("Second client failed to start\n");
2000                         goto err_wedged;
2001                 }
2002
2003                 igt_spinner_end(&b.spin);
2004
2005                 if (engine->execlists.preempt_hang.count) {
2006                         pr_err("Preemption recorded x%d; should have been suppressed!\n",
2007                                engine->execlists.preempt_hang.count);
2008                         err = -EINVAL;
2009                         goto err_wedged;
2010                 }
2011
2012                 if (igt_flush_test(gt->i915))
2013                         goto err_wedged;
2014         }
2015
2016         err = 0;
2017 err_client_b:
2018         preempt_client_fini(&b);
2019 err_client_a:
2020         preempt_client_fini(&a);
2021         return err;
2022
2023 err_wedged:
2024         igt_spinner_end(&b.spin);
2025         igt_spinner_end(&a.spin);
2026         intel_gt_set_wedged(gt);
2027         err = -EIO;
2028         goto err_client_b;
2029 }
2030
2031 struct live_preempt_cancel {
2032         struct intel_engine_cs *engine;
2033         struct preempt_client a, b;
2034 };
2035
2036 static int __cancel_active0(struct live_preempt_cancel *arg)
2037 {
2038         struct i915_request *rq;
2039         struct igt_live_test t;
2040         int err;
2041
2042         /* Preempt cancel of ELSP0 */
2043         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2044         if (igt_live_test_begin(&t, arg->engine->i915,
2045                                 __func__, arg->engine->name))
2046                 return -EIO;
2047
2048         rq = spinner_create_request(&arg->a.spin,
2049                                     arg->a.ctx, arg->engine,
2050                                     MI_ARB_CHECK);
2051         if (IS_ERR(rq))
2052                 return PTR_ERR(rq);
2053
2054         clear_bit(CONTEXT_BANNED, &rq->context->flags);
2055         i915_request_get(rq);
2056         i915_request_add(rq);
2057         if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2058                 err = -EIO;
2059                 goto out;
2060         }
2061
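        /* Ban the spinner's context, then use a pulse to evict it from ELSP0 */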
2062         intel_context_set_banned(rq->context);
2063         err = intel_engine_pulse(arg->engine);
2064         if (err)
2065                 goto out;
2066
2067         err = wait_for_reset(arg->engine, rq, HZ / 2);
2068         if (err) {
2069                 pr_err("Cancelled inflight0 request did not reset\n");
2070                 goto out;
2071         }
2072
2073 out:
2074         i915_request_put(rq);
2075         if (igt_live_test_end(&t))
2076                 err = -EIO;
2077         return err;
2078 }
2079
2080 static int __cancel_active1(struct live_preempt_cancel *arg)
2081 {
2082         struct i915_request *rq[2] = {};
2083         struct igt_live_test t;
2084         int err;
2085
2086         /* Preempt cancel of ELSP1 */
2087         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2088         if (igt_live_test_begin(&t, arg->engine->i915,
2089                                 __func__, arg->engine->name))
2090                 return -EIO;
2091
2092         rq[0] = spinner_create_request(&arg->a.spin,
2093                                        arg->a.ctx, arg->engine,
2094                                        MI_NOOP); /* no preemption */
2095         if (IS_ERR(rq[0]))
2096                 return PTR_ERR(rq[0]);
2097
2098         clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2099         i915_request_get(rq[0]);
2100         i915_request_add(rq[0]);
2101         if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2102                 err = -EIO;
2103                 goto out;
2104         }
2105
2106         rq[1] = spinner_create_request(&arg->b.spin,
2107                                        arg->b.ctx, arg->engine,
2108                                        MI_ARB_CHECK);
2109         if (IS_ERR(rq[1])) {
2110                 err = PTR_ERR(rq[1]);
2111                 goto out;
2112         }
2113
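        /* Chain rq[1] behind the spinner so it ends up queued in ELSP1 */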
2114         clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2115         i915_request_get(rq[1]);
2116         err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2117         i915_request_add(rq[1]);
2118         if (err)
2119                 goto out;
2120
2121         intel_context_set_banned(rq[1]->context);
2122         err = intel_engine_pulse(arg->engine);
2123         if (err)
2124                 goto out;
2125
2126         igt_spinner_end(&arg->a.spin);
2127         err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2128         if (err)
2129                 goto out;
2130
2131         if (rq[0]->fence.error != 0) {
2132                 pr_err("Normal inflight0 request did not complete\n");
2133                 err = -EINVAL;
2134                 goto out;
2135         }
2136
2137         if (rq[1]->fence.error != -EIO) {
2138                 pr_err("Cancelled inflight1 request did not report -EIO\n");
2139                 err = -EINVAL;
2140                 goto out;
2141         }
2142
2143 out:
2144         i915_request_put(rq[1]);
2145         i915_request_put(rq[0]);
2146         if (igt_live_test_end(&t))
2147                 err = -EIO;
2148         return err;
2149 }
2150
2151 static int __cancel_queued(struct live_preempt_cancel *arg)
2152 {
2153         struct i915_request *rq[3] = {};
2154         struct igt_live_test t;
2155         int err;
2156
2157         /* Full ELSP and one in the wings */
2158         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2159         if (igt_live_test_begin(&t, arg->engine->i915,
2160                                 __func__, arg->engine->name))
2161                 return -EIO;
2162
2163         rq[0] = spinner_create_request(&arg->a.spin,
2164                                        arg->a.ctx, arg->engine,
2165                                        MI_ARB_CHECK);
2166         if (IS_ERR(rq[0]))
2167                 return PTR_ERR(rq[0]);
2168
2169         clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2170         i915_request_get(rq[0]);
2171         i915_request_add(rq[0]);
2172         if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2173                 err = -EIO;
2174                 goto out;
2175         }
2176
2177         rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2178         if (IS_ERR(rq[1])) {
2179                 err = PTR_ERR(rq[1]);
2180                 goto out;
2181         }
2182
2183         clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2184         i915_request_get(rq[1]);
2185         err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2186         i915_request_add(rq[1]);
2187         if (err)
2188                 goto out;
2189
2190         rq[2] = spinner_create_request(&arg->b.spin,
2191                                        arg->a.ctx, arg->engine,
2192                                        MI_ARB_CHECK);
2193         if (IS_ERR(rq[2])) {
2194                 err = PTR_ERR(rq[2]);
2195                 goto out;
2196         }
2197
2198         i915_request_get(rq[2]);
2199         err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2200         i915_request_add(rq[2]);
2201         if (err)
2202                 goto out;
2203
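        /* rq[2] shares a context with rq[0]: banning it should cancel both, leaving rq[1] to complete */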
2204         intel_context_set_banned(rq[2]->context);
2205         err = intel_engine_pulse(arg->engine);
2206         if (err)
2207                 goto out;
2208
2209         err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2210         if (err)
2211                 goto out;
2212
2213         if (rq[0]->fence.error != -EIO) {
2214                 pr_err("Cancelled inflight0 request did not report -EIO\n");
2215                 err = -EINVAL;
2216                 goto out;
2217         }
2218
2219         if (rq[1]->fence.error != 0) {
2220                 pr_err("Normal inflight1 request did not complete\n");
2221                 err = -EINVAL;
2222                 goto out;
2223         }
2224
2225         if (rq[2]->fence.error != -EIO) {
2226                 pr_err("Cancelled queued request did not report -EIO\n");
2227                 err = -EINVAL;
2228                 goto out;
2229         }
2230
2231 out:
2232         i915_request_put(rq[2]);
2233         i915_request_put(rq[1]);
2234         i915_request_put(rq[0]);
2235         if (igt_live_test_end(&t))
2236                 err = -EIO;
2237         return err;
2238 }
2239
2240 static int __cancel_hostile(struct live_preempt_cancel *arg)
2241 {
2242         struct i915_request *rq;
2243         int err;
2244
2245         /* Preempt cancel non-preemptible spinner in ELSP0 */
2246         if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2247                 return 0;
2248
2249         if (!intel_has_reset_engine(arg->engine->gt))
2250                 return 0;
2251
2252         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2253         rq = spinner_create_request(&arg->a.spin,
2254                                     arg->a.ctx, arg->engine,
2255                                     MI_NOOP); /* preemption disabled */
2256         if (IS_ERR(rq))
2257                 return PTR_ERR(rq);
2258
2259         clear_bit(CONTEXT_BANNED, &rq->context->flags);
2260         i915_request_get(rq);
2261         i915_request_add(rq);
2262         if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2263                 err = -EIO;
2264                 goto out;
2265         }
2266
2267         intel_context_set_banned(rq->context);
2268         err = intel_engine_pulse(arg->engine); /* force reset */
2269         if (err)
2270                 goto out;
2271
2272         err = wait_for_reset(arg->engine, rq, HZ / 2);
2273         if (err) {
2274                 pr_err("Cancelled inflight0 request did not reset\n");
2275                 goto out;
2276         }
2277
2278 out:
2279         i915_request_put(rq);
2280         if (igt_flush_test(arg->engine->i915))
2281                 err = -EIO;
2282         return err;
2283 }
2284
2285 static void force_reset_timeout(struct intel_engine_cs *engine)
2286 {
2287         engine->reset_timeout.probability = 999;
2288         atomic_set(&engine->reset_timeout.times, -1);
2289 }
2290
2291 static void cancel_reset_timeout(struct intel_engine_cs *engine)
2292 {
2293         memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2294 }
2295
2296 static int __cancel_fail(struct live_preempt_cancel *arg)
2297 {
2298         struct intel_engine_cs *engine = arg->engine;
2299         struct i915_request *rq;
2300         int err;
2301
2302         if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2303                 return 0;
2304
2305         if (!intel_has_reset_engine(engine->gt))
2306                 return 0;
2307
2308         GEM_TRACE("%s(%s)\n", __func__, engine->name);
2309         rq = spinner_create_request(&arg->a.spin,
2310                                     arg->a.ctx, engine,
2311                                     MI_NOOP); /* preemption disabled */
2312         if (IS_ERR(rq))
2313                 return PTR_ERR(rq);
2314
2315         clear_bit(CONTEXT_BANNED, &rq->context->flags);
2316         i915_request_get(rq);
2317         i915_request_add(rq);
2318         if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2319                 err = -EIO;
2320                 goto out;
2321         }
2322
2323         intel_context_set_banned(rq->context);
2324
2325         err = intel_engine_pulse(engine);
2326         if (err)
2327                 goto out;
2328
2329         force_reset_timeout(engine);
2330
2331         /* force preempt reset [failure] */
2332         while (!engine->execlists.pending[0])
2333                 intel_engine_flush_submission(engine);
2334         del_timer_sync(&engine->execlists.preempt);
2335         intel_engine_flush_submission(engine);
2336
2337         cancel_reset_timeout(engine);
2338
2339         /* after failure, require heartbeats to reset device */
2340         intel_engine_set_heartbeat(engine, 1);
2341         err = wait_for_reset(engine, rq, HZ / 2);
2342         intel_engine_set_heartbeat(engine,
2343                                    engine->defaults.heartbeat_interval_ms);
2344         if (err) {
2345                 pr_err("Cancelled inflight0 request did not reset\n");
2346                 goto out;
2347         }
2348
2349 out:
2350         i915_request_put(rq);
2351         if (igt_flush_test(engine->i915))
2352                 err = -EIO;
2353         return err;
2354 }
2355
2356 static int live_preempt_cancel(void *arg)
2357 {
2358         struct intel_gt *gt = arg;
2359         struct live_preempt_cancel data;
2360         enum intel_engine_id id;
2361         int err = -ENOMEM;
2362
2363         /*
2364          * To cancel an inflight context, we need to first remove it from the
2365          * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2366          */
2367
2368         if (preempt_client_init(gt, &data.a))
2369                 return -ENOMEM;
2370         if (preempt_client_init(gt, &data.b))
2371                 goto err_client_a;
2372
2373         for_each_engine(data.engine, gt, id) {
2374                 if (!intel_engine_has_preemption(data.engine))
2375                         continue;
2376
2377                 err = __cancel_active0(&data);
2378                 if (err)
2379                         goto err_wedged;
2380
2381                 err = __cancel_active1(&data);
2382                 if (err)
2383                         goto err_wedged;
2384
2385                 err = __cancel_queued(&data);
2386                 if (err)
2387                         goto err_wedged;
2388
2389                 err = __cancel_hostile(&data);
2390                 if (err)
2391                         goto err_wedged;
2392
2393                 err = __cancel_fail(&data);
2394                 if (err)
2395                         goto err_wedged;
2396         }
2397
2398         err = 0;
2399 err_client_b:
2400         preempt_client_fini(&data.b);
2401 err_client_a:
2402         preempt_client_fini(&data.a);
2403         return err;
2404
2405 err_wedged:
2406         GEM_TRACE_DUMP();
2407         igt_spinner_end(&data.b.spin);
2408         igt_spinner_end(&data.a.spin);
2409         intel_gt_set_wedged(gt);
2410         goto err_client_b;
2411 }
2412
2413 static int live_suppress_self_preempt(void *arg)
2414 {
2415         struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2416         struct intel_gt *gt = arg;
2417         struct intel_engine_cs *engine;
2418         struct preempt_client a, b;
2419         enum intel_engine_id id;
2420         int err = -ENOMEM;
2421
2422         /*
2423          * Verify that if a preemption request does not cause a change in
2424          * the current execution order, the preempt-to-idle injection is
2425          * skipped and that we do not accidentally apply it after the CS
2426          * completion event.
2427          */
2428
2429         if (intel_uc_uses_guc_submission(&gt->uc))
2430                 return 0; /* presume black box */
2431
2432         if (intel_vgpu_active(gt->i915))
2433                 return 0; /* GVT forces single port & request submission */
2434
2435         if (preempt_client_init(gt, &a))
2436                 return -ENOMEM;
2437         if (preempt_client_init(gt, &b))
2438                 goto err_client_a;
2439
2440         for_each_engine(engine, gt, id) {
2441                 struct i915_request *rq_a, *rq_b;
2442                 int depth;
2443
2444                 if (!intel_engine_has_preemption(engine))
2445                         continue;
2446
2447                 if (igt_flush_test(gt->i915))
2448                         goto err_wedged;
2449
2450                 st_engine_heartbeat_disable(engine);
2451                 engine->execlists.preempt_hang.count = 0;
2452
2453                 rq_a = spinner_create_request(&a.spin,
2454                                               a.ctx, engine,
2455                                               MI_NOOP);
2456                 if (IS_ERR(rq_a)) {
2457                         err = PTR_ERR(rq_a);
2458                         st_engine_heartbeat_enable(engine);
2459                         goto err_client_b;
2460                 }
2461
2462                 i915_request_add(rq_a);
2463                 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2464                         pr_err("First client failed to start\n");
2465                         st_engine_heartbeat_enable(engine);
2466                         goto err_wedged;
2467                 }
2468
2469                 /* Keep postponing the timer to avoid premature slicing */
2470                 mod_timer(&engine->execlists.timer, jiffies + HZ);
2471                 for (depth = 0; depth < 8; depth++) {
2472                         rq_b = spinner_create_request(&b.spin,
2473                                                       b.ctx, engine,
2474                                                       MI_NOOP);
2475                         if (IS_ERR(rq_b)) {
2476                                 err = PTR_ERR(rq_b);
2477                                 st_engine_heartbeat_enable(engine);
2478                                 goto err_client_b;
2479                         }
2480                         i915_request_add(rq_b);
2481
2482                         GEM_BUG_ON(i915_request_completed(rq_a));
2483                         engine->schedule(rq_a, &attr);
2484                         igt_spinner_end(&a.spin);
2485
2486                         if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2487                                 pr_err("Second client failed to start\n");
2488                                 st_engine_heartbeat_enable(engine);
2489                                 goto err_wedged;
2490                         }
2491
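                        /* Swap clients so the next pass promotes the spinner we just started */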
2492                         swap(a, b);
2493                         rq_a = rq_b;
2494                 }
2495                 igt_spinner_end(&a.spin);
2496
2497                 if (engine->execlists.preempt_hang.count) {
2498                         pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2499                                engine->name,
2500                                engine->execlists.preempt_hang.count,
2501                                depth);
2502                         st_engine_heartbeat_enable(engine);
2503                         err = -EINVAL;
2504                         goto err_client_b;
2505                 }
2506
2507                 st_engine_heartbeat_enable(engine);
2508                 if (igt_flush_test(gt->i915))
2509                         goto err_wedged;
2510         }
2511
2512         err = 0;
2513 err_client_b:
2514         preempt_client_fini(&b);
2515 err_client_a:
2516         preempt_client_fini(&a);
2517         return err;
2518
2519 err_wedged:
2520         igt_spinner_end(&b.spin);
2521         igt_spinner_end(&a.spin);
2522         intel_gt_set_wedged(gt);
2523         err = -EIO;
2524         goto err_client_b;
2525 }
2526
2527 static int live_chain_preempt(void *arg)
2528 {
2529         struct intel_gt *gt = arg;
2530         struct intel_engine_cs *engine;
2531         struct preempt_client hi, lo;
2532         enum intel_engine_id id;
2533         int err = -ENOMEM;
2534
2535         /*
2536          * Build a chain AB...BA between two contexts (A, B) and request
2537          * preemption of the last request. It should then complete before
2538          * the previously submitted spinner in B.
2539          */
2540
2541         if (preempt_client_init(gt, &hi))
2542                 return -ENOMEM;
2543
2544         if (preempt_client_init(gt, &lo))
2545                 goto err_client_hi;
2546
2547         for_each_engine(engine, gt, id) {
2548                 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2549                 struct igt_live_test t;
2550                 struct i915_request *rq;
2551                 int ring_size, count, i;
2552
2553                 if (!intel_engine_has_preemption(engine))
2554                         continue;
2555
2556                 rq = spinner_create_request(&lo.spin,
2557                                             lo.ctx, engine,
2558                                             MI_ARB_CHECK);
2559                 if (IS_ERR(rq))
2560                         goto err_wedged;
2561
2562                 i915_request_get(rq);
2563                 i915_request_add(rq);
2564
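                /* Estimate how many of these requests fit in the ring */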
2565                 ring_size = rq->wa_tail - rq->head;
2566                 if (ring_size < 0)
2567                         ring_size += rq->ring->size;
2568                 ring_size = rq->ring->size / ring_size;
2569                 pr_debug("%s(%s): Using maximum of %d requests\n",
2570                          __func__, engine->name, ring_size);
2571
2572                 igt_spinner_end(&lo.spin);
2573                 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2574                         pr_err("Timed out waiting to flush %s\n", engine->name);
2575                         i915_request_put(rq);
2576                         goto err_wedged;
2577                 }
2578                 i915_request_put(rq);
2579
2580                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2581                         err = -EIO;
2582                         goto err_wedged;
2583                 }
2584
2585                 for_each_prime_number_from(count, 1, ring_size) {
2586                         rq = spinner_create_request(&hi.spin,
2587                                                     hi.ctx, engine,
2588                                                     MI_ARB_CHECK);
2589                         if (IS_ERR(rq))
2590                                 goto err_wedged;
2591                         i915_request_add(rq);
2592                         if (!igt_wait_for_spinner(&hi.spin, rq))
2593                                 goto err_wedged;
2594
2595                         rq = spinner_create_request(&lo.spin,
2596                                                     lo.ctx, engine,
2597                                                     MI_ARB_CHECK);
2598                         if (IS_ERR(rq))
2599                                 goto err_wedged;
2600                         i915_request_add(rq);
2601
2602                         for (i = 0; i < count; i++) {
2603                                 rq = igt_request_alloc(lo.ctx, engine);
2604                                 if (IS_ERR(rq))
2605                                         goto err_wedged;
2606                                 i915_request_add(rq);
2607                         }
2608
2609                         rq = igt_request_alloc(hi.ctx, engine);
2610                         if (IS_ERR(rq))
2611                                 goto err_wedged;
2612
2613                         i915_request_get(rq);
2614                         i915_request_add(rq);
2615                         engine->schedule(rq, &attr);
2616
2617                         igt_spinner_end(&hi.spin);
2618                         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2619                                 struct drm_printer p =
2620                                         drm_info_printer(gt->i915->drm.dev);
2621
2622                                 pr_err("Failed to preempt over chain of %d\n",
2623                                        count);
2624                                 intel_engine_dump(engine, &p,
2625                                                   "%s\n", engine->name);
2626                                 i915_request_put(rq);
2627                                 goto err_wedged;
2628                         }
2629                         igt_spinner_end(&lo.spin);
2630                         i915_request_put(rq);
2631
2632                         rq = igt_request_alloc(lo.ctx, engine);
2633                         if (IS_ERR(rq))
2634                                 goto err_wedged;
2635
2636                         i915_request_get(rq);
2637                         i915_request_add(rq);
2638
2639                         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2640                                 struct drm_printer p =
2641                                         drm_info_printer(gt->i915->drm.dev);
2642
2643                                 pr_err("Failed to flush low priority chain of %d requests\n",
2644                                        count);
2645                                 intel_engine_dump(engine, &p,
2646                                                   "%s\n", engine->name);
2647
2648                                 i915_request_put(rq);
2649                                 goto err_wedged;
2650                         }
2651                         i915_request_put(rq);
2652                 }
2653
2654                 if (igt_live_test_end(&t)) {
2655                         err = -EIO;
2656                         goto err_wedged;
2657                 }
2658         }
2659
2660         err = 0;
2661 err_client_lo:
2662         preempt_client_fini(&lo);
2663 err_client_hi:
2664         preempt_client_fini(&hi);
2665         return err;
2666
2667 err_wedged:
2668         igt_spinner_end(&hi.spin);
2669         igt_spinner_end(&lo.spin);
2670         intel_gt_set_wedged(gt);
2671         err = -EIO;
2672         goto err_client_lo;
2673 }
2674
2675 static int create_gang(struct intel_engine_cs *engine,
2676                        struct i915_request **prev)
2677 {
2678         struct drm_i915_gem_object *obj;
2679         struct intel_context *ce;
2680         struct i915_request *rq;
2681         struct i915_vma *vma;
2682         u32 *cs;
2683         int err;
2684
2685         ce = intel_context_create(engine);
2686         if (IS_ERR(ce))
2687                 return PTR_ERR(ce);
2688
2689         obj = i915_gem_object_create_internal(engine->i915, 4096);
2690         if (IS_ERR(obj)) {
2691                 err = PTR_ERR(obj);
2692                 goto err_ce;
2693         }
2694
2695         vma = i915_vma_instance(obj, ce->vm, NULL);
2696         if (IS_ERR(vma)) {
2697                 err = PTR_ERR(vma);
2698                 goto err_obj;
2699         }
2700
2701         err = i915_vma_pin(vma, 0, 0, PIN_USER);
2702         if (err)
2703                 goto err_obj;
2704
2705         cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
2706         if (IS_ERR(cs)) {
2707                 err = PTR_ERR(cs);
2708                 goto err_obj;
2709         }
2710
2711         /* Semaphore target: spin until zero */
2712         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2713
2714         *cs++ = MI_SEMAPHORE_WAIT |
2715                 MI_SEMAPHORE_POLL |
2716                 MI_SEMAPHORE_SAD_EQ_SDD;
2717         *cs++ = 0;
2718         *cs++ = lower_32_bits(vma->node.start);
2719         *cs++ = upper_32_bits(vma->node.start);
2720
2721         if (*prev) {
2722                 u64 offset = (*prev)->batch->node.start;
2723
2724                 /* Terminate the spinner in the next lower priority batch. */
2725                 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2726                 *cs++ = lower_32_bits(offset);
2727                 *cs++ = upper_32_bits(offset);
2728                 *cs++ = 0;
2729         }
2730
2731         *cs++ = MI_BATCH_BUFFER_END;
2732         i915_gem_object_flush_map(obj);
2733         i915_gem_object_unpin_map(obj);
2734
2735         rq = intel_context_create_request(ce);
2736         if (IS_ERR(rq)) {
2737                 err = PTR_ERR(rq);
2738                 goto err_obj;
2739         }
2740
2741         rq->batch = i915_vma_get(vma);
2742         i915_request_get(rq);
2743
2744         i915_vma_lock(vma);
2745         err = i915_request_await_object(rq, vma->obj, false);
2746         if (!err)
2747                 err = i915_vma_move_to_active(vma, rq, 0);
2748         if (!err)
2749                 err = rq->engine->emit_bb_start(rq,
2750                                                 vma->node.start,
2751                                                 PAGE_SIZE, 0);
2752         i915_vma_unlock(vma);
2753         i915_request_add(rq);
2754         if (err)
2755                 goto err_rq;
2756
2757         i915_gem_object_put(obj);
2758         intel_context_put(ce);
2759
2760         rq->mock.link.next = &(*prev)->mock.link;
2761         *prev = rq;
2762         return 0;
2763
2764 err_rq:
2765         i915_vma_put(rq->batch);
2766         i915_request_put(rq);
2767 err_obj:
2768         i915_gem_object_put(obj);
2769 err_ce:
2770         intel_context_put(ce);
2771         return err;
2772 }
2773
2774 static int __live_preempt_ring(struct intel_engine_cs *engine,
2775                                struct igt_spinner *spin,
2776                                int queue_sz, int ring_sz)
2777 {
2778         struct intel_context *ce[2] = {};
2779         struct i915_request *rq;
2780         struct igt_live_test t;
2781         int err = 0;
2782         int n;
2783
2784         if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2785                 return -EIO;
2786
2787         for (n = 0; n < ARRAY_SIZE(ce); n++) {
2788                 struct intel_context *tmp;
2789
2790                 tmp = intel_context_create(engine);
2791                 if (IS_ERR(tmp)) {
2792                         err = PTR_ERR(tmp);
2793                         goto err_ce;
2794                 }
2795
2796                 tmp->ring = __intel_context_ring_size(ring_sz);
2797
2798                 err = intel_context_pin(tmp);
2799                 if (err) {
2800                         intel_context_put(tmp);
2801                         goto err_ce;
2802                 }
2803
2804                 memset32(tmp->ring->vaddr,
2805                          0xdeadbeef, /* trigger a hang if executed */
2806                          tmp->ring->vma->size / sizeof(u32));
2807
2808                 ce[n] = tmp;
2809         }
2810
2811         rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2812         if (IS_ERR(rq)) {
2813                 err = PTR_ERR(rq);
2814                 goto err_ce;
2815         }
2816
2817         i915_request_get(rq);
2818         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2819         i915_request_add(rq);
2820
2821         if (!igt_wait_for_spinner(spin, rq)) {
2822                 intel_gt_set_wedged(engine->gt);
2823                 i915_request_put(rq);
2824                 err = -ETIME;
2825                 goto err_ce;
2826         }
2827
2828         /* Fill the ring until we cause a wrap */
2829         n = 0;
2830         while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2831                 struct i915_request *tmp;
2832
2833                 tmp = intel_context_create_request(ce[0]);
2834                 if (IS_ERR(tmp)) {
2835                         err = PTR_ERR(tmp);
2836                         i915_request_put(rq);
2837                         goto err_ce;
2838                 }
2839
2840                 i915_request_add(tmp);
2841                 intel_engine_flush_submission(engine);
2842                 n++;
2843         }
2844         intel_engine_flush_submission(engine);
2845         pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2846                  engine->name, queue_sz, n,
2847                  ce[0]->ring->size,
2848                  ce[0]->ring->tail,
2849                  ce[0]->ring->emit,
2850                  rq->tail);
2851         i915_request_put(rq);
2852
2853         /* Create a second request to preempt the first ring */
2854         rq = intel_context_create_request(ce[1]);
2855         if (IS_ERR(rq)) {
2856                 err = PTR_ERR(rq);
2857                 goto err_ce;
2858         }
2859
2860         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2861         i915_request_get(rq);
2862         i915_request_add(rq);
2863
2864         err = wait_for_submit(engine, rq, HZ / 2);
2865         i915_request_put(rq);
2866         if (err) {
2867                 pr_err("%s: preemption request was not submitted\n",
2868                        engine->name);
2869                 err = -ETIME;
2870         }
2871
2872         pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2873                  engine->name,
2874                  ce[0]->ring->tail, ce[0]->ring->emit,
2875                  ce[1]->ring->tail, ce[1]->ring->emit);
2876
2877 err_ce:
2878         intel_engine_flush_submission(engine);
2879         igt_spinner_end(spin);
2880         for (n = 0; n < ARRAY_SIZE(ce); n++) {
2881                 if (IS_ERR_OR_NULL(ce[n]))
2882                         break;
2883
2884                 intel_context_unpin(ce[n]);
2885                 intel_context_put(ce[n]);
2886         }
2887         if (igt_live_test_end(&t))
2888                 err = -EIO;
2889         return err;
2890 }
2891
2892 static int live_preempt_ring(void *arg)
2893 {
2894         struct intel_gt *gt = arg;
2895         struct intel_engine_cs *engine;
2896         struct igt_spinner spin;
2897         enum intel_engine_id id;
2898         int err = 0;
2899
2900         /*
2901          * Check that we roll back large chunks of a ring in order to do a
2902          * preemption event. Similar to live_unlite_ring, but looking at
2903          * ring size rather than the impact of intel_ring_direction().
2904          */
2905
2906         if (igt_spinner_init(&spin, gt))
2907                 return -ENOMEM;
2908
2909         for_each_engine(engine, gt, id) {
2910                 int n;
2911
2912                 if (!intel_engine_has_preemption(engine))
2913                         continue;
2914
2915                 if (!intel_engine_can_store_dword(engine))
2916                         continue;
2917
2918                 st_engine_heartbeat_disable(engine);
2919
2920                 for (n = 0; n <= 3; n++) {
2921                         err = __live_preempt_ring(engine, &spin,
2922                                                   n * SZ_4K / 4, SZ_4K);
2923                         if (err)
2924                                 break;
2925                 }
2926
2927                 st_engine_heartbeat_enable(engine);
2928                 if (err)
2929                         break;
2930         }
2931
2932         igt_spinner_fini(&spin);
2933         return err;
2934 }
2935
2936 static int live_preempt_gang(void *arg)
2937 {
2938         struct intel_gt *gt = arg;
2939         struct intel_engine_cs *engine;
2940         enum intel_engine_id id;
2941
2942         /*
2943          * Build as long a chain of preempters as we can, with each
2944          * request higher priority than the last. Once we are ready, we release
2945          * the last batch which then percolates down the chain, each releasing
2946          * the next oldest in turn. The intent is to simply push as hard as we
2947          * can with the number of preemptions, trying to exceed narrow HW
2948          * limits. At a minimum, we insist that we can sort all the user
2949          * high priority levels into execution order.
2950          */
2951
2952         for_each_engine(engine, gt, id) {
2953                 struct i915_request *rq = NULL;
2954                 struct igt_live_test t;
2955                 IGT_TIMEOUT(end_time);
2956                 int prio = 0;
2957                 int err = 0;
2958                 u32 *cs;
2959
2960                 if (!intel_engine_has_preemption(engine))
2961                         continue;
2962
2963                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2964                         return -EIO;
2965
2966                 do {
2967                         struct i915_sched_attr attr = { .priority = prio++ };
2968
2969                         err = create_gang(engine, &rq);
2970                         if (err)
2971                                 break;
2972
2973                         /* Submit each spinner at increasing priority */
2974                         engine->schedule(rq, &attr);
2975                 } while (prio <= I915_PRIORITY_MAX &&
2976                          !__igt_timeout(end_time, NULL));
2977                 pr_debug("%s: Preempt chain of %d requests\n",
2978                          engine->name, prio);
2979
2980                 /*
2981                  * The last spinner submitted has the highest priority and
2982                  * should execute first. When that spinner completes,
2983                  * it will terminate the next lowest spinner until there
2984                  * are no more spinners and the gang is complete.
2985                  */
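                /* Zero the semaphore at the head of the final batch to release the highest priority spinner */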
2986                 cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
2987                 if (!IS_ERR(cs)) {
2988                         *cs = 0;
2989                         i915_gem_object_unpin_map(rq->batch->obj);
2990                 } else {
2991                         err = PTR_ERR(cs);
2992                         intel_gt_set_wedged(gt);
2993                 }
2994
2995                 while (rq) { /* wait for each rq from highest to lowest prio */
2996                         struct i915_request *n = list_next_entry(rq, mock.link);
2997
2998                         if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2999                                 struct drm_printer p =
3000                                         drm_info_printer(engine->i915->drm.dev);
3001
3002                                 pr_err("Failed to flush chain of %d requests, at %d\n",
3003                                        prio, rq_prio(rq));
3004                                 intel_engine_dump(engine, &p,
3005                                                   "%s\n", engine->name);
3006
3007                                 err = -ETIME;
3008                         }
3009
3010                         i915_vma_put(rq->batch);
3011                         i915_request_put(rq);
3012                         rq = n;
3013                 }
3014
3015                 if (igt_live_test_end(&t))
3016                         err = -EIO;
3017                 if (err)
3018                         return err;
3019         }
3020
3021         return 0;
3022 }
3023
3024 static struct i915_vma *
3025 create_gpr_user(struct intel_engine_cs *engine,
3026                 struct i915_vma *result,
3027                 unsigned int offset)
3028 {
3029         struct drm_i915_gem_object *obj;
3030         struct i915_vma *vma;
3031         u32 *cs;
3032         int err;
3033         int i;
3034
3035         obj = i915_gem_object_create_internal(engine->i915, 4096);
3036         if (IS_ERR(obj))
3037                 return ERR_CAST(obj);
3038
3039         vma = i915_vma_instance(obj, result->vm, NULL);
3040         if (IS_ERR(vma)) {
3041                 i915_gem_object_put(obj);
3042                 return vma;
3043         }
3044
3045         err = i915_vma_pin(vma, 0, 0, PIN_USER);
3046         if (err) {
3047                 i915_vma_put(vma);
3048                 return ERR_PTR(err);
3049         }
3050
3051         cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
3052         if (IS_ERR(cs)) {
3053                 i915_vma_put(vma);
3054                 return ERR_CAST(cs);
3055         }
3056
3057         /* All GPRs are clear in a new context; load GPR(0) with 1 as the increment */
3058         *cs++ = MI_LOAD_REGISTER_IMM(1);
3059         *cs++ = CS_GPR(engine, 0);
3060         *cs++ = 1;
3061
3062         for (i = 1; i < NUM_GPR; i++) {
3063                 u64 addr;
3064
3065                 /*
3066                  * Perform: GPR[i]++
3067                  *
3068                  * As we read and write the context-saved GPR[i], if
3069                  * we restart this batch buffer from an earlier point, we
3070                  * will repeat the increment and store a value > 1.
3071                  */
3072                 *cs++ = MI_MATH(4);
3073                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3074                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3075                 *cs++ = MI_MATH_ADD;
3076                 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3077
3078                 addr = result->node.start + offset + i * sizeof(*cs);
3079                 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
3080                 *cs++ = CS_GPR(engine, 2 * i);
3081                 *cs++ = lower_32_bits(addr);
3082                 *cs++ = upper_32_bits(addr);
3083
3084                 *cs++ = MI_SEMAPHORE_WAIT |
3085                         MI_SEMAPHORE_POLL |
3086                         MI_SEMAPHORE_SAD_GTE_SDD;
3087                 *cs++ = i;
3088                 *cs++ = lower_32_bits(result->node.start);
3089                 *cs++ = upper_32_bits(result->node.start);
3090         }
3091
3092         *cs++ = MI_BATCH_BUFFER_END;
3093         i915_gem_object_flush_map(obj);
3094         i915_gem_object_unpin_map(obj);
3095
3096         return vma;
3097 }
3098
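     /*
      * Allocate an internal object of @sz bytes and pin it into the GGTT so
      * that it has a single global offset visible to all contexts.
      */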
3099 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3100 {
3101         struct drm_i915_gem_object *obj;
3102         struct i915_vma *vma;
3103         int err;
3104
3105         obj = i915_gem_object_create_internal(gt->i915, sz);
3106         if (IS_ERR(obj))
3107                 return ERR_CAST(obj);
3108
3109         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3110         if (IS_ERR(vma)) {
3111                 i915_gem_object_put(obj);
3112                 return vma;
3113         }
3114
3115         err = i915_ggtt_pin(vma, NULL, 0, 0);
3116         if (err) {
3117                 i915_vma_put(vma);
3118                 return ERR_PTR(err);
3119         }
3120
3121         return vma;
3122 }
3123
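     /*
      * Create a fresh context on @engine, bind the shared @global buffer
      * into that context's address space and submit a create_gpr_user()
      * batch writing its results into @global at @offset. Returns the
      * in-flight request with a reference held for the caller.
      */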
3124 static struct i915_request *
3125 create_gpr_client(struct intel_engine_cs *engine,
3126                   struct i915_vma *global,
3127                   unsigned int offset)
3128 {
3129         struct i915_vma *batch, *vma;
3130         struct intel_context *ce;
3131         struct i915_request *rq;
3132         int err;
3133
3134         ce = intel_context_create(engine);
3135         if (IS_ERR(ce))
3136                 return ERR_CAST(ce);
3137
3138         vma = i915_vma_instance(global->obj, ce->vm, NULL);
3139         if (IS_ERR(vma)) {
3140                 err = PTR_ERR(vma);
3141                 goto out_ce;
3142         }
3143
3144         err = i915_vma_pin(vma, 0, 0, PIN_USER);
3145         if (err)
3146                 goto out_ce;
3147
3148         batch = create_gpr_user(engine, vma, offset);
3149         if (IS_ERR(batch)) {
3150                 err = PTR_ERR(batch);
3151                 goto out_vma;
3152         }
3153
3154         rq = intel_context_create_request(ce);
3155         if (IS_ERR(rq)) {
3156                 err = PTR_ERR(rq);
3157                 goto out_batch;
3158         }
3159
3160         i915_vma_lock(vma);
3161         err = i915_request_await_object(rq, vma->obj, false);
3162         if (!err)
3163                 err = i915_vma_move_to_active(vma, rq, 0);
3164         i915_vma_unlock(vma);
3165
3166         i915_vma_lock(batch);
3167         if (!err)
3168                 err = i915_request_await_object(rq, batch->obj, false);
3169         if (!err)
3170                 err = i915_vma_move_to_active(batch, rq, 0);
3171         if (!err)
3172                 err = rq->engine->emit_bb_start(rq,
3173                                                 batch->node.start,
3174                                                 PAGE_SIZE, 0);
3175         i915_vma_unlock(batch);
3176         i915_vma_unpin(batch);
3177
3178         if (!err)
3179                 i915_request_get(rq);
3180         i915_request_add(rq);
3181
3182 out_batch:
3183         i915_vma_put(batch);
3184 out_vma:
3185         i915_vma_unpin(vma);
3186 out_ce:
3187         intel_context_put(ce);
3188         return err ? ERR_PTR(err) : rq;
3189 }
3190
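     /*
      * Submit a maximum priority kernel request that writes @id into the
      * first dword of @global (the semaphore polled by the GPR batches),
      * so that it preempts whichever user batch is currently running on
      * @engine.
      */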
3191 static int preempt_user(struct intel_engine_cs *engine,
3192                         struct i915_vma *global,
3193                         int id)
3194 {
3195         struct i915_sched_attr attr = {
3196                 .priority = I915_PRIORITY_MAX
3197         };
3198         struct i915_request *rq;
3199         int err = 0;
3200         u32 *cs;
3201
3202         rq = intel_engine_create_kernel_request(engine);
3203         if (IS_ERR(rq))
3204                 return PTR_ERR(rq);
3205
3206         cs = intel_ring_begin(rq, 4);
3207         if (IS_ERR(cs)) {
3208                 i915_request_add(rq);
3209                 return PTR_ERR(cs);
3210         }
3211
3212         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3213         *cs++ = i915_ggtt_offset(global);
3214         *cs++ = 0;
3215         *cs++ = id;
3216
3217         intel_ring_advance(rq, cs);
3218
3219         i915_request_get(rq);
3220         i915_request_add(rq);
3221
3222         engine->schedule(rq, &attr);
3223
3224         if (i915_request_wait(rq, 0, HZ / 2) < 0)
3225                 err = -ETIME;
3226         i915_request_put(rq);
3227
3228         return err;
3229 }
3230
3231 static int live_preempt_user(void *arg)
3232 {
3233         struct intel_gt *gt = arg;
3234         struct intel_engine_cs *engine;
3235         struct i915_vma *global;
3236         enum intel_engine_id id;
3237         u32 *result;
3238         int err = 0;
3239
3240         /*
3241          * In our other tests, we look at preemption in carefully
3242          * controlled conditions in the ringbuffer. Since most of the
3243          * time is spent in user batches, most of our preemptions naturally
3244          * occur there. We want to verify that when we preempt inside a batch
3245          * we continue on from the current instruction and do not roll back
3246          * to the start, or another earlier arbitration point.
3247          *
3248          * To verify this, we create a batch which is a mixture of
3249          * MI_MATH (gpr++), MI_SRM (gpr), and preemption points. Then with
3250          * a few preempting contexts thrown into the mix, we look for any
3251          * repeated instructions (which show up as incorrect values).
3252          */
3253
3254         global = create_global(gt, 4096);
3255         if (IS_ERR(global))
3256                 return PTR_ERR(global);
3257
3258         result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
3259         if (IS_ERR(result)) {
3260                 i915_vma_unpin_and_release(&global, 0);
3261                 return PTR_ERR(result);
3262         }
3263
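             /*
              * Expected layout of the shared result buffer for the clients
              * below:
              *
              *   result[0]                          semaphore, bumped by preempt_user()
              *   result[NUM_GPR * c + 1..NUM_GPR-1] client c's GPRs, each exactly 1
              *
              * Any other value means an increment was replayed after a
              * preemption, i.e. the batch restarted from an earlier point.
              */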
3264         for_each_engine(engine, gt, id) {
3265                 struct i915_request *client[3] = {};
3266                 struct igt_live_test t;
3267                 int i;
3268
3269                 if (!intel_engine_has_preemption(engine))
3270                         continue;
3271
3272                 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3273                         continue; /* we need per-context GPR */
3274
3275                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3276                         err = -EIO;
3277                         break;
3278                 }
3279
3280                 memset(result, 0, 4096);
3281
3282                 for (i = 0; i < ARRAY_SIZE(client); i++) {
3283                         struct i915_request *rq;
3284
3285                         rq = create_gpr_client(engine, global,
3286                                                NUM_GPR * i * sizeof(u32));
3287                         if (IS_ERR(rq)) {
3288                                 err = PTR_ERR(rq);
3289                                 goto end_test;
3290                         }
3291
3292                         client[i] = rq;
3293                 }
3294
3295                 /* Continuously preempt the set of 3 running contexts */
3296                 for (i = 1; i <= NUM_GPR; i++) {
3297                         err = preempt_user(engine, global, i);
3298                         if (err)
3299                                 goto end_test;
3300                 }
3301
3302                 if (READ_ONCE(result[0]) != NUM_GPR) {
3303                         pr_err("%s: Failed to release semaphore\n",
3304                                engine->name);
3305                         err = -EIO;
3306                         goto end_test;
3307                 }
3308
3309                 for (i = 0; i < ARRAY_SIZE(client); i++) {
3310                         int gpr;
3311
3312                         if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3313                                 err = -ETIME;
3314                                 goto end_test;
3315                         }
3316
3317                         for (gpr = 1; gpr < NUM_GPR; gpr++) {
3318                                 if (result[NUM_GPR * i + gpr] != 1) {
3319                                         pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3320                                                engine->name,
3321                                                i, gpr, result[NUM_GPR * i + gpr]);
3322                                         err = -EINVAL;
3323                                         goto end_test;
3324                                 }
3325                         }
3326                 }
3327
3328 end_test:
3329                 for (i = 0; i < ARRAY_SIZE(client); i++) {
3330                         if (!client[i])
3331                                 break;
3332
3333                         i915_request_put(client[i]);
3334                 }
3335
3336                 /* Flush the semaphores on error */
3337                 smp_store_mb(result[0], -1);
3338                 if (igt_live_test_end(&t))
3339                         err = -EIO;
3340                 if (err)
3341                         break;
3342         }
3343
3344         i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3345         return err;
3346 }
3347
3348 static int live_preempt_timeout(void *arg)
3349 {
3350         struct intel_gt *gt = arg;
3351         struct i915_gem_context *ctx_hi, *ctx_lo;
3352         struct igt_spinner spin_lo;
3353         struct intel_engine_cs *engine;
3354         enum intel_engine_id id;
3355         int err = -ENOMEM;
3356
3357         /*
3358          * Check that we force preemption to occur by cancelling the previous
3359          * context if it refuses to yield the GPU.
3360          */
3361         if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3362                 return 0;
3363
3364         if (!intel_has_reset_engine(gt))
3365                 return 0;
3366
3367         if (igt_spinner_init(&spin_lo, gt))
3368                 return -ENOMEM;
3369
3370         ctx_hi = kernel_context(gt->i915);
3371         if (!ctx_hi)
3372                 goto err_spin_lo;
3373         ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
3374
3375         ctx_lo = kernel_context(gt->i915);
3376         if (!ctx_lo)
3377                 goto err_ctx_hi;
3378         ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
3379
3380         for_each_engine(engine, gt, id) {
3381                 unsigned long saved_timeout;
3382                 struct i915_request *rq;
3383
3384                 if (!intel_engine_has_preemption(engine))
3385                         continue;
3386
3387                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3388                                             MI_NOOP); /* preemption disabled */
3389                 if (IS_ERR(rq)) {
3390                         err = PTR_ERR(rq);
3391                         goto err_ctx_lo;
3392                 }
3393
3394                 i915_request_add(rq);
3395                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
3396                         intel_gt_set_wedged(gt);
3397                         err = -EIO;
3398                         goto err_ctx_lo;
3399                 }
3400
3401                 rq = igt_request_alloc(ctx_hi, engine);
3402                 if (IS_ERR(rq)) {
3403                         igt_spinner_end(&spin_lo);
3404                         err = PTR_ERR(rq);
3405                         goto err_ctx_lo;
3406                 }
3407
3408                 /* Flush the previous CS ack before changing timeouts */
3409                 while (READ_ONCE(engine->execlists.pending[0]))
3410                         cpu_relax();
3411
3412                 saved_timeout = engine->props.preempt_timeout_ms;
3413                 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3414
3415                 i915_request_get(rq);
3416                 i915_request_add(rq);
3417
3418                 intel_engine_flush_submission(engine);
3419                 engine->props.preempt_timeout_ms = saved_timeout;
3420
3421                 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3422                         intel_gt_set_wedged(gt);
3423                         i915_request_put(rq);
3424                         err = -ETIME;
3425                         goto err_ctx_lo;
3426                 }
3427
3428                 igt_spinner_end(&spin_lo);
3429                 i915_request_put(rq);
3430         }
3431
3432         err = 0;
3433 err_ctx_lo:
3434         kernel_context_close(ctx_lo);
3435 err_ctx_hi:
3436         kernel_context_close(ctx_hi);
3437 err_spin_lo:
3438         igt_spinner_fini(&spin_lo);
3439         return err;
3440 }
3441
3442 static int random_range(struct rnd_state *rnd, int min, int max)
3443 {
3444         return i915_prandom_u32_max_state(max - min, rnd) + min;
3445 }
3446
3447 static int random_priority(struct rnd_state *rnd)
3448 {
3449         return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3450 }
3451
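     /*
      * Shared state for the preemption smoke tests: a pool of kernel
      * contexts submitted at varying priorities, an optional batch of
      * MI_ARB_CHECKs to provide preemption points, and a count of how
      * many requests were submitted.
      */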
3452 struct preempt_smoke {
3453         struct intel_gt *gt;
3454         struct i915_gem_context **contexts;
3455         struct intel_engine_cs *engine;
3456         struct drm_i915_gem_object *batch;
3457         unsigned int ncontext;
3458         struct rnd_state prng;
3459         unsigned long count;
3460 };
3461
3462 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3463 {
3464         return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3465                                                           &smoke->prng)];
3466 }
3467
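     /*
      * Submit a single request from @ctx at priority @prio on smoke->engine,
      * optionally executing @batch (here, a page of MI_ARB_CHECKs) so that
      * the request offers plenty of arbitration points for preemption.
      */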
3468 static int smoke_submit(struct preempt_smoke *smoke,
3469                         struct i915_gem_context *ctx, int prio,
3470                         struct drm_i915_gem_object *batch)
3471 {
3472         struct i915_request *rq;
3473         struct i915_vma *vma = NULL;
3474         int err = 0;
3475
3476         if (batch) {
3477                 struct i915_address_space *vm;
3478
3479                 vm = i915_gem_context_get_vm_rcu(ctx);
3480                 vma = i915_vma_instance(batch, vm, NULL);
3481                 i915_vm_put(vm);
3482                 if (IS_ERR(vma))
3483                         return PTR_ERR(vma);
3484
3485                 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3486                 if (err)
3487                         return err;
3488         }
3489
3490         ctx->sched.priority = prio;
3491
3492         rq = igt_request_alloc(ctx, smoke->engine);
3493         if (IS_ERR(rq)) {
3494                 err = PTR_ERR(rq);
3495                 goto unpin;
3496         }
3497
3498         if (vma) {
3499                 i915_vma_lock(vma);
3500                 err = i915_request_await_object(rq, vma->obj, false);
3501                 if (!err)
3502                         err = i915_vma_move_to_active(vma, rq, 0);
3503                 if (!err)
3504                         err = rq->engine->emit_bb_start(rq,
3505                                                         vma->node.start,
3506                                                         PAGE_SIZE, 0);
3507                 i915_vma_unlock(vma);
3508         }
3509
3510         i915_request_add(rq);
3511
3512 unpin:
3513         if (vma)
3514                 i915_vma_unpin(vma);
3515
3516         return err;
3517 }
3518
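     /*
      * Per-engine worker for smoke_crescendo(): keep submitting requests at
      * steadily increasing priority (modulo I915_PRIORITY_MAX) until either
      * smoke->ncontext requests have been sent or the timeout expires.
      */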
3519 static int smoke_crescendo_thread(void *arg)
3520 {
3521         struct preempt_smoke *smoke = arg;
3522         IGT_TIMEOUT(end_time);
3523         unsigned long count;
3524
3525         count = 0;
3526         do {
3527                 struct i915_gem_context *ctx = smoke_context(smoke);
3528                 int err;
3529
3530                 err = smoke_submit(smoke,
3531                                    ctx, count % I915_PRIORITY_MAX,
3532                                    smoke->batch);
3533                 if (err)
3534                         return err;
3535
3536                 count++;
3537         } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3538
3539         smoke->count = count;
3540         return 0;
3541 }
3542
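     /*
      * Run one crescendo worker per engine in parallel and report how many
      * requests they managed to submit between them.
      */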
3543 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3544 #define BATCH BIT(0)
3545 {
3546         struct task_struct *tsk[I915_NUM_ENGINES] = {};
3547         struct preempt_smoke arg[I915_NUM_ENGINES];
3548         struct intel_engine_cs *engine;
3549         enum intel_engine_id id;
3550         unsigned long count;
3551         int err = 0;
3552
3553         for_each_engine(engine, smoke->gt, id) {
3554                 arg[id] = *smoke;
3555                 arg[id].engine = engine;
3556                 if (!(flags & BATCH))
3557                         arg[id].batch = NULL;
3558                 arg[id].count = 0;
3559
3560                 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
3561                                       "igt/smoke:%d", id);
3562                 if (IS_ERR(tsk[id])) {
3563                         err = PTR_ERR(tsk[id]);
3564                         break;
3565                 }
3566                 get_task_struct(tsk[id]);
3567         }
3568
3569         yield(); /* start all threads before we kthread_stop() */
3570
3571         count = 0;
3572         for_each_engine(engine, smoke->gt, id) {
3573                 int status;
3574
3575                 if (IS_ERR_OR_NULL(tsk[id]))
3576                         continue;
3577
3578                 status = kthread_stop(tsk[id]);
3579                 if (status && !err)
3580                         err = status;
3581
3582                 count += arg[id].count;
3583
3584                 put_task_struct(tsk[id]);
3585         }
3586
3587         pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3588                 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3589         return err;
3590 }
3591
3592 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3593 {
3594         enum intel_engine_id id;
3595         IGT_TIMEOUT(end_time);
3596         unsigned long count;
3597
3598         count = 0;
3599         do {
3600                 for_each_engine(smoke->engine, smoke->gt, id) {
3601                         struct i915_gem_context *ctx = smoke_context(smoke);
3602                         int err;
3603
3604                         err = smoke_submit(smoke,
3605                                            ctx, random_priority(&smoke->prng),
3606                                            flags & BATCH ? smoke->batch : NULL);
3607                         if (err)
3608                                 return err;
3609
3610                         count++;
3611                 }
3612         } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3613
3614         pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3615                 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3616         return 0;
3617 }
3618
3619 static int live_preempt_smoke(void *arg)
3620 {
3621         struct preempt_smoke smoke = {
3622                 .gt = arg,
3623                 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3624                 .ncontext = 256,
3625         };
3626         const unsigned int phase[] = { 0, BATCH };
3627         struct igt_live_test t;
3628         int err = -ENOMEM;
3629         u32 *cs;
3630         int n;
3631
3632         smoke.contexts = kmalloc_array(smoke.ncontext,
3633                                        sizeof(*smoke.contexts),
3634                                        GFP_KERNEL);
3635         if (!smoke.contexts)
3636                 return -ENOMEM;
3637
3638         smoke.batch =
3639                 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3640         if (IS_ERR(smoke.batch)) {
3641                 err = PTR_ERR(smoke.batch);
3642                 goto err_free;
3643         }
3644
3645         cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3646         if (IS_ERR(cs)) {
3647                 err = PTR_ERR(cs);
3648                 goto err_batch;
3649         }
3650         for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3651                 cs[n] = MI_ARB_CHECK;
3652         cs[n] = MI_BATCH_BUFFER_END;
3653         i915_gem_object_flush_map(smoke.batch);
3654         i915_gem_object_unpin_map(smoke.batch);
3655
3656         if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3657                 err = -EIO;
3658                 goto err_batch;
3659         }
3660
3661         for (n = 0; n < smoke.ncontext; n++) {
3662                 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3663                 if (!smoke.contexts[n])
3664                         goto err_ctx;
3665         }
3666
3667         for (n = 0; n < ARRAY_SIZE(phase); n++) {
3668                 err = smoke_crescendo(&smoke, phase[n]);
3669                 if (err)
3670                         goto err_ctx;
3671
3672                 err = smoke_random(&smoke, phase[n]);
3673                 if (err)
3674                         goto err_ctx;
3675         }
3676
3677 err_ctx:
3678         if (igt_live_test_end(&t))
3679                 err = -EIO;
3680
3681         for (n = 0; n < smoke.ncontext; n++) {
3682                 if (!smoke.contexts[n])
3683                         break;
3684                 kernel_context_close(smoke.contexts[n]);
3685         }
3686
3687 err_batch:
3688         i915_gem_object_put(smoke.batch);
3689 err_free:
3690         kfree(smoke.contexts);
3691
3692         return err;
3693 }
3694
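     /*
      * Measure the latency of empty requests submitted through @nctx virtual
      * engines built from the same @siblings, either interleaving the
      * contexts (default) or submitting each context's chain in turn (CHAIN).
      */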
3695 static int nop_virtual_engine(struct intel_gt *gt,
3696                               struct intel_engine_cs **siblings,
3697                               unsigned int nsibling,
3698                               unsigned int nctx,
3699                               unsigned int flags)
3700 #define CHAIN BIT(0)
3701 {
3702         IGT_TIMEOUT(end_time);
3703         struct i915_request *request[16] = {};
3704         struct intel_context *ve[16];
3705         unsigned long n, prime, nc;
3706         struct igt_live_test t;
3707         ktime_t times[2] = {};
3708         int err;
3709
3710         GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3711
3712         for (n = 0; n < nctx; n++) {
3713                 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3714                 if (IS_ERR(ve[n])) {
3715                         err = PTR_ERR(ve[n]);
3716                         nctx = n;
3717                         goto out;
3718                 }
3719
3720                 err = intel_context_pin(ve[n]);
3721                 if (err) {
3722                         intel_context_put(ve[n]);
3723                         nctx = n;
3724                         goto out;
3725                 }
3726         }
3727
3728         err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3729         if (err)
3730                 goto out;
3731
3732         for_each_prime_number_from(prime, 1, 8192) {
3733                 times[1] = ktime_get_raw();
3734
3735                 if (flags & CHAIN) {
3736                         for (nc = 0; nc < nctx; nc++) {
3737                                 for (n = 0; n < prime; n++) {
3738                                         struct i915_request *rq;
3739
3740                                         rq = i915_request_create(ve[nc]);
3741                                         if (IS_ERR(rq)) {
3742                                                 err = PTR_ERR(rq);
3743                                                 goto out;
3744                                         }
3745
3746                                         if (request[nc])
3747                                                 i915_request_put(request[nc]);
3748                                         request[nc] = i915_request_get(rq);
3749                                         i915_request_add(rq);
3750                                 }
3751                         }
3752                 } else {
3753                         for (n = 0; n < prime; n++) {
3754                                 for (nc = 0; nc < nctx; nc++) {
3755                                         struct i915_request *rq;
3756
3757                                         rq = i915_request_create(ve[nc]);
3758                                         if (IS_ERR(rq)) {
3759                                                 err = PTR_ERR(rq);
3760                                                 goto out;
3761                                         }
3762
3763                                         if (request[nc])
3764                                                 i915_request_put(request[nc]);
3765                                         request[nc] = i915_request_get(rq);
3766                                         i915_request_add(rq);
3767                                 }
3768                         }
3769                 }
3770
3771                 for (nc = 0; nc < nctx; nc++) {
3772                         if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3773                                 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3774                                        __func__, ve[0]->engine->name,
3775                                        request[nc]->fence.context,
3776                                        request[nc]->fence.seqno);
3777
3778                                 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3779                                           __func__, ve[0]->engine->name,
3780                                           request[nc]->fence.context,
3781                                           request[nc]->fence.seqno);
3782                                 GEM_TRACE_DUMP();
3783                                 intel_gt_set_wedged(gt);
3784                                 break;
3785                         }
3786                 }
3787
3788                 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3789                 if (prime == 1)
3790                         times[0] = times[1];
3791
3792                 for (nc = 0; nc < nctx; nc++) {
3793                         i915_request_put(request[nc]);
3794                         request[nc] = NULL;
3795                 }
3796
3797                 if (__igt_timeout(end_time, NULL))
3798                         break;
3799         }
3800
3801         err = igt_live_test_end(&t);
3802         if (err)
3803                 goto out;
3804
3805         pr_info("Request x%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3806                 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3807                 prime, div64_u64(ktime_to_ns(times[1]), prime));
3808
3809 out:
3810         if (igt_flush_test(gt->i915))
3811                 err = -EIO;
3812
3813         for (nc = 0; nc < nctx; nc++) {
3814                 i915_request_put(request[nc]);
3815                 intel_context_unpin(ve[nc]);
3816                 intel_context_put(ve[nc]);
3817         }
3818         return err;
3819 }
3820
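     /*
      * Gather every engine of @class on @gt into @siblings, optionally
      * filtered by @filter, and return how many were found.
      */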
3821 static unsigned int
3822 __select_siblings(struct intel_gt *gt,
3823                   unsigned int class,
3824                   struct intel_engine_cs **siblings,
3825                   bool (*filter)(const struct intel_engine_cs *))
3826 {
3827         unsigned int n = 0;
3828         unsigned int inst;
3829
3830         for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3831                 if (!gt->engine_class[class][inst])
3832                         continue;
3833
3834                 if (filter && !filter(gt->engine_class[class][inst]))
3835                         continue;
3836
3837                 siblings[n++] = gt->engine_class[class][inst];
3838         }
3839
3840         return n;
3841 }
3842
3843 static unsigned int
3844 select_siblings(struct intel_gt *gt,
3845                 unsigned int class,
3846                 struct intel_engine_cs **siblings)
3847 {
3848         return __select_siblings(gt, class, siblings, NULL);
3849 }
3850
3851 static int live_virtual_engine(void *arg)
3852 {
3853         struct intel_gt *gt = arg;
3854         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3855         struct intel_engine_cs *engine;
3856         enum intel_engine_id id;
3857         unsigned int class;
3858         int err;
3859
3860         if (intel_uc_uses_guc_submission(&gt->uc))
3861                 return 0;
3862
3863         for_each_engine(engine, gt, id) {
3864                 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3865                 if (err) {
3866                         pr_err("Failed to wrap engine %s: err=%d\n",
3867                                engine->name, err);
3868                         return err;
3869                 }
3870         }
3871
3872         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3873                 int nsibling, n;
3874
3875                 nsibling = select_siblings(gt, class, siblings);
3876                 if (nsibling < 2)
3877                         continue;
3878
3879                 for (n = 1; n <= nsibling + 1; n++) {
3880                         err = nop_virtual_engine(gt, siblings, nsibling,
3881                                                  n, 0);
3882                         if (err)
3883                                 return err;
3884                 }
3885
3886                 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3887                 if (err)
3888                         return err;
3889         }
3890
3891         return 0;
3892 }
3893
3894 static int mask_virtual_engine(struct intel_gt *gt,
3895                                struct intel_engine_cs **siblings,
3896                                unsigned int nsibling)
3897 {
3898         struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3899         struct intel_context *ve;
3900         struct igt_live_test t;
3901         unsigned int n;
3902         int err;
3903
3904         /*
3905          * Check that by setting the execution mask on a request, we can
3906          * restrict it to our desired engine within the virtual engine.
3907          */
3908
3909         ve = intel_execlists_create_virtual(siblings, nsibling);
3910         if (IS_ERR(ve)) {
3911                 err = PTR_ERR(ve);
3912                 goto out_close;
3913         }
3914
3915         err = intel_context_pin(ve);
3916         if (err)
3917                 goto out_put;
3918
3919         err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3920         if (err)
3921                 goto out_unpin;
3922
3923         for (n = 0; n < nsibling; n++) {
3924                 request[n] = i915_request_create(ve);
3925                 if (IS_ERR(request[n])) {
3926                         err = PTR_ERR(request[n]);
3927                         nsibling = n;
3928                         goto out;
3929                 }
3930
3931                 /* Reverse order as it's more likely to be unnatural */
3932                 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3933
3934                 i915_request_get(request[n]);
3935                 i915_request_add(request[n]);
3936         }
3937
3938         for (n = 0; n < nsibling; n++) {
3939                 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3940                         pr_err("%s(%s): wait for %llx:%lld timed out\n",
3941                                __func__, ve->engine->name,
3942                                request[n]->fence.context,
3943                                request[n]->fence.seqno);
3944
3945                         GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3946                                   __func__, ve->engine->name,
3947                                   request[n]->fence.context,
3948                                   request[n]->fence.seqno);
3949                         GEM_TRACE_DUMP();
3950                         intel_gt_set_wedged(gt);
3951                         err = -EIO;
3952                         goto out;
3953                 }
3954
3955                 if (request[n]->engine != siblings[nsibling - n - 1]) {
3956                         pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3957                                request[n]->engine->name,
3958                                siblings[nsibling - n - 1]->name);
3959                         err = -EINVAL;
3960                         goto out;
3961                 }
3962         }
3963
3964         err = igt_live_test_end(&t);
3965 out:
3966         if (igt_flush_test(gt->i915))
3967                 err = -EIO;
3968
3969         for (n = 0; n < nsibling; n++)
3970                 i915_request_put(request[n]);
3971
3972 out_unpin:
3973         intel_context_unpin(ve);
3974 out_put:
3975         intel_context_put(ve);
3976 out_close:
3977         return err;
3978 }
3979
3980 static int live_virtual_mask(void *arg)
3981 {
3982         struct intel_gt *gt = arg;
3983         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3984         unsigned int class;
3985         int err;
3986
3987         if (intel_uc_uses_guc_submission(&gt->uc))
3988                 return 0;
3989
3990         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3991                 unsigned int nsibling;
3992
3993                 nsibling = select_siblings(gt, class, siblings);
3994                 if (nsibling < 2)
3995                         continue;
3996
3997                 err = mask_virtual_engine(gt, siblings, nsibling);
3998                 if (err)
3999                         return err;
4000         }
4001
4002         return 0;
4003 }
4004
4005 static int slicein_virtual_engine(struct intel_gt *gt,
4006                                   struct intel_engine_cs **siblings,
4007                                   unsigned int nsibling)
4008 {
4009         const long timeout = slice_timeout(siblings[0]);
4010         struct intel_context *ce;
4011         struct i915_request *rq;
4012         struct igt_spinner spin;
4013         unsigned int n;
4014         int err = 0;
4015
4016         /*
4017          * Virtual requests must take part in timeslicing on the target engines.
4018          */
4019
4020         if (igt_spinner_init(&spin, gt))
4021                 return -ENOMEM;
4022
4023         for (n = 0; n < nsibling; n++) {
4024                 ce = intel_context_create(siblings[n]);
4025                 if (IS_ERR(ce)) {
4026                         err = PTR_ERR(ce);
4027                         goto out;
4028                 }
4029
4030                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4031                 intel_context_put(ce);
4032                 if (IS_ERR(rq)) {
4033                         err = PTR_ERR(rq);
4034                         goto out;
4035                 }
4036
4037                 i915_request_add(rq);
4038         }
4039
4040         ce = intel_execlists_create_virtual(siblings, nsibling);
4041         if (IS_ERR(ce)) {
4042                 err = PTR_ERR(ce);
4043                 goto out;
4044         }
4045
4046         rq = intel_context_create_request(ce);
4047         intel_context_put(ce);
4048         if (IS_ERR(rq)) {
4049                 err = PTR_ERR(rq);
4050                 goto out;
4051         }
4052
4053         i915_request_get(rq);
4054         i915_request_add(rq);
4055         if (i915_request_wait(rq, 0, timeout) < 0) {
4056                 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4057                               __func__, rq->engine->name);
4058                 GEM_TRACE_DUMP();
4059                 intel_gt_set_wedged(gt);
4060                 err = -EIO;
4061         }
4062         i915_request_put(rq);
4063
4064 out:
4065         igt_spinner_end(&spin);
4066         if (igt_flush_test(gt->i915))
4067                 err = -EIO;
4068         igt_spinner_fini(&spin);
4069         return err;
4070 }
4071
4072 static int sliceout_virtual_engine(struct intel_gt *gt,
4073                                    struct intel_engine_cs **siblings,
4074                                    unsigned int nsibling)
4075 {
4076         const long timeout = slice_timeout(siblings[0]);
4077         struct intel_context *ce;
4078         struct i915_request *rq;
4079         struct igt_spinner spin;
4080         unsigned int n;
4081         int err = 0;
4082
4083         /*
4084          * Virtual requests must allow others a fair timeslice.
4085          */
4086
4087         if (igt_spinner_init(&spin, gt))
4088                 return -ENOMEM;
4089
4090         /* XXX We do not handle oversubscription and fairness with normal rq */
4091         for (n = 0; n < nsibling; n++) {
4092                 ce = intel_execlists_create_virtual(siblings, nsibling);
4093                 if (IS_ERR(ce)) {
4094                         err = PTR_ERR(ce);
4095                         goto out;
4096                 }
4097
4098                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4099                 intel_context_put(ce);
4100                 if (IS_ERR(rq)) {
4101                         err = PTR_ERR(rq);
4102                         goto out;
4103                 }
4104
4105                 i915_request_add(rq);
4106         }
4107
4108         for (n = 0; !err && n < nsibling; n++) {
4109                 ce = intel_context_create(siblings[n]);
4110                 if (IS_ERR(ce)) {
4111                         err = PTR_ERR(ce);
4112                         goto out;
4113                 }
4114
4115                 rq = intel_context_create_request(ce);
4116                 intel_context_put(ce);
4117                 if (IS_ERR(rq)) {
4118                         err = PTR_ERR(rq);
4119                         goto out;
4120                 }
4121
4122                 i915_request_get(rq);
4123                 i915_request_add(rq);
4124                 if (i915_request_wait(rq, 0, timeout) < 0) {
4125                         GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4126                                       __func__, siblings[n]->name);
4127                         GEM_TRACE_DUMP();
4128                         intel_gt_set_wedged(gt);
4129                         err = -EIO;
4130                 }
4131                 i915_request_put(rq);
4132         }
4133
4134 out:
4135         igt_spinner_end(&spin);
4136         if (igt_flush_test(gt->i915))
4137                 err = -EIO;
4138         igt_spinner_fini(&spin);
4139         return err;
4140 }
4141
4142 static int live_virtual_slice(void *arg)
4143 {
4144         struct intel_gt *gt = arg;
4145         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4146         unsigned int class;
4147         int err;
4148
4149         if (intel_uc_uses_guc_submission(&gt->uc))
4150                 return 0;
4151
4152         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4153                 unsigned int nsibling;
4154
4155                 nsibling = __select_siblings(gt, class, siblings,
4156                                              intel_engine_has_timeslices);
4157                 if (nsibling < 2)
4158                         continue;
4159
4160                 err = slicein_virtual_engine(gt, siblings, nsibling);
4161                 if (err)
4162                         return err;
4163
4164                 err = sliceout_virtual_engine(gt, siblings, nsibling);
4165                 if (err)
4166                         return err;
4167         }
4168
4169         return 0;
4170 }
4171
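     /*
      * Submit a chain of requests through a virtual engine, pinning each
      * request to a particular sibling in turn. Every request saves the
      * CS_GPR set up by its predecessor into a scratch buffer and loads the
      * next value, so the final readback checks that the GPR context state
      * follows the virtual context as it migrates between physical engines.
      */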
4172 static int preserved_virtual_engine(struct intel_gt *gt,
4173                                     struct intel_engine_cs **siblings,
4174                                     unsigned int nsibling)
4175 {
4176         struct i915_request *last = NULL;
4177         struct intel_context *ve;
4178         struct i915_vma *scratch;
4179         struct igt_live_test t;
4180         unsigned int n;
4181         int err = 0;
4182         u32 *cs;
4183
4184         scratch =
4185                 __vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
4186                                                     PAGE_SIZE);
4187         if (IS_ERR(scratch))
4188                 return PTR_ERR(scratch);
4189
4190         err = i915_vma_sync(scratch);
4191         if (err)
4192                 goto out_scratch;
4193
4194         ve = intel_execlists_create_virtual(siblings, nsibling);
4195         if (IS_ERR(ve)) {
4196                 err = PTR_ERR(ve);
4197                 goto out_scratch;
4198         }
4199
4200         err = intel_context_pin(ve);
4201         if (err)
4202                 goto out_put;
4203
4204         err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4205         if (err)
4206                 goto out_unpin;
4207
4208         for (n = 0; n < NUM_GPR_DW; n++) {
4209                 struct intel_engine_cs *engine = siblings[n % nsibling];
4210                 struct i915_request *rq;
4211
4212                 rq = i915_request_create(ve);
4213                 if (IS_ERR(rq)) {
4214                         err = PTR_ERR(rq);
4215                         goto out_end;
4216                 }
4217
4218                 i915_request_put(last);
4219                 last = i915_request_get(rq);
4220
4221                 cs = intel_ring_begin(rq, 8);
4222                 if (IS_ERR(cs)) {
4223                         i915_request_add(rq);
4224                         err = PTR_ERR(cs);
4225                         goto out_end;
4226                 }
4227
4228                 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4229                 *cs++ = CS_GPR(engine, n);
4230                 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4231                 *cs++ = 0;
4232
4233                 *cs++ = MI_LOAD_REGISTER_IMM(1);
4234                 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4235                 *cs++ = n + 1;
4236
4237                 *cs++ = MI_NOOP;
4238                 intel_ring_advance(rq, cs);
4239
4240                 /* Restrict this request to run on a particular engine */
4241                 rq->execution_mask = engine->mask;
4242                 i915_request_add(rq);
4243         }
4244
4245         if (i915_request_wait(last, 0, HZ / 5) < 0) {
4246                 err = -ETIME;
4247                 goto out_end;
4248         }
4249
4250         cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
4251         if (IS_ERR(cs)) {
4252                 err = PTR_ERR(cs);
4253                 goto out_end;
4254         }
4255
4256         for (n = 0; n < NUM_GPR_DW; n++) {
4257                 if (cs[n] != n) {
4258                         pr_err("Incorrect value[%d] found for GPR[%d]\n",
4259                                cs[n], n);
4260                         err = -EINVAL;
4261                         break;
4262                 }
4263         }
4264
4265         i915_gem_object_unpin_map(scratch->obj);
4266
4267 out_end:
4268         if (igt_live_test_end(&t))
4269                 err = -EIO;
4270         i915_request_put(last);
4271 out_unpin:
4272         intel_context_unpin(ve);
4273 out_put:
4274         intel_context_put(ve);
4275 out_scratch:
4276         i915_vma_unpin_and_release(&scratch, 0);
4277         return err;
4278 }
4279
4280 static int live_virtual_preserved(void *arg)
4281 {
4282         struct intel_gt *gt = arg;
4283         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4284         unsigned int class;
4285
4286         /*
4287          * Check that the context image retains non-privileged (user) registers
4288          * from one engine to the next. For this we check that the CS_GPR
4289          * are preserved.
4290          */
4291
4292         if (intel_uc_uses_guc_submission(&gt->uc))
4293                 return 0;
4294
4295         /* As we use CS_GPR, we cannot run on gens before they existed on all engines. */
4296         if (INTEL_GEN(gt->i915) < 9)
4297                 return 0;
4298
4299         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4300                 int nsibling, err;
4301
4302                 nsibling = select_siblings(gt, class, siblings);
4303                 if (nsibling < 2)
4304                         continue;
4305
4306                 err = preserved_virtual_engine(gt, siblings, nsibling);
4307                 if (err)
4308                         return err;
4309         }
4310
4311         return 0;
4312 }
4313
4314 static int bond_virtual_engine(struct intel_gt *gt,
4315                                unsigned int class,
4316                                struct intel_engine_cs **siblings,
4317                                unsigned int nsibling,
4318                                unsigned int flags)
4319 #define BOND_SCHEDULE BIT(0)
4320 {
4321         struct intel_engine_cs *master;
4322         struct i915_request *rq[16];
4323         enum intel_engine_id id;
4324         struct igt_spinner spin;
4325         unsigned long n;
4326         int err;
4327
4328         /*
4329          * A set of bonded requests is intended to be run concurrently
4330          * across a number of engines. We use one request per engine
4331          * and a magic fence to schedule each of the bonded requests
4332          * at the same time. A consequence of our current scheduler is that
4333          * we only move requests to the HW ready queue when the request
4334          * becomes ready, that is when all of its prerequisite fences have
4335          * been signaled. As one of those fences is the master submit fence,
4336          * there is a delay on all secondary fences as the HW may be
4337          * currently busy. Equally, as all the requests are independent,
4338          * they may have other fences that delay individual request
4339          * submission to HW. Ergo, we do not guarantee that all requests are
4340          * immediately submitted to HW at the same time, just that if the
4341          * rules are abided by, they are ready at the same time as the
4342          * first is submitted. Userspace can embed semaphores in its batch
4343          * to ensure parallel execution of its phases as it requires.
4344          * Though naturally it gets requested that perhaps the scheduler should
4345          * take care of parallel execution, even across preemption events on
4346          * different HW. (The proper answer is of course "lalalala".)
4347          *
4348          * With the submit-fence, we have identified three possible phases
4349          * of synchronisation depending on the master fence: queued (not
4350          * ready), executing, and signaled. The first two are quite simple
4351          * and checked below. However, the signaled master fence handling is
4352          * contentious. Currently we do not distinguish between a signaled
4353          * fence and an expired fence, as once signaled it does not convey
4354          * any information about the previous execution. It may even be freed
4355          * and hence, by the time we check, may not exist at all. Ergo we currently
4356          * do not apply the bonding constraint for an already signaled fence,
4357          * as our expectation is that it should not constrain the secondaries
4358          * and is outside of the scope of the bonded request API (i.e. all
4359          * userspace requests are meant to be running in parallel). As
4360          * it imposes no constraint, and is effectively a no-op, we do not
4361          * check below as normal execution flows are checked extensively above.
4362          *
4363          * XXX Is the degenerate handling of signaled submit fences the
4364          * expected behaviour for userspace?
4365          */
4366
4367         GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4368
4369         if (igt_spinner_init(&spin, gt))
4370                 return -ENOMEM;
4371
4372         err = 0;
4373         rq[0] = ERR_PTR(-ENOMEM);
4374         for_each_engine(master, gt, id) {
4375                 struct i915_sw_fence fence = {};
4376                 struct intel_context *ce;
4377
4378                 if (master->class == class)
4379                         continue;
4380
4381                 ce = intel_context_create(master);
4382                 if (IS_ERR(ce)) {
4383                         err = PTR_ERR(ce);
4384                         goto out;
4385                 }
4386
4387                 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4388
4389                 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4390                 intel_context_put(ce);
4391                 if (IS_ERR(rq[0])) {
4392                         err = PTR_ERR(rq[0]);
4393                         goto out;
4394                 }
4395                 i915_request_get(rq[0]);
4396
4397                 if (flags & BOND_SCHEDULE) {
4398                         onstack_fence_init(&fence);
4399                         err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4400                                                                &fence,
4401                                                                GFP_KERNEL);
4402                 }
4403
4404                 i915_request_add(rq[0]);
4405                 if (err < 0)
4406                         goto out;
4407
4408                 if (!(flags & BOND_SCHEDULE) &&
4409                     !igt_wait_for_spinner(&spin, rq[0])) {
4410                         err = -EIO;
4411                         goto out;
4412                 }
4413
4414                 for (n = 0; n < nsibling; n++) {
4415                         struct intel_context *ve;
4416
4417                         ve = intel_execlists_create_virtual(siblings, nsibling);
4418                         if (IS_ERR(ve)) {
4419                                 err = PTR_ERR(ve);
4420                                 onstack_fence_fini(&fence);
4421                                 goto out;
4422                         }
4423
4424                         err = intel_virtual_engine_attach_bond(ve->engine,
4425                                                                master,
4426                                                                siblings[n]);
4427                         if (err) {
4428                                 intel_context_put(ve);
4429                                 onstack_fence_fini(&fence);
4430                                 goto out;
4431                         }
4432
4433                         err = intel_context_pin(ve);
4434                         intel_context_put(ve);
4435                         if (err) {
4436                                 onstack_fence_fini(&fence);
4437                                 goto out;
4438                         }
4439
4440                         rq[n + 1] = i915_request_create(ve);
4441                         intel_context_unpin(ve);
4442                         if (IS_ERR(rq[n + 1])) {
4443                                 err = PTR_ERR(rq[n + 1]);
4444                                 onstack_fence_fini(&fence);
4445                                 goto out;
4446                         }
4447                         i915_request_get(rq[n + 1]);
4448
4449                         err = i915_request_await_execution(rq[n + 1],
4450                                                            &rq[0]->fence,
4451                                                            ve->engine->bond_execute);
4452                         i915_request_add(rq[n + 1]);
4453                         if (err < 0) {
4454                                 onstack_fence_fini(&fence);
4455                                 goto out;
4456                         }
4457                 }
4458                 onstack_fence_fini(&fence);
4459                 intel_engine_flush_submission(master);
4460                 igt_spinner_end(&spin);
4461
4462                 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4463                         pr_err("Master request did not execute (on %s)!\n",
4464                                rq[0]->engine->name);
4465                         err = -EIO;
4466                         goto out;
4467                 }
4468
4469                 for (n = 0; n < nsibling; n++) {
4470                         if (i915_request_wait(rq[n + 1], 0,
4471                                               MAX_SCHEDULE_TIMEOUT) < 0) {
4472                                 err = -EIO;
4473                                 goto out;
4474                         }
4475
4476                         if (rq[n + 1]->engine != siblings[n]) {
4477                                 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4478                                        siblings[n]->name,
4479                                        rq[n + 1]->engine->name,
4480                                        rq[0]->engine->name);
4481                                 err = -EINVAL;
4482                                 goto out;
4483                         }
4484                 }
4485
4486                 for (n = 0; !IS_ERR(rq[n]); n++)
4487                         i915_request_put(rq[n]);
4488                 rq[0] = ERR_PTR(-ENOMEM);
4489         }
4490
4491 out:
4492         for (n = 0; !IS_ERR(rq[n]); n++)
4493                 i915_request_put(rq[n]);
4494         if (igt_flush_test(gt->i915))
4495                 err = -EIO;
4496
4497         igt_spinner_fini(&spin);
4498         return err;
4499 }
4500
4501 static int live_virtual_bond(void *arg)
4502 {
4503         static const struct phase {
4504                 const char *name;
4505                 unsigned int flags;
4506         } phases[] = {
4507                 { "", 0 },
4508                 { "schedule", BOND_SCHEDULE },
4509                 { },
4510         };
4511         struct intel_gt *gt = arg;
4512         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4513         unsigned int class;
4514         int err;
4515
4516         if (intel_uc_uses_guc_submission(&gt->uc))
4517                 return 0;
4518
4519         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4520                 const struct phase *p;
4521                 int nsibling;
4522
4523                 nsibling = select_siblings(gt, class, siblings);
4524                 if (nsibling < 2)
4525                         continue;
4526
4527                 for (p = phases; p->name; p++) {
4528                         err = bond_virtual_engine(gt,
4529                                                   class, siblings, nsibling,
4530                                                   p->flags);
4531                         if (err) {
4532                                 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4533                                        __func__, p->name, class, nsibling, err);
4534                                 return err;
4535                         }
4536                 }
4537         }
4538
4539         return 0;
4540 }
4541
4542 static int reset_virtual_engine(struct intel_gt *gt,
4543                                 struct intel_engine_cs **siblings,
4544                                 unsigned int nsibling)
4545 {
4546         struct intel_engine_cs *engine;
4547         struct intel_context *ve;
4548         struct igt_spinner spin;
4549         struct i915_request *rq;
4550         unsigned int n;
4551         int err = 0;
4552
4553         /*
4554          * In order to support offline error capture for fast preempt reset,
4555          * we need to decouple the guilty request and ensure that it and its
4556          * descendants are not executed while the capture is in progress.
4557          */
4558
4559         if (igt_spinner_init(&spin, gt))
4560                 return -ENOMEM;
4561
4562         ve = intel_execlists_create_virtual(siblings, nsibling);
4563         if (IS_ERR(ve)) {
4564                 err = PTR_ERR(ve);
4565                 goto out_spin;
4566         }
4567
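             /*
              * Disable the heartbeats so a background pulse cannot preempt
              * or reset our spinner while we orchestrate the reset by hand.
              */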
4568         for (n = 0; n < nsibling; n++)
4569                 st_engine_heartbeat_disable(siblings[n]);
4570
4571         rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4572         if (IS_ERR(rq)) {
4573                 err = PTR_ERR(rq);
4574                 goto out_heartbeat;
4575         }
4576         i915_request_add(rq);
4577
4578         if (!igt_wait_for_spinner(&spin, rq)) {
4579                 intel_gt_set_wedged(gt);
4580                 err = -ETIME;
4581                 goto out_heartbeat;
4582         }
4583
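             /*
              * The virtual request is now running on one of the physical
              * siblings; that physical engine is the one we will reset.
              */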
4584         engine = rq->engine;
4585         GEM_BUG_ON(engine == ve->engine);
4586
4587         /* Take ownership of the reset and tasklet */
4588         local_bh_disable();
4589         if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
4590                              &gt->reset.flags)) {
4591                 local_bh_enable();
4592                 intel_gt_set_wedged(gt);
4593                 err = -EBUSY;
4594                 goto out_heartbeat;
4595         }
4596         tasklet_disable(&engine->execlists.tasklet);
4597
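             /*
              * With the tasklet disabled, run the submission callback by hand
              * so that the spinner becomes the active context before we fake
              * the preemption event.
              */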
4598         engine->execlists.tasklet.callback(&engine->execlists.tasklet);
4599         GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4600
4601         /* Fake a preemption event; one that fails, of course */
4602         spin_lock_irq(&engine->active.lock);
4603         __unwind_incomplete_requests(engine);
4604         spin_unlock_irq(&engine->active.lock);
4605         GEM_BUG_ON(rq->engine != engine);
4606
4607         /* Reset the engine while keeping our active request on hold */
4608         execlists_hold(engine, rq);
4609         GEM_BUG_ON(!i915_request_on_hold(rq));
4610
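             /*
              * Reset the physical engine; the guilty spinner is marked with
              * -EIO but, being on hold, must not be resubmitted by the reset.
              */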
4611         __intel_engine_reset_bh(engine, NULL);
4612         GEM_BUG_ON(rq->fence.error != -EIO);
4613
4614         /* Release our grasp on the engine, letting CS flow again */
4615         tasklet_enable(&engine->execlists.tasklet);
4616         clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
4617         local_bh_enable();
4618
4619         /* Check that we do not resubmit the held request */
4620         i915_request_get(rq);
4621         if (i915_request_wait(rq, 0, HZ / 5) >= 0) {
4622                 pr_err("%s: on hold request completed!\n",
4623                        engine->name);
4624                 intel_gt_set_wedged(gt);
4625                 err = -EIO;
4626                 goto out_rq;
4627         }
4628         GEM_BUG_ON(!i915_request_on_hold(rq));
4629
4630         /* But is resubmitted on release */
4631         execlists_unhold(engine, rq);
4632         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4633                 pr_err("%s: held request did not complete!\n",
4634                        engine->name);
4635                 intel_gt_set_wedged(gt);
4636                 err = -ETIME;
4637         }
4638
4639 out_rq:
4640         i915_request_put(rq);
4641 out_heartbeat:
4642         for (n = 0; n < nsibling; n++)
4643                 st_engine_heartbeat_enable(siblings[n]);
4644
4645         intel_context_put(ve);
4646 out_spin:
4647         igt_spinner_fini(&spin);
4648         return err;
4649 }
4650
4651 static int live_virtual_reset(void *arg)
4652 {
4653         struct intel_gt *gt = arg;
4654         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4655         unsigned int class;
4656
4657         /*
4658          * Check that we handle a reset event within a virtual engine.
4659          * Only the physical engine is reset, but we have to check the flow
4660          * of the virtual requests around the reset, and make sure none of
4661          * them is forgotten.
4662          */
4663
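             /*
              * As with the other virtual engine tests this exercises the
              * execlists backend only, and it also requires support for
              * resetting an individual engine.
              */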
4664         if (intel_uc_uses_guc_submission(&gt->uc))
4665                 return 0;
4666
4667         if (!intel_has_reset_engine(gt))
4668                 return 0;
4669
4670         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4671                 int nsibling, err;
4672
4673                 nsibling = select_siblings(gt, class, siblings);
4674                 if (nsibling < 2)
4675                         continue;
4676
4677                 err = reset_virtual_engine(gt, siblings, nsibling);
4678                 if (err)
4679                         return err;
4680         }
4681
4682         return 0;
4683 }
4684
4685 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4686 {
4687         static const struct i915_subtest tests[] = {
4688                 SUBTEST(live_sanitycheck),
4689                 SUBTEST(live_unlite_switch),
4690                 SUBTEST(live_unlite_preempt),
4691                 SUBTEST(live_unlite_ring),
4692                 SUBTEST(live_pin_rewind),
4693                 SUBTEST(live_hold_reset),
4694                 SUBTEST(live_error_interrupt),
4695                 SUBTEST(live_timeslice_preempt),
4696                 SUBTEST(live_timeslice_rewind),
4697                 SUBTEST(live_timeslice_queue),
4698                 SUBTEST(live_timeslice_nopreempt),
4699                 SUBTEST(live_busywait_preempt),
4700                 SUBTEST(live_preempt),
4701                 SUBTEST(live_late_preempt),
4702                 SUBTEST(live_nopreempt),
4703                 SUBTEST(live_preempt_cancel),
4704                 SUBTEST(live_suppress_self_preempt),
4705                 SUBTEST(live_chain_preempt),
4706                 SUBTEST(live_preempt_ring),
4707                 SUBTEST(live_preempt_gang),
4708                 SUBTEST(live_preempt_timeout),
4709                 SUBTEST(live_preempt_user),
4710                 SUBTEST(live_preempt_smoke),
4711                 SUBTEST(live_virtual_engine),
4712                 SUBTEST(live_virtual_mask),
4713                 SUBTEST(live_virtual_preserved),
4714                 SUBTEST(live_virtual_slice),
4715                 SUBTEST(live_virtual_bond),
4716                 SUBTEST(live_virtual_reset),
4717         };
4718
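             /*
              * These tests drive the execlists submission backend, so they
              * only apply where execlists are available, and there is no
              * point running them once the GT is terminally wedged.
              */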
4719         if (!HAS_EXECLISTS(i915))
4720                 return 0;
4721
4722         if (intel_gt_is_wedged(&i915->gt))
4723                 return 0;
4724
4725         return intel_gt_live_subtests(tests, &i915->gt);
4726 }