drm/i915/gt: Prevent timeslicing into unpreemptable requests
drivers/gpu/drm/i915/gt/selftest_lrc.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2018 Intel Corporation
5  */
6
7 #include <linux/prime_numbers.h>
8
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
19
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
22
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR 16
25 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
26
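/*
 * Allocate a single page of internal memory, mark it cacheable and pin
 * it into the global GTT so the GPU can read and write it at a fixed
 * GGTT offset. Used as scratch space by the selftests in this file.
 */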
27 static struct i915_vma *create_scratch(struct intel_gt *gt)
28 {
29         struct drm_i915_gem_object *obj;
30         struct i915_vma *vma;
31         int err;
32
33         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
34         if (IS_ERR(obj))
35                 return ERR_CAST(obj);
36
37         i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
38
39         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
40         if (IS_ERR(vma)) {
41                 i915_gem_object_put(obj);
42                 return vma;
43         }
44
45         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
46         if (err) {
47                 i915_gem_object_put(obj);
48                 return ERR_PTR(err);
49         }
50
51         return vma;
52 }
53
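/*
 * The heartbeat periodically submits pulses to check engine health, and
 * those pulses can preempt or retire the carefully staged requests used
 * by these tests. Park the heartbeat (holding an explicit pm wakeref in
 * its place) for the duration of a test, then restore the default
 * interval afterwards.
 */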
54 static void engine_heartbeat_disable(struct intel_engine_cs *engine)
55 {
56         engine->props.heartbeat_interval_ms = 0;
57
58         intel_engine_pm_get(engine);
59         intel_engine_park_heartbeat(engine);
60 }
61
62 static void engine_heartbeat_enable(struct intel_engine_cs *engine)
63 {
64         intel_engine_pm_put(engine);
65
66         engine->props.heartbeat_interval_ms =
67                 engine->defaults.heartbeat_interval_ms;
68 }
69
70 static bool is_active(struct i915_request *rq)
71 {
72         if (i915_request_is_active(rq))
73                 return true;
74
75         if (i915_request_on_hold(rq))
76                 return true;
77
78         if (i915_request_started(rq))
79                 return true;
80
81         return false;
82 }
83
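/*
 * Busy-wait (with cond_resched) until the HW has acknowledged the
 * submission: either the request already completed, or nothing is left
 * pending in the ELSP and the request is active/on hold/started.
 * Returns -ETIME if that does not happen within @timeout jiffies.
 */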
84 static int wait_for_submit(struct intel_engine_cs *engine,
85                            struct i915_request *rq,
86                            unsigned long timeout)
87 {
88         timeout += jiffies;
89         do {
90                 bool done = time_after(jiffies, timeout);
91
92                 if (i915_request_completed(rq)) /* that was quick! */
93                         return 0;
94
95                 /* Wait until the HW has acknowledged the submission (or err) */
96                 intel_engine_flush_submission(engine);
97                 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
98                         return 0;
99
100                 if (done)
101                         return -ETIME;
102
103                 cond_resched();
104         } while (1);
105 }
106
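/*
 * Poll until the hanging request is either completed or carries a fence
 * error, then verify it was marked guilty (-EIO) by the engine reset and
 * give it a brief window to complete. Returns 0 only if the request was
 * both reset and completed.
 */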
107 static int wait_for_reset(struct intel_engine_cs *engine,
108                           struct i915_request *rq,
109                           unsigned long timeout)
110 {
111         timeout += jiffies;
112
113         do {
114                 cond_resched();
115                 intel_engine_flush_submission(engine);
116
117                 if (READ_ONCE(engine->execlists.pending[0]))
118                         continue;
119
120                 if (i915_request_completed(rq))
121                         break;
122
123                 if (READ_ONCE(rq->fence.error))
124                         break;
125         } while (time_before(jiffies, timeout));
126
127         flush_scheduled_work();
128
129         if (rq->fence.error != -EIO) {
130                 pr_err("%s: hanging request %llx:%lld not reset\n",
131                        engine->name,
132                        rq->fence.context,
133                        rq->fence.seqno);
134                 return -EINVAL;
135         }
136
137         /* Give the request a jiffie to complete after flushing the worker */
138         if (i915_request_wait(rq, 0,
139                               max(0l, (long)(timeout - jiffies)) + 1) < 0) {
140                 pr_err("%s: hanging request %llx:%lld did not complete\n",
141                        engine->name,
142                        rq->fence.context,
143                        rq->fence.seqno);
144                 return -ETIME;
145         }
146
147         return 0;
148 }
149
150 static int live_sanitycheck(void *arg)
151 {
152         struct intel_gt *gt = arg;
153         struct intel_engine_cs *engine;
154         enum intel_engine_id id;
155         struct igt_spinner spin;
156         int err = 0;
157
158         if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
159                 return 0;
160
161         if (igt_spinner_init(&spin, gt))
162                 return -ENOMEM;
163
164         for_each_engine(engine, gt, id) {
165                 struct intel_context *ce;
166                 struct i915_request *rq;
167
168                 ce = intel_context_create(engine);
169                 if (IS_ERR(ce)) {
170                         err = PTR_ERR(ce);
171                         break;
172                 }
173
174                 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
175                 if (IS_ERR(rq)) {
176                         err = PTR_ERR(rq);
177                         goto out_ctx;
178                 }
179
180                 i915_request_add(rq);
181                 if (!igt_wait_for_spinner(&spin, rq)) {
182                         GEM_TRACE("spinner failed to start\n");
183                         GEM_TRACE_DUMP();
184                         intel_gt_set_wedged(gt);
185                         err = -EIO;
186                         goto out_ctx;
187                 }
188
189                 igt_spinner_end(&spin);
190                 if (igt_flush_test(gt->i915)) {
191                         err = -EIO;
192                         goto out_ctx;
193                 }
194
195 out_ctx:
196                 intel_context_put(ce);
197                 if (err)
198                         break;
199         }
200
201         igt_spinner_fini(&spin);
202         return err;
203 }
204
205 static int live_unlite_restore(struct intel_gt *gt, int prio)
206 {
207         struct intel_engine_cs *engine;
208         enum intel_engine_id id;
209         struct igt_spinner spin;
210         int err = -ENOMEM;
211
212         /*
213          * Check that we can correctly context switch between 2 instances
214          * on the same engine from the same parent context.
215          */
216
217         if (igt_spinner_init(&spin, gt))
218                 return err;
219
220         err = 0;
221         for_each_engine(engine, gt, id) {
222                 struct intel_context *ce[2] = {};
223                 struct i915_request *rq[2];
224                 struct igt_live_test t;
225                 int n;
226
227                 if (prio && !intel_engine_has_preemption(engine))
228                         continue;
229
230                 if (!intel_engine_can_store_dword(engine))
231                         continue;
232
233                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
234                         err = -EIO;
235                         break;
236                 }
237                 engine_heartbeat_disable(engine);
238
239                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
240                         struct intel_context *tmp;
241
242                         tmp = intel_context_create(engine);
243                         if (IS_ERR(tmp)) {
244                                 err = PTR_ERR(tmp);
245                                 goto err_ce;
246                         }
247
248                         err = intel_context_pin(tmp);
249                         if (err) {
250                                 intel_context_put(tmp);
251                                 goto err_ce;
252                         }
253
254                         /*
255                          * Set up the pair of contexts such that if we
256                          * lite-restore using the RING_TAIL from ce[1] it
257                          * will execute garbage from ce[0]->ring.
258                          */
259                         memset(tmp->ring->vaddr,
260                                POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
261                                tmp->ring->vma->size);
262
263                         ce[n] = tmp;
264                 }
265                 GEM_BUG_ON(!ce[1]->ring->size);
266                 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
267                 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
268
269                 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
270                 if (IS_ERR(rq[0])) {
271                         err = PTR_ERR(rq[0]);
272                         goto err_ce;
273                 }
274
275                 i915_request_get(rq[0]);
276                 i915_request_add(rq[0]);
277                 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
278
279                 if (!igt_wait_for_spinner(&spin, rq[0])) {
280                         i915_request_put(rq[0]);
281                         goto err_ce;
282                 }
283
284                 rq[1] = i915_request_create(ce[1]);
285                 if (IS_ERR(rq[1])) {
286                         err = PTR_ERR(rq[1]);
287                         i915_request_put(rq[0]);
288                         goto err_ce;
289                 }
290
291                 if (!prio) {
292                         /*
293                          * Ensure we do the switch to ce[1] on completion.
294                          *
295                          * rq[0] is already submitted, so this should reduce
296                          * to a no-op (a wait on a request on the same engine
297                          * uses the submit fence, not the completion fence),
298                          * but it will install a dependency on rq[1] for rq[0]
299                          * that will prevent the pair being reordered by
300                          * timeslicing.
301                          */
302                         i915_request_await_dma_fence(rq[1], &rq[0]->fence);
303                 }
304
305                 i915_request_get(rq[1]);
306                 i915_request_add(rq[1]);
307                 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
308                 i915_request_put(rq[0]);
309
310                 if (prio) {
311                         struct i915_sched_attr attr = {
312                                 .priority = prio,
313                         };
314
315                         /* Alternatively preempt the spinner with ce[1] */
316                         engine->schedule(rq[1], &attr);
317                 }
318
319                 /* And switch back to ce[0] for good measure */
320                 rq[0] = i915_request_create(ce[0]);
321                 if (IS_ERR(rq[0])) {
322                         err = PTR_ERR(rq[0]);
323                         i915_request_put(rq[1]);
324                         goto err_ce;
325                 }
326
327                 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
328                 i915_request_get(rq[0]);
329                 i915_request_add(rq[0]);
330                 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
331                 i915_request_put(rq[1]);
332                 i915_request_put(rq[0]);
333
334 err_ce:
335                 tasklet_kill(&engine->execlists.tasklet); /* flush submission */
336                 igt_spinner_end(&spin);
337                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
338                         if (IS_ERR_OR_NULL(ce[n]))
339                                 break;
340
341                         intel_context_unpin(ce[n]);
342                         intel_context_put(ce[n]);
343                 }
344
345                 engine_heartbeat_enable(engine);
346                 if (igt_live_test_end(&t))
347                         err = -EIO;
348                 if (err)
349                         break;
350         }
351
352         igt_spinner_fini(&spin);
353         return err;
354 }
355
356 static int live_unlite_switch(void *arg)
357 {
358         return live_unlite_restore(arg, 0);
359 }
360
361 static int live_unlite_preempt(void *arg)
362 {
363         return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
364 }
365
366 static int live_pin_rewind(void *arg)
367 {
368         struct intel_gt *gt = arg;
369         struct intel_engine_cs *engine;
370         enum intel_engine_id id;
371         int err = 0;
372
373         /*
374          * We have to be careful not to trust intel_ring too much, for example
375          * ring->head is updated upon retire which is out of sync with pinning
376          * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
377          * or else we risk writing an older, stale value.
378          *
379          * To simulate this, let's apply a bit of deliberate sabotage.
380          */
381
382         for_each_engine(engine, gt, id) {
383                 struct intel_context *ce;
384                 struct i915_request *rq;
385                 struct intel_ring *ring;
386                 struct igt_live_test t;
387
388                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
389                         err = -EIO;
390                         break;
391                 }
392
393                 ce = intel_context_create(engine);
394                 if (IS_ERR(ce)) {
395                         err = PTR_ERR(ce);
396                         break;
397                 }
398
399                 err = intel_context_pin(ce);
400                 if (err) {
401                         intel_context_put(ce);
402                         break;
403                 }
404
405                 /* Keep the context awake while we play games */
406                 err = i915_active_acquire(&ce->active);
407                 if (err) {
408                         intel_context_unpin(ce);
409                         intel_context_put(ce);
410                         break;
411                 }
412                 ring = ce->ring;
413
414                 /* Poison the ring, and offset the next request from HEAD */
415                 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
416                 ring->emit = ring->size / 2;
417                 ring->tail = ring->emit;
418                 GEM_BUG_ON(ring->head);
419
420                 intel_context_unpin(ce);
421
422                 /* Submit a simple nop request */
423                 GEM_BUG_ON(intel_context_is_pinned(ce));
424                 rq = intel_context_create_request(ce);
425                 i915_active_release(&ce->active); /* e.g. async retire */
426                 intel_context_put(ce);
427                 if (IS_ERR(rq)) {
428                         err = PTR_ERR(rq);
429                         break;
430                 }
431                 GEM_BUG_ON(!rq->head);
432                 i915_request_add(rq);
433
434                 /* Expect not to hang! */
435                 if (igt_live_test_end(&t)) {
436                         err = -EIO;
437                         break;
438                 }
439         }
440
441         return err;
442 }
443
444 static int live_hold_reset(void *arg)
445 {
446         struct intel_gt *gt = arg;
447         struct intel_engine_cs *engine;
448         enum intel_engine_id id;
449         struct igt_spinner spin;
450         int err = 0;
451
452         /*
453          * In order to support offline error capture for fast preempt reset,
454          * we need to decouple the guilty request and ensure that it and its
455          * descendants are not executed while the capture is in progress.
456          */
457
458         if (!intel_has_reset_engine(gt))
459                 return 0;
460
461         if (igt_spinner_init(&spin, gt))
462                 return -ENOMEM;
463
464         for_each_engine(engine, gt, id) {
465                 struct intel_context *ce;
466                 struct i915_request *rq;
467
468                 ce = intel_context_create(engine);
469                 if (IS_ERR(ce)) {
470                         err = PTR_ERR(ce);
471                         break;
472                 }
473
474                 engine_heartbeat_disable(engine);
475
476                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
477                 if (IS_ERR(rq)) {
478                         err = PTR_ERR(rq);
479                         goto out;
480                 }
481                 i915_request_add(rq);
482
483                 if (!igt_wait_for_spinner(&spin, rq)) {
484                         intel_gt_set_wedged(gt);
485                         err = -ETIME;
486                         goto out;
487                 }
488
489                 /* We have our request executing, now remove it and reset */
490
491                 if (test_and_set_bit(I915_RESET_ENGINE + id,
492                                      &gt->reset.flags)) {
493                         intel_gt_set_wedged(gt);
494                         err = -EBUSY;
495                         goto out;
496                 }
497                 tasklet_disable(&engine->execlists.tasklet);
498
499                 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
500                 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
501
502                 i915_request_get(rq);
503                 execlists_hold(engine, rq);
504                 GEM_BUG_ON(!i915_request_on_hold(rq));
505
506                 intel_engine_reset(engine, NULL);
507                 GEM_BUG_ON(rq->fence.error != -EIO);
508
509                 tasklet_enable(&engine->execlists.tasklet);
510                 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
511                                       &gt->reset.flags);
512
513                 /* Check that we do not resubmit the held request */
514                 if (!i915_request_wait(rq, 0, HZ / 5)) {
515                         pr_err("%s: on hold request completed!\n",
516                                engine->name);
517                         i915_request_put(rq);
518                         err = -EIO;
519                         goto out;
520                 }
521                 GEM_BUG_ON(!i915_request_on_hold(rq));
522
523                 /* But is resubmitted on release */
524                 execlists_unhold(engine, rq);
525                 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
526                         pr_err("%s: held request did not complete!\n",
527                                engine->name);
528                         intel_gt_set_wedged(gt);
529                         err = -ETIME;
530                 }
531                 i915_request_put(rq);
532
533 out:
534                 engine_heartbeat_enable(engine);
535                 intel_context_put(ce);
536                 if (err)
537                         break;
538         }
539
540         igt_spinner_fini(&spin);
541         return err;
542 }
543
544 static const char *error_repr(int err)
545 {
546         return err ? "bad" : "good";
547 }
548
549 static int live_error_interrupt(void *arg)
550 {
551         static const struct error_phase {
552                 enum { GOOD = 0, BAD = -EIO } error[2];
553         } phases[] = {
554                 { { BAD,  GOOD } },
555                 { { BAD,  BAD  } },
556                 { { BAD,  GOOD } },
557                 { { GOOD, GOOD } }, /* sentinel */
558         };
559         struct intel_gt *gt = arg;
560         struct intel_engine_cs *engine;
561         enum intel_engine_id id;
562
563         /*
564          * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
565          * of invalid commands in user batches that will cause a GPU hang.
566          * This is a faster mechanism than using hangcheck/heartbeats, but
567          * only detects problems the HW knows about -- it will not warn when
568          * we kill the HW!
569          *
570          * To verify our detection and reset, we throw some invalid commands
571          * at the HW and wait for the interrupt.
572          */
573
574         if (!intel_has_reset_engine(gt))
575                 return 0;
576
577         for_each_engine(engine, gt, id) {
578                 const struct error_phase *p;
579                 int err = 0;
580
581                 engine_heartbeat_disable(engine);
582
583                 for (p = phases; p->error[0] != GOOD; p++) {
584                         struct i915_request *client[ARRAY_SIZE(phases->error)];
585                         u32 *cs;
586                         int i;
587
588                         memset(client, 0, sizeof(client));
589                         for (i = 0; i < ARRAY_SIZE(client); i++) {
590                                 struct intel_context *ce;
591                                 struct i915_request *rq;
592
593                                 ce = intel_context_create(engine);
594                                 if (IS_ERR(ce)) {
595                                         err = PTR_ERR(ce);
596                                         goto out;
597                                 }
598
599                                 rq = intel_context_create_request(ce);
600                                 intel_context_put(ce);
601                                 if (IS_ERR(rq)) {
602                                         err = PTR_ERR(rq);
603                                         goto out;
604                                 }
605
606                                 if (rq->engine->emit_init_breadcrumb) {
607                                         err = rq->engine->emit_init_breadcrumb(rq);
608                                         if (err) {
609                                                 i915_request_add(rq);
610                                                 goto out;
611                                         }
612                                 }
613
614                                 cs = intel_ring_begin(rq, 2);
615                                 if (IS_ERR(cs)) {
616                                         i915_request_add(rq);
617                                         err = PTR_ERR(cs);
618                                         goto out;
619                                 }
620
621                                 if (p->error[i]) {
622                                         *cs++ = 0xdeadbeef;
623                                         *cs++ = 0xdeadbeef;
624                                 } else {
625                                         *cs++ = MI_NOOP;
626                                         *cs++ = MI_NOOP;
627                                 }
628
629                                 client[i] = i915_request_get(rq);
630                                 i915_request_add(rq);
631                         }
632
633                         err = wait_for_submit(engine, client[0], HZ / 2);
634                         if (err) {
635                                 pr_err("%s: first request did not start within time!\n",
636                                        engine->name);
637                                 err = -ETIME;
638                                 goto out;
639                         }
640
641                         for (i = 0; i < ARRAY_SIZE(client); i++) {
642                                 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
643                                         pr_debug("%s: %s request incomplete!\n",
644                                                  engine->name,
645                                                  error_repr(p->error[i]));
646
647                                 if (!i915_request_started(client[i])) {
648                                         pr_err("%s: %s request not started!\n",
649                                                engine->name,
650                                                error_repr(p->error[i]));
651                                         err = -ETIME;
652                                         goto out;
653                                 }
654
655                                 /* Kick the tasklet to process the error */
656                                 intel_engine_flush_submission(engine);
657                                 if (client[i]->fence.error != p->error[i]) {
658                                         pr_err("%s: %s request (%s) with wrong error code: %d\n",
659                                                engine->name,
660                                                error_repr(p->error[i]),
661                                                i915_request_completed(client[i]) ? "completed" : "running",
662                                                client[i]->fence.error);
663                                         err = -EINVAL;
664                                         goto out;
665                                 }
666                         }
667
668 out:
669                         for (i = 0; i < ARRAY_SIZE(client); i++)
670                                 if (client[i])
671                                         i915_request_put(client[i]);
672                         if (err) {
673                                 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
674                                        engine->name, p - phases,
675                                        p->error[0], p->error[1]);
676                                 break;
677                         }
678                 }
679
680                 engine_heartbeat_enable(engine);
681                 if (err) {
682                         intel_gt_set_wedged(gt);
683                         return err;
684                 }
685         }
686
687         return 0;
688 }
689
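/*
 * Each link in the semaphore chain spins (with arbitration enabled, so
 * it can be timesliced) on its own dword in @vma until that dword turns
 * non-zero, and then releases the previous link by writing 1 into slot
 * idx - 1. Poking the final slot therefore unwinds the chain all the
 * way back down to the head at idx 0.
 */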
690 static int
691 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
692 {
693         u32 *cs;
694
695         cs = intel_ring_begin(rq, 10);
696         if (IS_ERR(cs))
697                 return PTR_ERR(cs);
698
699         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
700
701         *cs++ = MI_SEMAPHORE_WAIT |
702                 MI_SEMAPHORE_GLOBAL_GTT |
703                 MI_SEMAPHORE_POLL |
704                 MI_SEMAPHORE_SAD_NEQ_SDD;
705         *cs++ = 0;
706         *cs++ = i915_ggtt_offset(vma) + 4 * idx;
707         *cs++ = 0;
708
709         if (idx > 0) {
710                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
711                 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
712                 *cs++ = 0;
713                 *cs++ = 1;
714         } else {
715                 *cs++ = MI_NOOP;
716                 *cs++ = MI_NOOP;
717                 *cs++ = MI_NOOP;
718                 *cs++ = MI_NOOP;
719         }
720
721         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
722
723         intel_ring_advance(rq, cs);
724         return 0;
725 }
726
727 static struct i915_request *
728 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
729 {
730         struct intel_context *ce;
731         struct i915_request *rq;
732         int err;
733
734         ce = intel_context_create(engine);
735         if (IS_ERR(ce))
736                 return ERR_CAST(ce);
737
738         rq = intel_context_create_request(ce);
739         if (IS_ERR(rq))
740                 goto out_ce;
741
742         err = 0;
743         if (rq->engine->emit_init_breadcrumb)
744                 err = rq->engine->emit_init_breadcrumb(rq);
745         if (err == 0)
746                 err = emit_semaphore_chain(rq, vma, idx);
747         if (err == 0)
748                 i915_request_get(rq);
749         i915_request_add(rq);
750         if (err)
751                 rq = ERR_PTR(err);
752
753 out_ce:
754         intel_context_put(ce);
755         return rq;
756 }
757
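/*
 * Submit a kernel-context request that writes 1 into slot idx - 1 of
 * @vma, then bump it to @prio and kick the tasklet so the scheduler
 * acts on the new priority immediately.
 */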
758 static int
759 release_queue(struct intel_engine_cs *engine,
760               struct i915_vma *vma,
761               int idx, int prio)
762 {
763         struct i915_sched_attr attr = {
764                 .priority = prio,
765         };
766         struct i915_request *rq;
767         u32 *cs;
768
769         rq = intel_engine_create_kernel_request(engine);
770         if (IS_ERR(rq))
771                 return PTR_ERR(rq);
772
773         cs = intel_ring_begin(rq, 4);
774         if (IS_ERR(cs)) {
775                 i915_request_add(rq);
776                 return PTR_ERR(cs);
777         }
778
779         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
780         *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
781         *cs++ = 0;
782         *cs++ = 1;
783
784         intel_ring_advance(rq, cs);
785
786         i915_request_get(rq);
787         i915_request_add(rq);
788
789         local_bh_disable();
790         engine->schedule(rq, &attr);
791         local_bh_enable(); /* kick tasklet */
792
793         i915_request_put(rq);
794
795         return 0;
796 }
797
798 static int
799 slice_semaphore_queue(struct intel_engine_cs *outer,
800                       struct i915_vma *vma,
801                       int count)
802 {
803         struct intel_engine_cs *engine;
804         struct i915_request *head;
805         enum intel_engine_id id;
806         int err, i, n = 0;
807
808         head = semaphore_queue(outer, vma, n++);
809         if (IS_ERR(head))
810                 return PTR_ERR(head);
811
812         for_each_engine(engine, outer->gt, id) {
813                 for (i = 0; i < count; i++) {
814                         struct i915_request *rq;
815
816                         rq = semaphore_queue(engine, vma, n++);
817                         if (IS_ERR(rq)) {
818                                 err = PTR_ERR(rq);
819                                 goto out;
820                         }
821
822                         i915_request_put(rq);
823                 }
824         }
825
826         err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
827         if (err)
828                 goto out;
829
830         if (i915_request_wait(head, 0,
831                               2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
832                 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
833                        count, n);
834                 GEM_TRACE_DUMP();
835                 intel_gt_set_wedged(outer->gt);
836                 err = -EIO;
837         }
838
839 out:
840         i915_request_put(head);
841         return err;
842 }
843
844 static int live_timeslice_preempt(void *arg)
845 {
846         struct intel_gt *gt = arg;
847         struct drm_i915_gem_object *obj;
848         struct i915_vma *vma;
849         void *vaddr;
850         int err = 0;
851         int count;
852
853         /*
854          * If a request takes too long, we would like to give other users
855          * a fair go on the GPU. In particular, users may create batches
856          * that wait upon external input, where that input may even be
857          * supplied by another GPU job. To avoid blocking forever, we
858          * need to preempt the current task and replace it with another
859          * ready task.
860          */
861         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
862                 return 0;
863
864         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
865         if (IS_ERR(obj))
866                 return PTR_ERR(obj);
867
868         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
869         if (IS_ERR(vma)) {
870                 err = PTR_ERR(vma);
871                 goto err_obj;
872         }
873
874         vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
875         if (IS_ERR(vaddr)) {
876                 err = PTR_ERR(vaddr);
877                 goto err_obj;
878         }
879
880         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
881         if (err)
882                 goto err_map;
883
884         err = i915_vma_sync(vma);
885         if (err)
886                 goto err_pin;
887
888         for_each_prime_number_from(count, 1, 16) {
889                 struct intel_engine_cs *engine;
890                 enum intel_engine_id id;
891
892                 for_each_engine(engine, gt, id) {
893                         if (!intel_engine_has_preemption(engine))
894                                 continue;
895
896                         memset(vaddr, 0, PAGE_SIZE);
897
898                         engine_heartbeat_disable(engine);
899                         err = slice_semaphore_queue(engine, vma, count);
900                         engine_heartbeat_enable(engine);
901                         if (err)
902                                 goto err_pin;
903
904                         if (igt_flush_test(gt->i915)) {
905                                 err = -EIO;
906                                 goto err_pin;
907                         }
908                 }
909         }
910
911 err_pin:
912         i915_vma_unpin(vma);
913 err_map:
914         i915_gem_object_unpin_map(obj);
915 err_obj:
916         i915_gem_object_put(obj);
917         return err;
918 }
919
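/*
 * Emit a request that (1) waits for slot[0] to reach at least @idx,
 * (2) records the engine's RING_TIMESTAMP into slot[idx], and (3)
 * advances slot[0] to idx + 1. Comparing the recorded timestamps
 * afterwards reveals the order in which the requests actually executed.
 */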
920 static struct i915_request *
921 create_rewinder(struct intel_context *ce,
922                 struct i915_request *wait,
923                 void *slot, int idx)
924 {
925         const u32 offset =
926                 i915_ggtt_offset(ce->engine->status_page.vma) +
927                 offset_in_page(slot);
928         struct i915_request *rq;
929         u32 *cs;
930         int err;
931
932         rq = intel_context_create_request(ce);
933         if (IS_ERR(rq))
934                 return rq;
935
936         if (wait) {
937                 err = i915_request_await_dma_fence(rq, &wait->fence);
938                 if (err)
939                         goto err;
940         }
941
942         cs = intel_ring_begin(rq, 14);
943         if (IS_ERR(cs)) {
944                 err = PTR_ERR(cs);
945                 goto err;
946         }
947
948         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
949         *cs++ = MI_NOOP;
950
951         *cs++ = MI_SEMAPHORE_WAIT |
952                 MI_SEMAPHORE_GLOBAL_GTT |
953                 MI_SEMAPHORE_POLL |
954                 MI_SEMAPHORE_SAD_GTE_SDD;
955         *cs++ = idx;
956         *cs++ = offset;
957         *cs++ = 0;
958
959         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
960         *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
961         *cs++ = offset + idx * sizeof(u32);
962         *cs++ = 0;
963
964         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
965         *cs++ = offset;
966         *cs++ = 0;
967         *cs++ = idx + 1;
968
969         intel_ring_advance(rq, cs);
970
971         rq->sched.attr.priority = I915_PRIORITY_MASK;
972         err = 0;
973 err:
974         i915_request_get(rq);
975         i915_request_add(rq);
976         if (err) {
977                 i915_request_put(rq);
978                 return ERR_PTR(err);
979         }
980
981         return rq;
982 }
983
984 static int live_timeslice_rewind(void *arg)
985 {
986         struct intel_gt *gt = arg;
987         struct intel_engine_cs *engine;
988         enum intel_engine_id id;
989
990         /*
991          * The usual presumption on timeslice expiration is that we replace
992          * the active context with another. However, given a chain of
993          * dependencies, we may end up replacing the context with itself,
994          * but with only a subset of its requests still pending, forcing
995          * us to rewind the RING_TAIL of the original request.
996          */
997         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
998                 return 0;
999
1000         for_each_engine(engine, gt, id) {
1001                 enum { A1, A2, B1 };
1002                 enum { X = 1, Z, Y };
1003                 struct i915_request *rq[3] = {};
1004                 struct intel_context *ce;
1005                 unsigned long timeslice;
1006                 int i, err = 0;
1007                 u32 *slot;
1008
1009                 if (!intel_engine_has_timeslices(engine))
1010                         continue;
1011
1012                 /*
1013                  * A:rq1 -- semaphore wait, timestamp X
1014                  * A:rq2 -- write timestamp Y
1015                  *
1016                  * B:rq1 [await A:rq1] -- write timestamp Z
1017                  *
1018                  * Force timeslice, release semaphore.
1019                  *
1020                  * Expect execution/evaluation order XZY
1021                  */
1022
1023                 engine_heartbeat_disable(engine);
1024                 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1025
1026                 slot = memset32(engine->status_page.addr + 1000, 0, 4);
1027
1028                 ce = intel_context_create(engine);
1029                 if (IS_ERR(ce)) {
1030                         err = PTR_ERR(ce);
1031                         goto err;
1032                 }
1033
1034                 rq[0] = create_rewinder(ce, NULL, slot, X);
1035                 if (IS_ERR(rq[0])) {
1036                         intel_context_put(ce);
1037                         goto err;
1038                 }
1039
1040                 rq[1] = create_rewinder(ce, NULL, slot, Y);
1041                 intel_context_put(ce);
1042                 if (IS_ERR(rq[1]))
1043                         goto err;
1044
1045                 err = wait_for_submit(engine, rq[1], HZ / 2);
1046                 if (err) {
1047                         pr_err("%s: failed to submit first context\n",
1048                                engine->name);
1049                         goto err;
1050                 }
1051
1052                 ce = intel_context_create(engine);
1053                 if (IS_ERR(ce)) {
1054                         err = PTR_ERR(ce);
1055                         goto err;
1056                 }
1057
1058                 rq[2] = create_rewinder(ce, rq[0], slot, Z);
1059                 intel_context_put(ce);
1060                 if (IS_ERR(rq[2]))
1061                         goto err;
1062
1063                 err = wait_for_submit(engine, rq[2], HZ / 2);
1064                 if (err) {
1065                         pr_err("%s: failed to submit second context\n",
1066                                engine->name);
1067                         goto err;
1068                 }
1069
1070                 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1071                 if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
1072                         /* Wait for the timeslice to kick in */
1073                         del_timer(&engine->execlists.timer);
1074                         tasklet_hi_schedule(&engine->execlists.tasklet);
1075                         intel_engine_flush_submission(engine);
1076                 }
1077                 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1078                 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1079                 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1080                 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1081
1082                 /* Release the hounds! */
1083                 slot[0] = 1;
1084                 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1085
1086                 for (i = 1; i <= 3; i++) {
1087                         unsigned long timeout = jiffies + HZ / 2;
1088
1089                         while (!READ_ONCE(slot[i]) &&
1090                                time_before(jiffies, timeout))
1091                                 ;
1092
1093                         if (!time_before(jiffies, timeout)) {
1094                                 pr_err("%s: rq[%d] timed out\n",
1095                                        engine->name, i - 1);
1096                                 err = -ETIME;
1097                                 goto err;
1098                         }
1099
1100                         pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1101                 }
1102
1103                 /* XZY: XZ < XY */
1104                 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1105                         pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1106                                engine->name,
1107                                slot[Z] - slot[X],
1108                                slot[Y] - slot[X]);
1109                         err = -EINVAL;
1110                 }
1111
1112 err:
1113                 memset32(&slot[0], -1, 4);
1114                 wmb();
1115
1116                 engine->props.timeslice_duration_ms = timeslice;
1117                 engine_heartbeat_enable(engine);
1118                 for (i = 0; i < 3; i++)
1119                         i915_request_put(rq[i]);
1120                 if (igt_flush_test(gt->i915))
1121                         err = -EIO;
1122                 if (err)
1123                         return err;
1124         }
1125
1126         return 0;
1127 }
1128
1129 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1130 {
1131         struct i915_request *rq;
1132
1133         rq = intel_engine_create_kernel_request(engine);
1134         if (IS_ERR(rq))
1135                 return rq;
1136
1137         i915_request_get(rq);
1138         i915_request_add(rq);
1139
1140         return rq;
1141 }
1142
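/*
 * Allow up to two timeslices plus a jiffy of slack for the expected
 * preemption (or its absence) to be observed.
 */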
1143 static long timeslice_threshold(const struct intel_engine_cs *engine)
1144 {
1145         return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
1146 }
1147
1148 static int live_timeslice_queue(void *arg)
1149 {
1150         struct intel_gt *gt = arg;
1151         struct drm_i915_gem_object *obj;
1152         struct intel_engine_cs *engine;
1153         enum intel_engine_id id;
1154         struct i915_vma *vma;
1155         void *vaddr;
1156         int err = 0;
1157
1158         /*
1159          * Make sure that even if ELSP[0] and ELSP[1] are filled, with
1160          * timeslicing between them disabled, we *do* enable timeslicing
1161          * if the queue demands it. (Normally, we do not submit if
1162          * ELSP[1] is already occupied, so must rely on timeslicing to
1163          * eject ELSP[0] in favour of the queue.)
1164          */
1165         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1166                 return 0;
1167
1168         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1169         if (IS_ERR(obj))
1170                 return PTR_ERR(obj);
1171
1172         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1173         if (IS_ERR(vma)) {
1174                 err = PTR_ERR(vma);
1175                 goto err_obj;
1176         }
1177
1178         vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1179         if (IS_ERR(vaddr)) {
1180                 err = PTR_ERR(vaddr);
1181                 goto err_obj;
1182         }
1183
1184         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1185         if (err)
1186                 goto err_map;
1187
1188         err = i915_vma_sync(vma);
1189         if (err)
1190                 goto err_pin;
1191
1192         for_each_engine(engine, gt, id) {
1193                 struct i915_sched_attr attr = {
1194                         .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1195                 };
1196                 struct i915_request *rq, *nop;
1197
1198                 if (!intel_engine_has_preemption(engine))
1199                         continue;
1200
1201                 engine_heartbeat_disable(engine);
1202                 memset(vaddr, 0, PAGE_SIZE);
1203
1204                 /* ELSP[0]: semaphore wait */
1205                 rq = semaphore_queue(engine, vma, 0);
1206                 if (IS_ERR(rq)) {
1207                         err = PTR_ERR(rq);
1208                         goto err_heartbeat;
1209                 }
1210                 engine->schedule(rq, &attr);
1211                 err = wait_for_submit(engine, rq, HZ / 2);
1212                 if (err) {
1213                         pr_err("%s: Timed out trying to submit semaphores\n",
1214                                engine->name);
1215                         goto err_rq;
1216                 }
1217
1218                 /* ELSP[1]: nop request */
1219                 nop = nop_request(engine);
1220                 if (IS_ERR(nop)) {
1221                         err = PTR_ERR(nop);
1222                         goto err_rq;
1223                 }
1224                 err = wait_for_submit(engine, nop, HZ / 2);
1225                 i915_request_put(nop);
1226                 if (err) {
1227                         pr_err("%s: Timed out trying to submit nop\n",
1228                                engine->name);
1229                         goto err_rq;
1230                 }
1231
1232                 GEM_BUG_ON(i915_request_completed(rq));
1233                 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1234
1235                 /* Queue: semaphore signal, at the same priority as the semaphore */
1236                 err = release_queue(engine, vma, 1, effective_prio(rq));
1237                 if (err)
1238                         goto err_rq;
1239
1240                 /* Wait until we ack the release_queue and start timeslicing */
1241                 do {
1242                         cond_resched();
1243                         intel_engine_flush_submission(engine);
1244                 } while (READ_ONCE(engine->execlists.pending[0]));
1245
1246                 if (!READ_ONCE(engine->execlists.timer.expires) &&
1247                     execlists_active(&engine->execlists) == rq &&
1248                     !i915_request_completed(rq)) {
1249                         struct drm_printer p =
1250                                 drm_info_printer(gt->i915->drm.dev);
1251
1252                         GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
1253                                       engine->name);
1254                         intel_engine_dump(engine, &p,
1255                                           "%s\n", engine->name);
1256                         GEM_TRACE_DUMP();
1257
1258                         memset(vaddr, 0xff, PAGE_SIZE);
1259                         err = -EINVAL;
1260                 }
1261
1262                 /* Timeslice every jiffy, so within 2 we should signal */
1263                 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
1264                         struct drm_printer p =
1265                                 drm_info_printer(gt->i915->drm.dev);
1266
1267                         pr_err("%s: Failed to timeslice into queue\n",
1268                                engine->name);
1269                         intel_engine_dump(engine, &p,
1270                                           "%s\n", engine->name);
1271
1272                         memset(vaddr, 0xff, PAGE_SIZE);
1273                         err = -EIO;
1274                 }
1275 err_rq:
1276                 i915_request_put(rq);
1277 err_heartbeat:
1278                 engine_heartbeat_enable(engine);
1279                 if (err)
1280                         break;
1281         }
1282
1283 err_pin:
1284         i915_vma_unpin(vma);
1285 err_map:
1286         i915_gem_object_unpin_map(obj);
1287 err_obj:
1288         i915_gem_object_put(obj);
1289         return err;
1290 }
1291
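/*
 * Scenario: an unpreemptible spinner (I915_FENCE_FLAG_NOPREEMPT) holds
 * ELSP[0] while a maximum priority (I915_PRIORITY_BARRIER) request sits
 * behind it with a 1ms timeslice configured. The barrier must not be
 * timesliced in ahead of the no-preempt request.
 */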
1292 static int live_timeslice_nopreempt(void *arg)
1293 {
1294         struct intel_gt *gt = arg;
1295         struct intel_engine_cs *engine;
1296         enum intel_engine_id id;
1297         struct igt_spinner spin;
1298         int err = 0;
1299
1300         /*
1301          * We should not timeslice into a request that is marked with
1302          * I915_REQUEST_NOPREEMPT.
1303          */
1304         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1305                 return 0;
1306
1307         if (igt_spinner_init(&spin, gt))
1308                 return -ENOMEM;
1309
1310         for_each_engine(engine, gt, id) {
1311                 struct intel_context *ce;
1312                 struct i915_request *rq;
1313                 unsigned long timeslice;
1314
1315                 if (!intel_engine_has_preemption(engine))
1316                         continue;
1317
1318                 ce = intel_context_create(engine);
1319                 if (IS_ERR(ce)) {
1320                         err = PTR_ERR(ce);
1321                         break;
1322                 }
1323
1324                 engine_heartbeat_disable(engine);
1325                 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1326
1327                 /* Create an unpreemptible spinner */
1328
1329                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1330                 intel_context_put(ce);
1331                 if (IS_ERR(rq)) {
1332                         err = PTR_ERR(rq);
1333                         goto out_heartbeat;
1334                 }
1335
1336                 i915_request_get(rq);
1337                 i915_request_add(rq);
1338
1339                 if (!igt_wait_for_spinner(&spin, rq)) {
1340                         i915_request_put(rq);
1341                         err = -ETIME;
1342                         goto out_spin;
1343                 }
1344
1345                 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1346                 i915_request_put(rq);
1347
1348                 /* Followed by a maximum priority barrier (heartbeat) */
1349
1350                 ce = intel_context_create(engine);
1351                 if (IS_ERR(ce)) {
1352                         err = PTR_ERR(ce);
1353                         goto out_spin;
1354                 }
1355
1356                 rq = intel_context_create_request(ce);
1357                 intel_context_put(ce);
1358                 if (IS_ERR(rq)) {
1359                         err = PTR_ERR(rq);
1360                         goto out_spin;
1361                 }
1362
1363                 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1364                 i915_request_get(rq);
1365                 i915_request_add(rq);
1366
1367                 /*
1368                  * Wait until the barrier is in ELSP, and we know timeslicing
1369                  * will have been activated.
1370                  */
1371                 if (wait_for_submit(engine, rq, HZ / 2)) {
1372                         i915_request_put(rq);
1373                         err = -ETIME;
1374                         goto out_spin;
1375                 }
1376
1377                 /*
1378                  * Since the ELSP[0] request is unpreemptible, it should not
1379                  * allow the maximum priority barrier through. Wait long
1380                  * enough to see if it is timesliced in by mistake.
1381                  */
1382                 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) {
1383                         pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1384                                engine->name);
1385                         err = -EINVAL;
1386                 }
1387                 i915_request_put(rq);
1388
1389 out_spin:
1390                 igt_spinner_end(&spin);
1391 out_heartbeat:
1392                 xchg(&engine->props.timeslice_duration_ms, timeslice);
1393                 engine_heartbeat_enable(engine);
1394                 if (err)
1395                         break;
1396
1397                 if (igt_flush_test(gt->i915)) {
1398                         err = -EIO;
1399                         break;
1400                 }
1401         }
1402
1403         igt_spinner_fini(&spin);
1404         return err;
1405 }
1406
1407 static int live_busywait_preempt(void *arg)
1408 {
1409         struct intel_gt *gt = arg;
1410         struct i915_gem_context *ctx_hi, *ctx_lo;
1411         struct intel_engine_cs *engine;
1412         struct drm_i915_gem_object *obj;
1413         struct i915_vma *vma;
1414         enum intel_engine_id id;
1415         int err = -ENOMEM;
1416         u32 *map;
1417
1418         /*
1419          * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1420          * preempt the busywaits used to synchronise between rings.
1421          */
1422
1423         ctx_hi = kernel_context(gt->i915);
1424         if (!ctx_hi)
1425                 return -ENOMEM;
1426         ctx_hi->sched.priority =
1427                 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1428
1429         ctx_lo = kernel_context(gt->i915);
1430         if (!ctx_lo)
1431                 goto err_ctx_hi;
1432         ctx_lo->sched.priority =
1433                 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1434
1435         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1436         if (IS_ERR(obj)) {
1437                 err = PTR_ERR(obj);
1438                 goto err_ctx_lo;
1439         }
1440
1441         map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1442         if (IS_ERR(map)) {
1443                 err = PTR_ERR(map);
1444                 goto err_obj;
1445         }
1446
1447         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1448         if (IS_ERR(vma)) {
1449                 err = PTR_ERR(vma);
1450                 goto err_map;
1451         }
1452
1453         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1454         if (err)
1455                 goto err_map;
1456
1457         err = i915_vma_sync(vma);
1458         if (err)
1459                 goto err_vma;
1460
1461         for_each_engine(engine, gt, id) {
1462                 struct i915_request *lo, *hi;
1463                 struct igt_live_test t;
1464                 u32 *cs;
1465
1466                 if (!intel_engine_has_preemption(engine))
1467                         continue;
1468
1469                 if (!intel_engine_can_store_dword(engine))
1470                         continue;
1471
1472                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1473                         err = -EIO;
1474                         goto err_vma;
1475                 }
1476
1477                 /*
1478                  * We create two requests. The low priority request
1479                  * busywaits on a semaphore (inside the ringbuffer where
1480          * it should be preemptible) and the high priority request
1481                  * uses a MI_STORE_DWORD_IMM to update the semaphore value
1482                  * allowing the first request to complete. If preemption
1483                  * fails, we hang instead.
1484                  */
1485
1486                 lo = igt_request_alloc(ctx_lo, engine);
1487                 if (IS_ERR(lo)) {
1488                         err = PTR_ERR(lo);
1489                         goto err_vma;
1490                 }
1491
1492                 cs = intel_ring_begin(lo, 8);
1493                 if (IS_ERR(cs)) {
1494                         err = PTR_ERR(cs);
1495                         i915_request_add(lo);
1496                         goto err_vma;
1497                 }
1498
1499                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1500                 *cs++ = i915_ggtt_offset(vma);
1501                 *cs++ = 0;
1502                 *cs++ = 1;
1503
1504                 /* XXX Do we need a flush + invalidate here? */
1505
1506                 *cs++ = MI_SEMAPHORE_WAIT |
1507                         MI_SEMAPHORE_GLOBAL_GTT |
1508                         MI_SEMAPHORE_POLL |
1509                         MI_SEMAPHORE_SAD_EQ_SDD;
1510                 *cs++ = 0;
1511                 *cs++ = i915_ggtt_offset(vma);
1512                 *cs++ = 0;
1513
1514                 intel_ring_advance(lo, cs);
1515
1516                 i915_request_get(lo);
1517                 i915_request_add(lo);
1518
1519                 if (wait_for(READ_ONCE(*map), 10)) {
1520                         i915_request_put(lo);
1521                         err = -ETIMEDOUT;
1522                         goto err_vma;
1523                 }
1524
1525                 /* Low priority request should be busywaiting now */
1526                 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1527                         i915_request_put(lo);
1528                         pr_err("%s: Busywaiting request did not busywait!\n",
1529                                engine->name);
1530                         err = -EIO;
1531                         goto err_vma;
1532                 }
1533
1534                 hi = igt_request_alloc(ctx_hi, engine);
1535                 if (IS_ERR(hi)) {
1536                         err = PTR_ERR(hi);
1537                         i915_request_put(lo);
1538                         goto err_vma;
1539                 }
1540
1541                 cs = intel_ring_begin(hi, 4);
1542                 if (IS_ERR(cs)) {
1543                         err = PTR_ERR(cs);
1544                         i915_request_add(hi);
1545                         i915_request_put(lo);
1546                         goto err_vma;
1547                 }
1548
1549                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1550                 *cs++ = i915_ggtt_offset(vma);
1551                 *cs++ = 0;
1552                 *cs++ = 0;
1553
1554                 intel_ring_advance(hi, cs);
1555                 i915_request_add(hi);
1556
1557                 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1558                         struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1559
1560                         pr_err("%s: Failed to preempt semaphore busywait!\n",
1561                                engine->name);
1562
1563                         intel_engine_dump(engine, &p, "%s\n", engine->name);
1564                         GEM_TRACE_DUMP();
1565
1566                         i915_request_put(lo);
1567                         intel_gt_set_wedged(gt);
1568                         err = -EIO;
1569                         goto err_vma;
1570                 }
1571                 GEM_BUG_ON(READ_ONCE(*map));
1572                 i915_request_put(lo);
1573
1574                 if (igt_live_test_end(&t)) {
1575                         err = -EIO;
1576                         goto err_vma;
1577                 }
1578         }
1579
1580         err = 0;
1581 err_vma:
1582         i915_vma_unpin(vma);
1583 err_map:
1584         i915_gem_object_unpin_map(obj);
1585 err_obj:
1586         i915_gem_object_put(obj);
1587 err_ctx_lo:
1588         kernel_context_close(ctx_lo);
1589 err_ctx_hi:
1590         kernel_context_close(ctx_hi);
1591         return err;
1592 }
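
/*
 * Illustrative sketch (not part of the test flow above): the busywait scheme
 * reduces to a CS-side MI_SEMAPHORE_WAIT poll on a GGTT dword that is later
 * overwritten by a plain MI_STORE_DWORD_IMM from a second request. A
 * hypothetical helper for emitting that releasing store, built only from the
 * ring emitters already used above, might look like this.
 */
static int __maybe_unused emit_semaphore_release(struct i915_request *rq,
                                                 struct i915_vma *vma,
                                                 u32 value)
{
        u32 *cs;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        /* Overwrite the polled dword so MI_SEMAPHORE_WAIT (SAD_EQ_SDD) exits */
        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
        *cs++ = i915_ggtt_offset(vma);
        *cs++ = 0;
        *cs++ = value;

        intel_ring_advance(rq, cs);
        return 0;
}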
1593
1594 static struct i915_request *
1595 spinner_create_request(struct igt_spinner *spin,
1596                        struct i915_gem_context *ctx,
1597                        struct intel_engine_cs *engine,
1598                        u32 arb)
1599 {
1600         struct intel_context *ce;
1601         struct i915_request *rq;
1602
1603         ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1604         if (IS_ERR(ce))
1605                 return ERR_CAST(ce);
1606
1607         rq = igt_spinner_create_request(spin, ce, arb);
1608         intel_context_put(ce);
1609         return rq;
1610 }
1611
1612 static int live_preempt(void *arg)
1613 {
1614         struct intel_gt *gt = arg;
1615         struct i915_gem_context *ctx_hi, *ctx_lo;
1616         struct igt_spinner spin_hi, spin_lo;
1617         struct intel_engine_cs *engine;
1618         enum intel_engine_id id;
1619         int err = -ENOMEM;
1620
1621         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1622                 return 0;
1623
1624         if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1625                 pr_err("Logical preemption supported, but not exposed\n");
1626
1627         if (igt_spinner_init(&spin_hi, gt))
1628                 return -ENOMEM;
1629
1630         if (igt_spinner_init(&spin_lo, gt))
1631                 goto err_spin_hi;
1632
1633         ctx_hi = kernel_context(gt->i915);
1634         if (!ctx_hi)
1635                 goto err_spin_lo;
1636         ctx_hi->sched.priority =
1637                 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1638
1639         ctx_lo = kernel_context(gt->i915);
1640         if (!ctx_lo)
1641                 goto err_ctx_hi;
1642         ctx_lo->sched.priority =
1643                 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1644
1645         for_each_engine(engine, gt, id) {
1646                 struct igt_live_test t;
1647                 struct i915_request *rq;
1648
1649                 if (!intel_engine_has_preemption(engine))
1650                         continue;
1651
1652                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1653                         err = -EIO;
1654                         goto err_ctx_lo;
1655                 }
1656
1657                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1658                                             MI_ARB_CHECK);
1659                 if (IS_ERR(rq)) {
1660                         err = PTR_ERR(rq);
1661                         goto err_ctx_lo;
1662                 }
1663
1664                 i915_request_add(rq);
1665                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1666                         GEM_TRACE("lo spinner failed to start\n");
1667                         GEM_TRACE_DUMP();
1668                         intel_gt_set_wedged(gt);
1669                         err = -EIO;
1670                         goto err_ctx_lo;
1671                 }
1672
1673                 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1674                                             MI_ARB_CHECK);
1675                 if (IS_ERR(rq)) {
1676                         igt_spinner_end(&spin_lo);
1677                         err = PTR_ERR(rq);
1678                         goto err_ctx_lo;
1679                 }
1680
1681                 i915_request_add(rq);
1682                 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1683                         GEM_TRACE("hi spinner failed to start\n");
1684                         GEM_TRACE_DUMP();
1685                         intel_gt_set_wedged(gt);
1686                         err = -EIO;
1687                         goto err_ctx_lo;
1688                 }
1689
1690                 igt_spinner_end(&spin_hi);
1691                 igt_spinner_end(&spin_lo);
1692
1693                 if (igt_live_test_end(&t)) {
1694                         err = -EIO;
1695                         goto err_ctx_lo;
1696                 }
1697         }
1698
1699         err = 0;
1700 err_ctx_lo:
1701         kernel_context_close(ctx_lo);
1702 err_ctx_hi:
1703         kernel_context_close(ctx_hi);
1704 err_spin_lo:
1705         igt_spinner_fini(&spin_lo);
1706 err_spin_hi:
1707         igt_spinner_fini(&spin_hi);
1708         return err;
1709 }
1710
1711 static int live_late_preempt(void *arg)
1712 {
1713         struct intel_gt *gt = arg;
1714         struct i915_gem_context *ctx_hi, *ctx_lo;
1715         struct igt_spinner spin_hi, spin_lo;
1716         struct intel_engine_cs *engine;
1717         struct i915_sched_attr attr = {};
1718         enum intel_engine_id id;
1719         int err = -ENOMEM;
1720
1721         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1722                 return 0;
1723
1724         if (igt_spinner_init(&spin_hi, gt))
1725                 return -ENOMEM;
1726
1727         if (igt_spinner_init(&spin_lo, gt))
1728                 goto err_spin_hi;
1729
1730         ctx_hi = kernel_context(gt->i915);
1731         if (!ctx_hi)
1732                 goto err_spin_lo;
1733
1734         ctx_lo = kernel_context(gt->i915);
1735         if (!ctx_lo)
1736                 goto err_ctx_hi;
1737
1738         /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1739         ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1740
1741         for_each_engine(engine, gt, id) {
1742                 struct igt_live_test t;
1743                 struct i915_request *rq;
1744
1745                 if (!intel_engine_has_preemption(engine))
1746                         continue;
1747
1748                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1749                         err = -EIO;
1750                         goto err_ctx_lo;
1751                 }
1752
1753                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1754                                             MI_ARB_CHECK);
1755                 if (IS_ERR(rq)) {
1756                         err = PTR_ERR(rq);
1757                         goto err_ctx_lo;
1758                 }
1759
1760                 i915_request_add(rq);
1761                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1762                         pr_err("First context failed to start\n");
1763                         goto err_wedged;
1764                 }
1765
1766                 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1767                                             MI_NOOP);
1768                 if (IS_ERR(rq)) {
1769                         igt_spinner_end(&spin_lo);
1770                         err = PTR_ERR(rq);
1771                         goto err_ctx_lo;
1772                 }
1773
1774                 i915_request_add(rq);
1775                 if (igt_wait_for_spinner(&spin_hi, rq)) {
1776                         pr_err("Second context overtook first?\n");
1777                         goto err_wedged;
1778                 }
1779
1780                 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1781                 engine->schedule(rq, &attr);
1782
1783                 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1784                         pr_err("High priority context failed to preempt the low priority context\n");
1785                         GEM_TRACE_DUMP();
1786                         goto err_wedged;
1787                 }
1788
1789                 igt_spinner_end(&spin_hi);
1790                 igt_spinner_end(&spin_lo);
1791
1792                 if (igt_live_test_end(&t)) {
1793                         err = -EIO;
1794                         goto err_ctx_lo;
1795                 }
1796         }
1797
1798         err = 0;
1799 err_ctx_lo:
1800         kernel_context_close(ctx_lo);
1801 err_ctx_hi:
1802         kernel_context_close(ctx_hi);
1803 err_spin_lo:
1804         igt_spinner_fini(&spin_lo);
1805 err_spin_hi:
1806         igt_spinner_fini(&spin_hi);
1807         return err;
1808
1809 err_wedged:
1810         igt_spinner_end(&spin_hi);
1811         igt_spinner_end(&spin_lo);
1812         intel_gt_set_wedged(gt);
1813         err = -EIO;
1814         goto err_ctx_lo;
1815 }
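
/*
 * Illustrative sketch (not part of the test flow above): the late promotion
 * in live_late_preempt() is just a post-submission reschedule with a higher
 * priority. A hypothetical wrapper over that step:
 */
static void __maybe_unused promote_request(struct i915_request *rq, int prio)
{
        struct i915_sched_attr attr = {
                .priority = I915_USER_PRIORITY(prio),
        };

        /* No-op if the backend does not expose a scheduler */
        if (rq->engine->schedule)
                rq->engine->schedule(rq, &attr);
}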
1816
1817 struct preempt_client {
1818         struct igt_spinner spin;
1819         struct i915_gem_context *ctx;
1820 };
1821
1822 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1823 {
1824         c->ctx = kernel_context(gt->i915);
1825         if (!c->ctx)
1826                 return -ENOMEM;
1827
1828         if (igt_spinner_init(&c->spin, gt))
1829                 goto err_ctx;
1830
1831         return 0;
1832
1833 err_ctx:
1834         kernel_context_close(c->ctx);
1835         return -ENOMEM;
1836 }
1837
1838 static void preempt_client_fini(struct preempt_client *c)
1839 {
1840         igt_spinner_fini(&c->spin);
1841         kernel_context_close(c->ctx);
1842 }
1843
1844 static int live_nopreempt(void *arg)
1845 {
1846         struct intel_gt *gt = arg;
1847         struct intel_engine_cs *engine;
1848         struct preempt_client a, b;
1849         enum intel_engine_id id;
1850         int err = -ENOMEM;
1851
1852         /*
1853          * Verify that we can disable preemption for an individual request
1854          * that may be being observed and so does not want to be interrupted.
1855          */
1856
1857         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1858                 return 0;
1859
1860         if (preempt_client_init(gt, &a))
1861                 return -ENOMEM;
1862         if (preempt_client_init(gt, &b))
1863                 goto err_client_a;
1864         b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1865
1866         for_each_engine(engine, gt, id) {
1867                 struct i915_request *rq_a, *rq_b;
1868
1869                 if (!intel_engine_has_preemption(engine))
1870                         continue;
1871
1872                 engine->execlists.preempt_hang.count = 0;
1873
1874                 rq_a = spinner_create_request(&a.spin,
1875                                               a.ctx, engine,
1876                                               MI_ARB_CHECK);
1877                 if (IS_ERR(rq_a)) {
1878                         err = PTR_ERR(rq_a);
1879                         goto err_client_b;
1880                 }
1881
1882                 /* Low priority client, but unpreemptable! */
1883                 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1884
1885                 i915_request_add(rq_a);
1886                 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1887                         pr_err("First client failed to start\n");
1888                         goto err_wedged;
1889                 }
1890
1891                 rq_b = spinner_create_request(&b.spin,
1892                                               b.ctx, engine,
1893                                               MI_ARB_CHECK);
1894                 if (IS_ERR(rq_b)) {
1895                         err = PTR_ERR(rq_b);
1896                         goto err_client_b;
1897                 }
1898
1899                 i915_request_add(rq_b);
1900
1901                 /* B is much more important than A! (But A is unpreemptable.) */
1902                 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1903
1904                 /* Wait long enough for preemption and timeslicing */
1905                 if (igt_wait_for_spinner(&b.spin, rq_b)) {
1906                         pr_err("Second client started too early!\n");
1907                         goto err_wedged;
1908                 }
1909
1910                 igt_spinner_end(&a.spin);
1911
1912                 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1913                         pr_err("Second client failed to start\n");
1914                         goto err_wedged;
1915                 }
1916
1917                 igt_spinner_end(&b.spin);
1918
1919                 if (engine->execlists.preempt_hang.count) {
1920                         pr_err("Preemption recorded x%d; should have been suppressed!\n",
1921                                engine->execlists.preempt_hang.count);
1922                         err = -EINVAL;
1923                         goto err_wedged;
1924                 }
1925
1926                 if (igt_flush_test(gt->i915))
1927                         goto err_wedged;
1928         }
1929
1930         err = 0;
1931 err_client_b:
1932         preempt_client_fini(&b);
1933 err_client_a:
1934         preempt_client_fini(&a);
1935         return err;
1936
1937 err_wedged:
1938         igt_spinner_end(&b.spin);
1939         igt_spinner_end(&a.spin);
1940         intel_gt_set_wedged(gt);
1941         err = -EIO;
1942         goto err_client_b;
1943 }
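
/*
 * Illustrative sketch (not part of the test flow above): the "unpreemptable"
 * request in live_nopreempt() is simply flagged before submission. A
 * hypothetical wrapper, valid only before i915_request_add() publishes the
 * request:
 */
static void __maybe_unused mark_unpreemptable(struct i915_request *rq)
{
        __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
}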
1944
1945 struct live_preempt_cancel {
1946         struct intel_engine_cs *engine;
1947         struct preempt_client a, b;
1948 };
1949
1950 static int __cancel_active0(struct live_preempt_cancel *arg)
1951 {
1952         struct i915_request *rq;
1953         struct igt_live_test t;
1954         int err;
1955
1956         /* Preempt cancel of ELSP0 */
1957         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1958         if (igt_live_test_begin(&t, arg->engine->i915,
1959                                 __func__, arg->engine->name))
1960                 return -EIO;
1961
1962         rq = spinner_create_request(&arg->a.spin,
1963                                     arg->a.ctx, arg->engine,
1964                                     MI_ARB_CHECK);
1965         if (IS_ERR(rq))
1966                 return PTR_ERR(rq);
1967
1968         clear_bit(CONTEXT_BANNED, &rq->context->flags);
1969         i915_request_get(rq);
1970         i915_request_add(rq);
1971         if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1972                 err = -EIO;
1973                 goto out;
1974         }
1975
1976         intel_context_set_banned(rq->context);
1977         err = intel_engine_pulse(arg->engine);
1978         if (err)
1979                 goto out;
1980
1981         err = wait_for_reset(arg->engine, rq, HZ / 2);
1982         if (err) {
1983                 pr_err("Cancelled inflight0 request did not reset\n");
1984                 goto out;
1985         }
1986
1987 out:
1988         i915_request_put(rq);
1989         if (igt_live_test_end(&t))
1990                 err = -EIO;
1991         return err;
1992 }
1993
1994 static int __cancel_active1(struct live_preempt_cancel *arg)
1995 {
1996         struct i915_request *rq[2] = {};
1997         struct igt_live_test t;
1998         int err;
1999
2000         /* Preempt cancel of ELSP1 */
2001         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2002         if (igt_live_test_begin(&t, arg->engine->i915,
2003                                 __func__, arg->engine->name))
2004                 return -EIO;
2005
2006         rq[0] = spinner_create_request(&arg->a.spin,
2007                                        arg->a.ctx, arg->engine,
2008                                        MI_NOOP); /* no preemption */
2009         if (IS_ERR(rq[0]))
2010                 return PTR_ERR(rq[0]);
2011
2012         clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2013         i915_request_get(rq[0]);
2014         i915_request_add(rq[0]);
2015         if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2016                 err = -EIO;
2017                 goto out;
2018         }
2019
2020         rq[1] = spinner_create_request(&arg->b.spin,
2021                                        arg->b.ctx, arg->engine,
2022                                        MI_ARB_CHECK);
2023         if (IS_ERR(rq[1])) {
2024                 err = PTR_ERR(rq[1]);
2025                 goto out;
2026         }
2027
2028         clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2029         i915_request_get(rq[1]);
2030         err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2031         i915_request_add(rq[1]);
2032         if (err)
2033                 goto out;
2034
2035         intel_context_set_banned(rq[1]->context);
2036         err = intel_engine_pulse(arg->engine);
2037         if (err)
2038                 goto out;
2039
2040         igt_spinner_end(&arg->a.spin);
2041         err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2042         if (err)
2043                 goto out;
2044
2045         if (rq[0]->fence.error != 0) {
2046                 pr_err("Normal inflight0 request did not complete\n");
2047                 err = -EINVAL;
2048                 goto out;
2049         }
2050
2051         if (rq[1]->fence.error != -EIO) {
2052                 pr_err("Cancelled inflight1 request did not report -EIO\n");
2053                 err = -EINVAL;
2054                 goto out;
2055         }
2056
2057 out:
2058         i915_request_put(rq[1]);
2059         i915_request_put(rq[0]);
2060         if (igt_live_test_end(&t))
2061                 err = -EIO;
2062         return err;
2063 }
2064
2065 static int __cancel_queued(struct live_preempt_cancel *arg)
2066 {
2067         struct i915_request *rq[3] = {};
2068         struct igt_live_test t;
2069         int err;
2070
2071         /* Full ELSP and one in the wings */
2072         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2073         if (igt_live_test_begin(&t, arg->engine->i915,
2074                                 __func__, arg->engine->name))
2075                 return -EIO;
2076
2077         rq[0] = spinner_create_request(&arg->a.spin,
2078                                        arg->a.ctx, arg->engine,
2079                                        MI_ARB_CHECK);
2080         if (IS_ERR(rq[0]))
2081                 return PTR_ERR(rq[0]);
2082
2083         clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2084         i915_request_get(rq[0]);
2085         i915_request_add(rq[0]);
2086         if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2087                 err = -EIO;
2088                 goto out;
2089         }
2090
2091         rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2092         if (IS_ERR(rq[1])) {
2093                 err = PTR_ERR(rq[1]);
2094                 goto out;
2095         }
2096
2097         clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2098         i915_request_get(rq[1]);
2099         err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2100         i915_request_add(rq[1]);
2101         if (err)
2102                 goto out;
2103
2104         rq[2] = spinner_create_request(&arg->b.spin,
2105                                        arg->a.ctx, arg->engine,
2106                                        MI_ARB_CHECK);
2107         if (IS_ERR(rq[2])) {
2108                 err = PTR_ERR(rq[2]);
2109                 goto out;
2110         }
2111
2112         i915_request_get(rq[2]);
2113         err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2114         i915_request_add(rq[2]);
2115         if (err)
2116                 goto out;
2117
2118         intel_context_set_banned(rq[2]->context);
2119         err = intel_engine_pulse(arg->engine);
2120         if (err)
2121                 goto out;
2122
2123         err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2124         if (err)
2125                 goto out;
2126
2127         if (rq[0]->fence.error != -EIO) {
2128                 pr_err("Cancelled inflight0 request did not report -EIO\n");
2129                 err = -EINVAL;
2130                 goto out;
2131         }
2132
2133         if (rq[1]->fence.error != 0) {
2134                 pr_err("Normal inflight1 request did not complete\n");
2135                 err = -EINVAL;
2136                 goto out;
2137         }
2138
2139         if (rq[2]->fence.error != -EIO) {
2140                 pr_err("Cancelled queued request did not report -EIO\n");
2141                 err = -EINVAL;
2142                 goto out;
2143         }
2144
2145 out:
2146         i915_request_put(rq[2]);
2147         i915_request_put(rq[1]);
2148         i915_request_put(rq[0]);
2149         if (igt_live_test_end(&t))
2150                 err = -EIO;
2151         return err;
2152 }
2153
2154 static int __cancel_hostile(struct live_preempt_cancel *arg)
2155 {
2156         struct i915_request *rq;
2157         int err;
2158
2159         /* Preempt cancel non-preemptible spinner in ELSP0 */
2160         if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2161                 return 0;
2162
2163         if (!intel_has_reset_engine(arg->engine->gt))
2164                 return 0;
2165
2166         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2167         rq = spinner_create_request(&arg->a.spin,
2168                                     arg->a.ctx, arg->engine,
2169                                     MI_NOOP); /* preemption disabled */
2170         if (IS_ERR(rq))
2171                 return PTR_ERR(rq);
2172
2173         clear_bit(CONTEXT_BANNED, &rq->context->flags);
2174         i915_request_get(rq);
2175         i915_request_add(rq);
2176         if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2177                 err = -EIO;
2178                 goto out;
2179         }
2180
2181         intel_context_set_banned(rq->context);
2182         err = intel_engine_pulse(arg->engine); /* force reset */
2183         if (err)
2184                 goto out;
2185
2186         err = wait_for_reset(arg->engine, rq, HZ / 2);
2187         if (err) {
2188                 pr_err("Cancelled inflight0 request did not reset\n");
2189                 goto out;
2190         }
2191
2192 out:
2193         i915_request_put(rq);
2194         if (igt_flush_test(arg->engine->i915))
2195                 err = -EIO;
2196         return err;
2197 }
2198
2199 static int live_preempt_cancel(void *arg)
2200 {
2201         struct intel_gt *gt = arg;
2202         struct live_preempt_cancel data;
2203         enum intel_engine_id id;
2204         int err = -ENOMEM;
2205
2206         /*
2207          * To cancel an inflight context, we need to first remove it from the
2208          * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2209          */
2210
2211         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2212                 return 0;
2213
2214         if (preempt_client_init(gt, &data.a))
2215                 return -ENOMEM;
2216         if (preempt_client_init(gt, &data.b))
2217                 goto err_client_a;
2218
2219         for_each_engine(data.engine, gt, id) {
2220                 if (!intel_engine_has_preemption(data.engine))
2221                         continue;
2222
2223                 err = __cancel_active0(&data);
2224                 if (err)
2225                         goto err_wedged;
2226
2227                 err = __cancel_active1(&data);
2228                 if (err)
2229                         goto err_wedged;
2230
2231                 err = __cancel_queued(&data);
2232                 if (err)
2233                         goto err_wedged;
2234
2235                 err = __cancel_hostile(&data);
2236                 if (err)
2237                         goto err_wedged;
2238         }
2239
2240         err = 0;
2241 err_client_b:
2242         preempt_client_fini(&data.b);
2243 err_client_a:
2244         preempt_client_fini(&data.a);
2245         return err;
2246
2247 err_wedged:
2248         GEM_TRACE_DUMP();
2249         igt_spinner_end(&data.b.spin);
2250         igt_spinner_end(&data.a.spin);
2251         intel_gt_set_wedged(gt);
2252         goto err_client_b;
2253 }
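
/*
 * Illustrative sketch (not part of the test flow above): each __cancel_*()
 * phase performs the same cancellation step, i.e. ban the context and kick
 * the engine with a heartbeat pulse so the ban takes effect. A hypothetical
 * helper for that pair:
 */
static int __maybe_unused cancel_request(struct i915_request *rq)
{
        intel_context_set_banned(rq->context);

        /* The pulse forces a preemption event, prompting the reset */
        return intel_engine_pulse(rq->engine);
}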
2254
2255 static int live_suppress_self_preempt(void *arg)
2256 {
2257         struct intel_gt *gt = arg;
2258         struct intel_engine_cs *engine;
2259         struct i915_sched_attr attr = {
2260                 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2261         };
2262         struct preempt_client a, b;
2263         enum intel_engine_id id;
2264         int err = -ENOMEM;
2265
2266         /*
2267          * Verify that if a preemption request does not cause a change in
2268          * the current execution order, the preempt-to-idle injection is
2269          * skipped and that we do not accidentally apply it after the CS
2270          * completion event.
2271          */
2272
2273         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2274                 return 0;
2275
2276         if (intel_uc_uses_guc_submission(&gt->uc))
2277                 return 0; /* presume black box */
2278
2279         if (intel_vgpu_active(gt->i915))
2280                 return 0; /* GVT forces single port & request submission */
2281
2282         if (preempt_client_init(gt, &a))
2283                 return -ENOMEM;
2284         if (preempt_client_init(gt, &b))
2285                 goto err_client_a;
2286
2287         for_each_engine(engine, gt, id) {
2288                 struct i915_request *rq_a, *rq_b;
2289                 int depth;
2290
2291                 if (!intel_engine_has_preemption(engine))
2292                         continue;
2293
2294                 if (igt_flush_test(gt->i915))
2295                         goto err_wedged;
2296
2297                 intel_engine_pm_get(engine);
2298                 engine->execlists.preempt_hang.count = 0;
2299
2300                 rq_a = spinner_create_request(&a.spin,
2301                                               a.ctx, engine,
2302                                               MI_NOOP);
2303                 if (IS_ERR(rq_a)) {
2304                         err = PTR_ERR(rq_a);
2305                         intel_engine_pm_put(engine);
2306                         goto err_client_b;
2307                 }
2308
2309                 i915_request_add(rq_a);
2310                 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2311                         pr_err("First client failed to start\n");
2312                         intel_engine_pm_put(engine);
2313                         goto err_wedged;
2314                 }
2315
2316                 /* Keep postponing the timer to avoid premature slicing */
2317                 mod_timer(&engine->execlists.timer, jiffies + HZ);
2318                 for (depth = 0; depth < 8; depth++) {
2319                         rq_b = spinner_create_request(&b.spin,
2320                                                       b.ctx, engine,
2321                                                       MI_NOOP);
2322                         if (IS_ERR(rq_b)) {
2323                                 err = PTR_ERR(rq_b);
2324                                 intel_engine_pm_put(engine);
2325                                 goto err_client_b;
2326                         }
2327                         i915_request_add(rq_b);
2328
2329                         GEM_BUG_ON(i915_request_completed(rq_a));
2330                         engine->schedule(rq_a, &attr);
2331                         igt_spinner_end(&a.spin);
2332
2333                         if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2334                                 pr_err("Second client failed to start\n");
2335                                 intel_engine_pm_put(engine);
2336                                 goto err_wedged;
2337                         }
2338
2339                         swap(a, b);
2340                         rq_a = rq_b;
2341                 }
2342                 igt_spinner_end(&a.spin);
2343
2344                 if (engine->execlists.preempt_hang.count) {
2345                         pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2346                                engine->name,
2347                                engine->execlists.preempt_hang.count,
2348                                depth);
2349                         intel_engine_pm_put(engine);
2350                         err = -EINVAL;
2351                         goto err_client_b;
2352                 }
2353
2354                 intel_engine_pm_put(engine);
2355                 if (igt_flush_test(gt->i915))
2356                         goto err_wedged;
2357         }
2358
2359         err = 0;
2360 err_client_b:
2361         preempt_client_fini(&b);
2362 err_client_a:
2363         preempt_client_fini(&a);
2364         return err;
2365
2366 err_wedged:
2367         igt_spinner_end(&b.spin);
2368         igt_spinner_end(&a.spin);
2369         intel_gt_set_wedged(gt);
2370         err = -EIO;
2371         goto err_client_b;
2372 }
2373
2374 static int __i915_sw_fence_call
2375 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
2376 {
2377         return NOTIFY_DONE;
2378 }
2379
2380 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
2381 {
2382         struct i915_request *rq;
2383
2384         rq = kzalloc(sizeof(*rq), GFP_KERNEL);
2385         if (!rq)
2386                 return NULL;
2387
2388         rq->engine = engine;
2389
2390         spin_lock_init(&rq->lock);
2391         INIT_LIST_HEAD(&rq->fence.cb_list);
2392         rq->fence.lock = &rq->lock;
2393         rq->fence.ops = &i915_fence_ops;
2394
2395         i915_sched_node_init(&rq->sched);
2396
2397         /* mark this request as permanently incomplete */
2398         rq->fence.seqno = 1;
2399         BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
2400         rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
2401         GEM_BUG_ON(i915_request_completed(rq));
2402
2403         i915_sw_fence_init(&rq->submit, dummy_notify);
2404         set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
2409
2410         return rq;
2411 }
2412
2413 static void dummy_request_free(struct i915_request *dummy)
2414 {
2415         /* We have to fake the CS interrupt to kick the next request */
2416         i915_sw_fence_commit(&dummy->submit);
2417
2418         i915_request_mark_complete(dummy);
2419         dma_fence_signal(&dummy->fence);
2420
2421         i915_sched_node_fini(&dummy->sched);
2422         i915_sw_fence_fini(&dummy->submit);
2423
2424         dma_fence_free(&dummy->fence);
2425 }
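
/*
 * Illustrative sketch (not part of the test flow above): dummy_request()
 * keeps the request permanently incomplete by pointing hwsp_seqno at the
 * always-zero upper half of fence.seqno. A hypothetical sanity check of
 * that trick:
 */
static void __maybe_unused assert_dummy_incomplete(const struct i915_request *rq)
{
        /* The breadcrumb reads 0 and can never reach the target seqno (1) */
        GEM_BUG_ON(*rq->hwsp_seqno);
        GEM_BUG_ON(i915_request_completed(rq));
}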
2426
2427 static int live_suppress_wait_preempt(void *arg)
2428 {
2429         struct intel_gt *gt = arg;
2430         struct preempt_client client[4];
2431         struct i915_request *rq[ARRAY_SIZE(client)] = {};
2432         struct intel_engine_cs *engine;
2433         enum intel_engine_id id;
2434         int err = -ENOMEM;
2435         int i;
2436
2437         /*
2438          * Waiters are given a little priority nudge, but not enough
2439          * to actually cause any preemption. Double check that we do
2440          * not needlessly generate preempt-to-idle cycles.
2441          */
2442
2443         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2444                 return 0;
2445
2446         if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
2447                 return -ENOMEM;
2448         if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
2449                 goto err_client_0;
2450         if (preempt_client_init(gt, &client[2])) /* head of queue */
2451                 goto err_client_1;
2452         if (preempt_client_init(gt, &client[3])) /* bystander */
2453                 goto err_client_2;
2454
2455         for_each_engine(engine, gt, id) {
2456                 int depth;
2457
2458                 if (!intel_engine_has_preemption(engine))
2459                         continue;
2460
2461                 if (!engine->emit_init_breadcrumb)
2462                         continue;
2463
2464                 for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
2465                         struct i915_request *dummy;
2466
2467                         engine->execlists.preempt_hang.count = 0;
2468
2469                         dummy = dummy_request(engine);
2470                         if (!dummy)
2471                                 goto err_client_3;
2472
2473                         for (i = 0; i < ARRAY_SIZE(client); i++) {
2474                                 struct i915_request *this;
2475
2476                                 this = spinner_create_request(&client[i].spin,
2477                                                               client[i].ctx, engine,
2478                                                               MI_NOOP);
2479                                 if (IS_ERR(this)) {
2480                                         err = PTR_ERR(this);
2481                                         goto err_wedged;
2482                                 }
2483
2484                                 /* Disable NEWCLIENT promotion */
2485                                 __i915_active_fence_set(&i915_request_timeline(this)->last_request,
2486                                                         &dummy->fence);
2487
2488                                 rq[i] = i915_request_get(this);
2489                                 i915_request_add(this);
2490                         }
2491
2492                         dummy_request_free(dummy);
2493
2494                         GEM_BUG_ON(i915_request_completed(rq[0]));
2495                         if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
2496                                 pr_err("%s: First client failed to start\n",
2497                                        engine->name);
2498                                 goto err_wedged;
2499                         }
2500                         GEM_BUG_ON(!i915_request_started(rq[0]));
2501
2502                         if (i915_request_wait(rq[depth],
2503                                               I915_WAIT_PRIORITY,
2504                                               1) != -ETIME) {
2505                                 pr_err("%s: Waiter depth:%d completed!\n",
2506                                        engine->name, depth);
2507                                 goto err_wedged;
2508                         }
2509
2510                         for (i = 0; i < ARRAY_SIZE(client); i++) {
2511                                 igt_spinner_end(&client[i].spin);
2512                                 i915_request_put(rq[i]);
2513                                 rq[i] = NULL;
2514                         }
2515
2516                         if (igt_flush_test(gt->i915))
2517                                 goto err_wedged;
2518
2519                         if (engine->execlists.preempt_hang.count) {
2520                                 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
2521                                        engine->name,
2522                                        engine->execlists.preempt_hang.count,
2523                                        depth);
2524                                 err = -EINVAL;
2525                                 goto err_client_3;
2526                         }
2527                 }
2528         }
2529
2530         err = 0;
2531 err_client_3:
2532         preempt_client_fini(&client[3]);
2533 err_client_2:
2534         preempt_client_fini(&client[2]);
2535 err_client_1:
2536         preempt_client_fini(&client[1]);
2537 err_client_0:
2538         preempt_client_fini(&client[0]);
2539         return err;
2540
2541 err_wedged:
2542         for (i = 0; i < ARRAY_SIZE(client); i++) {
2543                 igt_spinner_end(&client[i].spin);
2544                 i915_request_put(rq[i]);
2545         }
2546         intel_gt_set_wedged(gt);
2547         err = -EIO;
2548         goto err_client_3;
2549 }
2550
2551 static int live_chain_preempt(void *arg)
2552 {
2553         struct intel_gt *gt = arg;
2554         struct intel_engine_cs *engine;
2555         struct preempt_client hi, lo;
2556         enum intel_engine_id id;
2557         int err = -ENOMEM;
2558
2559         /*
2560          * Build a chain AB...BA between two contexts (A, B) and request
2561          * preemption of the last request. It should then complete before
2562          * the previously submitted spinner in B.
2563          */
2564
2565         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2566                 return 0;
2567
2568         if (preempt_client_init(gt, &hi))
2569                 return -ENOMEM;
2570
2571         if (preempt_client_init(gt, &lo))
2572                 goto err_client_hi;
2573
2574         for_each_engine(engine, gt, id) {
2575                 struct i915_sched_attr attr = {
2576                         .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2577                 };
2578                 struct igt_live_test t;
2579                 struct i915_request *rq;
2580                 int ring_size, count, i;
2581
2582                 if (!intel_engine_has_preemption(engine))
2583                         continue;
2584
2585                 rq = spinner_create_request(&lo.spin,
2586                                             lo.ctx, engine,
2587                                             MI_ARB_CHECK);
2588                 if (IS_ERR(rq))
2589                         goto err_wedged;
2590
2591                 i915_request_get(rq);
2592                 i915_request_add(rq);
2593
2594                 ring_size = rq->wa_tail - rq->head;
2595                 if (ring_size < 0)
2596                         ring_size += rq->ring->size;
2597                 ring_size = rq->ring->size / ring_size;
2598                 pr_debug("%s(%s): Using maximum of %d requests\n",
2599                          __func__, engine->name, ring_size);
2600
2601                 igt_spinner_end(&lo.spin);
2602                 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2603                         pr_err("Timed out waiting to flush %s\n", engine->name);
2604                         i915_request_put(rq);
2605                         goto err_wedged;
2606                 }
2607                 i915_request_put(rq);
2608
2609                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2610                         err = -EIO;
2611                         goto err_wedged;
2612                 }
2613
2614                 for_each_prime_number_from(count, 1, ring_size) {
2615                         rq = spinner_create_request(&hi.spin,
2616                                                     hi.ctx, engine,
2617                                                     MI_ARB_CHECK);
2618                         if (IS_ERR(rq))
2619                                 goto err_wedged;
2620                         i915_request_add(rq);
2621                         if (!igt_wait_for_spinner(&hi.spin, rq))
2622                                 goto err_wedged;
2623
2624                         rq = spinner_create_request(&lo.spin,
2625                                                     lo.ctx, engine,
2626                                                     MI_ARB_CHECK);
2627                         if (IS_ERR(rq))
2628                                 goto err_wedged;
2629                         i915_request_add(rq);
2630
2631                         for (i = 0; i < count; i++) {
2632                                 rq = igt_request_alloc(lo.ctx, engine);
2633                                 if (IS_ERR(rq))
2634                                         goto err_wedged;
2635                                 i915_request_add(rq);
2636                         }
2637
2638                         rq = igt_request_alloc(hi.ctx, engine);
2639                         if (IS_ERR(rq))
2640                                 goto err_wedged;
2641
2642                         i915_request_get(rq);
2643                         i915_request_add(rq);
2644                         engine->schedule(rq, &attr);
2645
2646                         igt_spinner_end(&hi.spin);
2647                         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2648                                 struct drm_printer p =
2649                                         drm_info_printer(gt->i915->drm.dev);
2650
2651                                 pr_err("Failed to preempt over chain of %d\n",
2652                                        count);
2653                                 intel_engine_dump(engine, &p,
2654                                                   "%s\n", engine->name);
2655                                 i915_request_put(rq);
2656                                 goto err_wedged;
2657                         }
2658                         igt_spinner_end(&lo.spin);
2659                         i915_request_put(rq);
2660
2661                         rq = igt_request_alloc(lo.ctx, engine);
2662                         if (IS_ERR(rq))
2663                                 goto err_wedged;
2664
2665                         i915_request_get(rq);
2666                         i915_request_add(rq);
2667
2668                         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2669                                 struct drm_printer p =
2670                                         drm_info_printer(gt->i915->drm.dev);
2671
2672                                 pr_err("Failed to flush low priority chain of %d requests\n",
2673                                        count);
2674                                 intel_engine_dump(engine, &p,
2675                                                   "%s\n", engine->name);
2676
2677                                 i915_request_put(rq);
2678                                 goto err_wedged;
2679                         }
2680                         i915_request_put(rq);
2681                 }
2682
2683                 if (igt_live_test_end(&t)) {
2684                         err = -EIO;
2685                         goto err_wedged;
2686                 }
2687         }
2688
2689         err = 0;
2690 err_client_lo:
2691         preempt_client_fini(&lo);
2692 err_client_hi:
2693         preempt_client_fini(&hi);
2694         return err;
2695
2696 err_wedged:
2697         igt_spinner_end(&hi.spin);
2698         igt_spinner_end(&lo.spin);
2699         intel_gt_set_wedged(gt);
2700         err = -EIO;
2701         goto err_client_lo;
2702 }
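
/*
 * Illustrative sketch (not part of the test flow above): live_chain_preempt()
 * sizes its chains by measuring one representative request and dividing the
 * ring size by it. A hypothetical helper for that estimate:
 */
static int __maybe_unused max_requests_in_ring(const struct i915_request *rq)
{
        int sz = rq->wa_tail - rq->head;

        if (sz < 0) /* the request wrapped around the end of the ring */
                sz += rq->ring->size;

        return rq->ring->size / sz;
}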
2703
2704 static int create_gang(struct intel_engine_cs *engine,
2705                        struct i915_request **prev)
2706 {
2707         struct drm_i915_gem_object *obj;
2708         struct intel_context *ce;
2709         struct i915_request *rq;
2710         struct i915_vma *vma;
2711         u32 *cs;
2712         int err;
2713
2714         ce = intel_context_create(engine);
2715         if (IS_ERR(ce))
2716                 return PTR_ERR(ce);
2717
2718         obj = i915_gem_object_create_internal(engine->i915, 4096);
2719         if (IS_ERR(obj)) {
2720                 err = PTR_ERR(obj);
2721                 goto err_ce;
2722         }
2723
2724         vma = i915_vma_instance(obj, ce->vm, NULL);
2725         if (IS_ERR(vma)) {
2726                 err = PTR_ERR(vma);
2727                 goto err_obj;
2728         }
2729
2730         err = i915_vma_pin(vma, 0, 0, PIN_USER);
2731         if (err)
2732                 goto err_obj;
2733
2734         cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2735         if (IS_ERR(cs)) {
2736                 err = PTR_ERR(cs);
                     goto err_obj;
             }
2737
2738         /* Semaphore target: spin until zero */
2739         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2740
2741         *cs++ = MI_SEMAPHORE_WAIT |
2742                 MI_SEMAPHORE_POLL |
2743                 MI_SEMAPHORE_SAD_EQ_SDD;
2744         *cs++ = 0;
2745         *cs++ = lower_32_bits(vma->node.start);
2746         *cs++ = upper_32_bits(vma->node.start);
2747
2748         if (*prev) {
2749                 u64 offset = (*prev)->batch->node.start;
2750
2751                 /* Terminate the spinner in the next lower priority batch. */
2752                 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2753                 *cs++ = lower_32_bits(offset);
2754                 *cs++ = upper_32_bits(offset);
2755                 *cs++ = 0;
2756         }
2757
2758         *cs++ = MI_BATCH_BUFFER_END;
2759         i915_gem_object_flush_map(obj);
2760         i915_gem_object_unpin_map(obj);
2761
2762         rq = intel_context_create_request(ce);
2763         if (IS_ERR(rq)) {
2764                 err = PTR_ERR(rq);
                     goto err_obj;
             }
2765
2766         rq->batch = i915_vma_get(vma);
2767         i915_request_get(rq);
2768
2769         i915_vma_lock(vma);
2770         err = i915_request_await_object(rq, vma->obj, false);
2771         if (!err)
2772                 err = i915_vma_move_to_active(vma, rq, 0);
2773         if (!err)
2774                 err = rq->engine->emit_bb_start(rq,
2775                                                 vma->node.start,
2776                                                 PAGE_SIZE, 0);
2777         i915_vma_unlock(vma);
2778         i915_request_add(rq);
2779         if (err)
2780                 goto err_rq;
2781
2782         i915_gem_object_put(obj);
2783         intel_context_put(ce);
2784
2785         rq->client_link.next = &(*prev)->client_link;
2786         *prev = rq;
2787         return 0;
2788
2789 err_rq:
2790         i915_vma_put(rq->batch);
2791         i915_request_put(rq);
2792 err_obj:
2793         i915_gem_object_put(obj);
2794 err_ce:
2795         intel_context_put(ce);
2796         return err;
2797 }
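
/*
 * Illustrative sketch (not part of the test flow above): the gang built by
 * create_gang() is released from the CPU by clearing the semaphore dword at
 * the head of the most recent batch, which in turn releases each older batch.
 * A hypothetical helper for that release:
 */
static int __maybe_unused release_gang(struct i915_request *rq)
{
        u32 *cs;

        cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        WRITE_ONCE(*cs, 0); /* terminate the highest priority spinner */
        i915_gem_object_unpin_map(rq->batch->obj);

        return 0;
}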
2798
2799 static int live_preempt_gang(void *arg)
2800 {
2801         struct intel_gt *gt = arg;
2802         struct intel_engine_cs *engine;
2803         enum intel_engine_id id;
2804
2805         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2806                 return 0;
2807
2808         /*
2809          * Build as long a chain of preempters as we can, with each
2810          * request at a higher priority than the last. Once we are ready, we release
2811          * the last batch, which then percolates down the chain, each releasing
2812          * the next oldest in turn. The intent is to simply push as hard as we
2813          * can with the number of preemptions, trying to exceed narrow HW
2814          * limits. At a minimum, we insist that we can sort all the user
2815          * high priority levels into execution order.
2816          */
2817
2818         for_each_engine(engine, gt, id) {
2819                 struct i915_request *rq = NULL;
2820                 struct igt_live_test t;
2821                 IGT_TIMEOUT(end_time);
2822                 int prio = 0;
2823                 int err = 0;
2824                 u32 *cs;
2825
2826                 if (!intel_engine_has_preemption(engine))
2827                         continue;
2828
2829                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2830                         return -EIO;
2831
2832                 do {
2833                         struct i915_sched_attr attr = {
2834                                 .priority = I915_USER_PRIORITY(prio++),
2835                         };
2836
2837                         err = create_gang(engine, &rq);
2838                         if (err)
2839                                 break;
2840
2841                         /* Submit each spinner at increasing priority */
2842                         engine->schedule(rq, &attr);
2843
2844                         if (prio <= I915_PRIORITY_MAX)
2845                                 continue;
2846
2847                         if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2848                                 break;
2849
2850                         if (__igt_timeout(end_time, NULL))
2851                                 break;
2852                 } while (1);
2853                 pr_debug("%s: Preempt chain of %d requests\n",
2854                          engine->name, prio);
2855
2856                 /*
2857                  * The last spinner is the highest priority and
2858                  * should execute first. When that spinner completes,
2859                  * it will terminate the next lowest spinner until there
2860                  * are no more spinners and the gang is complete.
2861                  */
2862                 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2863                 if (!IS_ERR(cs)) {
2864                         *cs = 0;
2865                         i915_gem_object_unpin_map(rq->batch->obj);
2866                 } else {
2867                         err = PTR_ERR(cs);
2868                         intel_gt_set_wedged(gt);
2869                 }
2870
2871                 while (rq) { /* wait for each rq from highest to lowest prio */
2872                         struct i915_request *n =
2873                                 list_next_entry(rq, client_link);
2874
2875                         if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2876                                 struct drm_printer p =
2877                                         drm_info_printer(engine->i915->drm.dev);
2878
2879                                 pr_err("Failed to flush chain of %d requests, at %d\n",
2880                                        prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2881                                 intel_engine_dump(engine, &p,
2882                                                   "%s\n", engine->name);
2883
2884                                 err = -ETIME;
2885                         }
2886
2887                         i915_vma_put(rq->batch);
2888                         i915_request_put(rq);
2889                         rq = n;
2890                 }
2891
2892                 if (igt_live_test_end(&t))
2893                         err = -EIO;
2894                 if (err)
2895                         return err;
2896         }
2897
2898         return 0;
2899 }
2900
2901 static struct i915_vma *
2902 create_gpr_user(struct intel_engine_cs *engine,
2903                 struct i915_vma *result,
2904                 unsigned int offset)
2905 {
2906         struct drm_i915_gem_object *obj;
2907         struct i915_vma *vma;
2908         u32 *cs;
2909         int err;
2910         int i;
2911
2912         obj = i915_gem_object_create_internal(engine->i915, 4096);
2913         if (IS_ERR(obj))
2914                 return ERR_CAST(obj);
2915
2916         vma = i915_vma_instance(obj, result->vm, NULL);
2917         if (IS_ERR(vma)) {
2918                 i915_gem_object_put(obj);
2919                 return vma;
2920         }
2921
2922         err = i915_vma_pin(vma, 0, 0, PIN_USER);
2923         if (err) {
2924                 i915_vma_put(vma);
2925                 return ERR_PTR(err);
2926         }
2927
2928         cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2929         if (IS_ERR(cs)) {
2930                 i915_vma_put(vma);
2931                 return ERR_CAST(cs);
2932         }
2933
2934         /* All GPRs are cleared for new contexts. We use GPR(0) as a constant */
2935         *cs++ = MI_LOAD_REGISTER_IMM(1);
2936         *cs++ = CS_GPR(engine, 0);
2937         *cs++ = 1;
2938
2939         for (i = 1; i < NUM_GPR; i++) {
2940                 u64 addr;
2941
2942                 /*
2943                  * Perform: GPR[i]++
2944                  *
2945                  * As we read and write into the context saved GPR[i], if
2946                  * we restart this batch buffer from an earlier point, we
2947                  * will repeat the increment and store a value > 1.
2948                  */
2949                 *cs++ = MI_MATH(4);
2950                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
2951                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
2952                 *cs++ = MI_MATH_ADD;
2953                 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
2954
2955                 addr = result->node.start + offset + i * sizeof(*cs);
2956                 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
2957                 *cs++ = CS_GPR(engine, 2 * i);
2958                 *cs++ = lower_32_bits(addr);
2959                 *cs++ = upper_32_bits(addr);
2960
2961                 *cs++ = MI_SEMAPHORE_WAIT |
2962                         MI_SEMAPHORE_POLL |
2963                         MI_SEMAPHORE_SAD_GTE_SDD;
2964                 *cs++ = i;
2965                 *cs++ = lower_32_bits(result->node.start);
2966                 *cs++ = upper_32_bits(result->node.start);
2967         }
2968
2969         *cs++ = MI_BATCH_BUFFER_END;
2970         i915_gem_object_flush_map(obj);
2971         i915_gem_object_unpin_map(obj);
2972
2973         return vma;
2974 }
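
/*
 * Illustrative sketch (not part of the test flow above): each slot written by
 * the batch from create_gpr_user() holds a context-saved GPR that was
 * incremented exactly once, so a value greater than 1 means the increment was
 * replayed after a preemption rolled the batch back. A hypothetical checker
 * over such a result buffer:
 */
static int __maybe_unused check_gpr_result(const u32 *result, unsigned int count)
{
        unsigned int i;

        for (i = 1; i < count; i++) {
                if (result[i] > 1)
                        return -EINVAL; /* GPR[i] incremented more than once */
        }

        return 0;
}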
2975
2976 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
2977 {
2978         struct drm_i915_gem_object *obj;
2979         struct i915_vma *vma;
2980         int err;
2981
2982         obj = i915_gem_object_create_internal(gt->i915, sz);
2983         if (IS_ERR(obj))
2984                 return ERR_CAST(obj);
2985
2986         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
2987         if (IS_ERR(vma)) {
2988                 i915_gem_object_put(obj);
2989                 return vma;
2990         }
2991
2992         err = i915_ggtt_pin(vma, 0, 0);
2993         if (err) {
2994                 i915_vma_put(vma);
2995                 return ERR_PTR(err);
2996         }
2997
2998         return vma;
2999 }
3000
3001 static struct i915_request *
3002 create_gpr_client(struct intel_engine_cs *engine,
3003                   struct i915_vma *global,
3004                   unsigned int offset)
3005 {
3006         struct i915_vma *batch, *vma;
3007         struct intel_context *ce;
3008         struct i915_request *rq;
3009         int err;
3010
3011         ce = intel_context_create(engine);
3012         if (IS_ERR(ce))
3013                 return ERR_CAST(ce);
3014
3015         vma = i915_vma_instance(global->obj, ce->vm, NULL);
3016         if (IS_ERR(vma)) {
3017                 err = PTR_ERR(vma);
3018                 goto out_ce;
3019         }
3020
3021         err = i915_vma_pin(vma, 0, 0, PIN_USER);
3022         if (err)
3023                 goto out_ce;
3024
3025         batch = create_gpr_user(engine, vma, offset);
3026         if (IS_ERR(batch)) {
3027                 err = PTR_ERR(batch);
3028                 goto out_vma;
3029         }
3030
3031         rq = intel_context_create_request(ce);
3032         if (IS_ERR(rq)) {
3033                 err = PTR_ERR(rq);
3034                 goto out_batch;
3035         }
3036
3037         i915_vma_lock(vma);
3038         err = i915_request_await_object(rq, vma->obj, false);
3039         if (!err)
3040                 err = i915_vma_move_to_active(vma, rq, 0);
3041         i915_vma_unlock(vma);
3042
3043         i915_vma_lock(batch);
3044         if (!err)
3045                 err = i915_request_await_object(rq, batch->obj, false);
3046         if (!err)
3047                 err = i915_vma_move_to_active(batch, rq, 0);
3048         if (!err)
3049                 err = rq->engine->emit_bb_start(rq,
3050                                                 batch->node.start,
3051                                                 PAGE_SIZE, 0);
3052         i915_vma_unlock(batch);
3053         i915_vma_unpin(batch);
3054
3055         if (!err)
3056                 i915_request_get(rq);
3057         i915_request_add(rq);
3058
3059 out_batch:
3060         i915_vma_put(batch);
3061 out_vma:
3062         i915_vma_unpin(vma);
3063 out_ce:
3064         intel_context_put(ce);
3065         return err ? ERR_PTR(err) : rq;
3066 }
3067
3068 static int preempt_user(struct intel_engine_cs *engine,
3069                         struct i915_vma *global,
3070                         int id)
3071 {
3072         struct i915_sched_attr attr = {
3073                 .priority = I915_PRIORITY_MAX
3074         };
3075         struct i915_request *rq;
3076         int err = 0;
3077         u32 *cs;
3078
3079         rq = intel_engine_create_kernel_request(engine);
3080         if (IS_ERR(rq))
3081                 return PTR_ERR(rq);
3082
3083         cs = intel_ring_begin(rq, 4);
3084         if (IS_ERR(cs)) {
3085                 i915_request_add(rq);
3086                 return PTR_ERR(cs);
3087         }
3088
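             /*
              * Release the clients' MI_SEMAPHORE_WAIT by writing the new id
              * into dword 0 of the global buffer via the GGTT.
              */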
3089         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3090         *cs++ = i915_ggtt_offset(global);
3091         *cs++ = 0;
3092         *cs++ = id;
3093
3094         intel_ring_advance(rq, cs);
3095
3096         i915_request_get(rq);
3097         i915_request_add(rq);
3098
3099         engine->schedule(rq, &attr);
3100
3101         if (i915_request_wait(rq, 0, HZ / 2) < 0)
3102                 err = -ETIME;
3103         i915_request_put(rq);
3104
3105         return err;
3106 }
3107
3108 static int live_preempt_user(void *arg)
3109 {
3110         struct intel_gt *gt = arg;
3111         struct intel_engine_cs *engine;
3112         struct i915_vma *global;
3113         enum intel_engine_id id;
3114         u32 *result;
3115         int err = 0;
3116
3117         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3118                 return 0;
3119
3120         /*
3121          * In our other tests, we look at preemption in carefully
3122          * controlled conditions in the ringbuffer. Since most of the
3123          * time is spent in user batches, most of our preemptions naturally
3124          * occur there. We want to verify that when we preempt inside a batch
3125          * we continue on from the current instruction and do not roll back
3126          * to the start, or another earlier arbitration point.
3127          *
3128          * To verify this, we create a batch which is a mixture of
3129          * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3130          * a few preempting contexts thrown into the mix, we look for any
3131          * repeated instructions (which show up as incorrect values).
3132          */
3133
3134         global = create_global(gt, 4096);
3135         if (IS_ERR(global))
3136                 return PTR_ERR(global);
3137
3138         result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
3139         if (IS_ERR(result)) {
3140                 i915_vma_unpin_and_release(&global, 0);
3141                 return PTR_ERR(result);
3142         }
3143
3144         for_each_engine(engine, gt, id) {
3145                 struct i915_request *client[3] = {};
3146                 struct igt_live_test t;
3147                 int i;
3148
3149                 if (!intel_engine_has_preemption(engine))
3150                         continue;
3151
3152                 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3153                         continue; /* we need per-context GPR */
3154
3155                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3156                         err = -EIO;
3157                         break;
3158                 }
3159
3160                 memset(result, 0, 4096);
3161
3162                 for (i = 0; i < ARRAY_SIZE(client); i++) {
3163                         struct i915_request *rq;
3164
3165                         rq = create_gpr_client(engine, global,
3166                                                NUM_GPR * i * sizeof(u32));
3167                         if (IS_ERR(rq)) {
3168                                 err = PTR_ERR(rq);
                                     goto end_test;
                             }
3169
3170                         client[i] = rq;
3171                 }
3172
3173                 /* Continuously preempt the set of 3 running contexts */
3174                 for (i = 1; i <= NUM_GPR; i++) {
3175                         err = preempt_user(engine, global, i);
3176                         if (err)
3177                                 goto end_test;
3178                 }
3179
3180                 if (READ_ONCE(result[0]) != NUM_GPR) {
3181                         pr_err("%s: Failed to release semaphore\n",
3182                                engine->name);
3183                         err = -EIO;
3184                         goto end_test;
3185                 }
3186
3187                 for (i = 0; i < ARRAY_SIZE(client); i++) {
3188                         int gpr;
3189
3190                         if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3191                                 err = -ETIME;
3192                                 goto end_test;
3193                         }
3194
3195                         for (gpr = 1; gpr < NUM_GPR; gpr++) {
3196                                 if (result[NUM_GPR * i + gpr] != 1) {
3197                                         pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3198                                                engine->name,
3199                                                i, gpr, result[NUM_GPR * i + gpr]);
3200                                         err = -EINVAL;
3201                                         goto end_test;
3202                                 }
3203                         }
3204                 }
3205
3206 end_test:
3207                 for (i = 0; i < ARRAY_SIZE(client); i++) {
3208                         if (!client[i])
3209                                 break;
3210
3211                         i915_request_put(client[i]);
3212                 }
3213
3214                 /* Flush the semaphores on error */
3215                 smp_store_mb(result[0], -1);
3216                 if (igt_live_test_end(&t))
3217                         err = -EIO;
3218                 if (err)
3219                         break;
3220         }
3221
3222         i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3223         return err;
3224 }
3225
3226 static int live_preempt_timeout(void *arg)
3227 {
3228         struct intel_gt *gt = arg;
3229         struct i915_gem_context *ctx_hi, *ctx_lo;
3230         struct igt_spinner spin_lo;
3231         struct intel_engine_cs *engine;
3232         enum intel_engine_id id;
3233         int err = -ENOMEM;
3234
3235         /*
3236          * Check that we force preemption to occur by cancelling the previous
3237          * context if it refuses to yield the GPU.
3238          */
3239         if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3240                 return 0;
3241
3242         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3243                 return 0;
3244
3245         if (!intel_has_reset_engine(gt))
3246                 return 0;
3247
3248         if (igt_spinner_init(&spin_lo, gt))
3249                 return -ENOMEM;
3250
3251         ctx_hi = kernel_context(gt->i915);
3252         if (!ctx_hi)
3253                 goto err_spin_lo;
3254         ctx_hi->sched.priority =
3255                 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
3256
3257         ctx_lo = kernel_context(gt->i915);
3258         if (!ctx_lo)
3259                 goto err_ctx_hi;
3260         ctx_lo->sched.priority =
3261                 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
3262
3263         for_each_engine(engine, gt, id) {
3264                 unsigned long saved_timeout;
3265                 struct i915_request *rq;
3266
3267                 if (!intel_engine_has_preemption(engine))
3268                         continue;
3269
3270                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3271                                             MI_NOOP); /* preemption disabled */
3272                 if (IS_ERR(rq)) {
3273                         err = PTR_ERR(rq);
3274                         goto err_ctx_lo;
3275                 }
3276
3277                 i915_request_add(rq);
3278                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
3279                         intel_gt_set_wedged(gt);
3280                         err = -EIO;
3281                         goto err_ctx_lo;
3282                 }
3283
3284                 rq = igt_request_alloc(ctx_hi, engine);
3285                 if (IS_ERR(rq)) {
3286                         igt_spinner_end(&spin_lo);
3287                         err = PTR_ERR(rq);
3288                         goto err_ctx_lo;
3289                 }
3290
3291                 /* Flush the previous CS ack before changing timeouts */
3292                 while (READ_ONCE(engine->execlists.pending[0]))
3293                         cpu_relax();
3294
3295                 saved_timeout = engine->props.preempt_timeout_ms;
3296                 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3297
3298                 i915_request_get(rq);
3299                 i915_request_add(rq);
3300
3301                 intel_engine_flush_submission(engine);
3302                 engine->props.preempt_timeout_ms = saved_timeout;
3303
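                     /*
                      * With a 1ms (one jiffy) preemption timeout, the
                      * unyielding spinner should be promptly reset, letting
                      * this high priority request complete within the short
                      * wait below.
                      */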
3304                 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3305                         intel_gt_set_wedged(gt);
3306                         i915_request_put(rq);
3307                         err = -ETIME;
3308                         goto err_ctx_lo;
3309                 }
3310
3311                 igt_spinner_end(&spin_lo);
3312                 i915_request_put(rq);
3313         }
3314
3315         err = 0;
3316 err_ctx_lo:
3317         kernel_context_close(ctx_lo);
3318 err_ctx_hi:
3319         kernel_context_close(ctx_hi);
3320 err_spin_lo:
3321         igt_spinner_fini(&spin_lo);
3322         return err;
3323 }
3324
3325 static int random_range(struct rnd_state *rnd, int min, int max)
3326 {
3327         return i915_prandom_u32_max_state(max - min, rnd) + min;
3328 }
3329
3330 static int random_priority(struct rnd_state *rnd)
3331 {
3332         return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3333 }
3334
3335 struct preempt_smoke {
3336         struct intel_gt *gt;
3337         struct i915_gem_context **contexts;
3338         struct intel_engine_cs *engine;
3339         struct drm_i915_gem_object *batch;
3340         unsigned int ncontext;
3341         struct rnd_state prng;
3342         unsigned long count;
3343 };
3344
3345 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3346 {
3347         return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3348                                                           &smoke->prng)];
3349 }
3350
3351 static int smoke_submit(struct preempt_smoke *smoke,
3352                         struct i915_gem_context *ctx, int prio,
3353                         struct drm_i915_gem_object *batch)
3354 {
3355         struct i915_request *rq;
3356         struct i915_vma *vma = NULL;
3357         int err = 0;
3358
3359         if (batch) {
3360                 struct i915_address_space *vm;
3361
3362                 vm = i915_gem_context_get_vm_rcu(ctx);
3363                 vma = i915_vma_instance(batch, vm, NULL);
3364                 i915_vm_put(vm);
3365                 if (IS_ERR(vma))
3366                         return PTR_ERR(vma);
3367
3368                 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3369                 if (err)
3370                         return err;
3371         }
3372
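             /* The request we are about to queue inherits this priority */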
3373         ctx->sched.priority = prio;
3374
3375         rq = igt_request_alloc(ctx, smoke->engine);
3376         if (IS_ERR(rq)) {
3377                 err = PTR_ERR(rq);
3378                 goto unpin;
3379         }
3380
3381         if (vma) {
3382                 i915_vma_lock(vma);
3383                 err = i915_request_await_object(rq, vma->obj, false);
3384                 if (!err)
3385                         err = i915_vma_move_to_active(vma, rq, 0);
3386                 if (!err)
3387                         err = rq->engine->emit_bb_start(rq,
3388                                                         vma->node.start,
3389                                                         PAGE_SIZE, 0);
3390                 i915_vma_unlock(vma);
3391         }
3392
3393         i915_request_add(rq);
3394
3395 unpin:
3396         if (vma)
3397                 i915_vma_unpin(vma);
3398
3399         return err;
3400 }
3401
3402 static int smoke_crescendo_thread(void *arg)
3403 {
3404         struct preempt_smoke *smoke = arg;
3405         IGT_TIMEOUT(end_time);
3406         unsigned long count;
3407
3408         count = 0;
3409         do {
3410                 struct i915_gem_context *ctx = smoke_context(smoke);
3411                 int err;
3412
3413                 err = smoke_submit(smoke,
3414                                    ctx, count % I915_PRIORITY_MAX,
3415                                    smoke->batch);
3416                 if (err)
3417                         return err;
3418
3419                 count++;
3420         } while (!__igt_timeout(end_time, NULL));
3421
3422         smoke->count = count;
3423         return 0;
3424 }
3425
3426 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3427 #define BATCH BIT(0)
3428 {
3429         struct task_struct *tsk[I915_NUM_ENGINES] = {};
3430         struct preempt_smoke arg[I915_NUM_ENGINES];
3431         struct intel_engine_cs *engine;
3432         enum intel_engine_id id;
3433         unsigned long count;
3434         int err = 0;
3435
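             /* One submitter thread per engine, cycling through priorities */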
3436         for_each_engine(engine, smoke->gt, id) {
3437                 arg[id] = *smoke;
3438                 arg[id].engine = engine;
3439                 if (!(flags & BATCH))
3440                         arg[id].batch = NULL;
3441                 arg[id].count = 0;
3442
3443                 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
3444                                       "igt/smoke:%d", id);
3445                 if (IS_ERR(tsk[id])) {
3446                         err = PTR_ERR(tsk[id]);
3447                         break;
3448                 }
3449                 get_task_struct(tsk[id]);
3450         }
3451
3452         yield(); /* start all threads before we kthread_stop() */
3453
3454         count = 0;
3455         for_each_engine(engine, smoke->gt, id) {
3456                 int status;
3457
3458                 if (IS_ERR_OR_NULL(tsk[id]))
3459                         continue;
3460
3461                 status = kthread_stop(tsk[id]);
3462                 if (status && !err)
3463                         err = status;
3464
3465                 count += arg[id].count;
3466
3467                 put_task_struct(tsk[id]);
3468         }
3469
3470         pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3471                 count, flags,
3472                 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3473         return err;
3474 }
3475
3476 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3477 {
3478         enum intel_engine_id id;
3479         IGT_TIMEOUT(end_time);
3480         unsigned long count;
3481
3482         count = 0;
3483         do {
3484                 for_each_engine(smoke->engine, smoke->gt, id) {
3485                         struct i915_gem_context *ctx = smoke_context(smoke);
3486                         int err;
3487
3488                         err = smoke_submit(smoke,
3489                                            ctx, random_priority(&smoke->prng),
3490                                            flags & BATCH ? smoke->batch : NULL);
3491                         if (err)
3492                                 return err;
3493
3494                         count++;
3495                 }
3496         } while (!__igt_timeout(end_time, NULL));
3497
3498         pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3499                 count, flags,
3500                 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3501         return 0;
3502 }
3503
3504 static int live_preempt_smoke(void *arg)
3505 {
3506         struct preempt_smoke smoke = {
3507                 .gt = arg,
3508                 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3509                 .ncontext = 1024,
3510         };
3511         const unsigned int phase[] = { 0, BATCH };
3512         struct igt_live_test t;
3513         int err = -ENOMEM;
3514         u32 *cs;
3515         int n;
3516
3517         if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
3518                 return 0;
3519
3520         smoke.contexts = kmalloc_array(smoke.ncontext,
3521                                        sizeof(*smoke.contexts),
3522                                        GFP_KERNEL);
3523         if (!smoke.contexts)
3524                 return -ENOMEM;
3525
3526         smoke.batch =
3527                 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3528         if (IS_ERR(smoke.batch)) {
3529                 err = PTR_ERR(smoke.batch);
3530                 goto err_free;
3531         }
3532
3533         cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
3534         if (IS_ERR(cs)) {
3535                 err = PTR_ERR(cs);
3536                 goto err_batch;
3537         }
3538         for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3539                 cs[n] = MI_ARB_CHECK;
3540         cs[n] = MI_BATCH_BUFFER_END;
3541         i915_gem_object_flush_map(smoke.batch);
3542         i915_gem_object_unpin_map(smoke.batch);
3543
3544         if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3545                 err = -EIO;
3546                 goto err_batch;
3547         }
3548
3549         for (n = 0; n < smoke.ncontext; n++) {
3550                 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3551                 if (!smoke.contexts[n])
3552                         goto err_ctx;
3553         }
3554
3555         for (n = 0; n < ARRAY_SIZE(phase); n++) {
3556                 err = smoke_crescendo(&smoke, phase[n]);
3557                 if (err)
3558                         goto err_ctx;
3559
3560                 err = smoke_random(&smoke, phase[n]);
3561                 if (err)
3562                         goto err_ctx;
3563         }
3564
3565 err_ctx:
3566         if (igt_live_test_end(&t))
3567                 err = -EIO;
3568
3569         for (n = 0; n < smoke.ncontext; n++) {
3570                 if (!smoke.contexts[n])
3571                         break;
3572                 kernel_context_close(smoke.contexts[n]);
3573         }
3574
3575 err_batch:
3576         i915_gem_object_put(smoke.batch);
3577 err_free:
3578         kfree(smoke.contexts);
3579
3580         return err;
3581 }
3582
3583 static int nop_virtual_engine(struct intel_gt *gt,
3584                               struct intel_engine_cs **siblings,
3585                               unsigned int nsibling,
3586                               unsigned int nctx,
3587                               unsigned int flags)
3588 #define CHAIN BIT(0)
3589 {
3590         IGT_TIMEOUT(end_time);
3591         struct i915_request *request[16] = {};
3592         struct intel_context *ve[16];
3593         unsigned long n, prime, nc;
3594         struct igt_live_test t;
3595         ktime_t times[2] = {};
3596         int err;
3597
3598         GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3599
3600         for (n = 0; n < nctx; n++) {
3601                 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3602                 if (IS_ERR(ve[n])) {
3603                         err = PTR_ERR(ve[n]);
3604                         nctx = n;
3605                         goto out;
3606                 }
3607
3608                 err = intel_context_pin(ve[n]);
3609                 if (err) {
3610                         intel_context_put(ve[n]);
3611                         nctx = n;
3612                         goto out;
3613                 }
3614         }
3615
3616         err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3617         if (err)
3618                 goto out;
3619
3620         for_each_prime_number_from(prime, 1, 8192) {
3621                 times[1] = ktime_get_raw();
3622
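                     /*
                      * With CHAIN, all requests for one virtual context are
                      * queued before moving on to the next; otherwise we
                      * interleave one request per context on each pass.
                      */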
3623                 if (flags & CHAIN) {
3624                         for (nc = 0; nc < nctx; nc++) {
3625                                 for (n = 0; n < prime; n++) {
3626                                         struct i915_request *rq;
3627
3628                                         rq = i915_request_create(ve[nc]);
3629                                         if (IS_ERR(rq)) {
3630                                                 err = PTR_ERR(rq);
3631                                                 goto out;
3632                                         }
3633
3634                                         if (request[nc])
3635                                                 i915_request_put(request[nc]);
3636                                         request[nc] = i915_request_get(rq);
3637                                         i915_request_add(rq);
3638                                 }
3639                         }
3640                 } else {
3641                         for (n = 0; n < prime; n++) {
3642                                 for (nc = 0; nc < nctx; nc++) {
3643                                         struct i915_request *rq;
3644
3645                                         rq = i915_request_create(ve[nc]);
3646                                         if (IS_ERR(rq)) {
3647                                                 err = PTR_ERR(rq);
3648                                                 goto out;
3649                                         }
3650
3651                                         if (request[nc])
3652                                                 i915_request_put(request[nc]);
3653                                         request[nc] = i915_request_get(rq);
3654                                         i915_request_add(rq);
3655                                 }
3656                         }
3657                 }
3658
3659                 for (nc = 0; nc < nctx; nc++) {
3660                         if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3661                                 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3662                                        __func__, ve[0]->engine->name,
3663                                        request[nc]->fence.context,
3664                                        request[nc]->fence.seqno);
3665
3666                                 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3667                                           __func__, ve[0]->engine->name,
3668                                           request[nc]->fence.context,
3669                                           request[nc]->fence.seqno);
3670                                 GEM_TRACE_DUMP();
3671                                 intel_gt_set_wedged(gt);
3672                                 break;
3673                         }
3674                 }
3675
3676                 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3677                 if (prime == 1)
3678                         times[0] = times[1];
3679
3680                 for (nc = 0; nc < nctx; nc++) {
3681                         i915_request_put(request[nc]);
3682                         request[nc] = NULL;
3683                 }
3684
3685                 if (__igt_timeout(end_time, NULL))
3686                         break;
3687         }
3688
3689         err = igt_live_test_end(&t);
3690         if (err)
3691                 goto out;
3692
3693         pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3694                 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3695                 prime, div64_u64(ktime_to_ns(times[1]), prime));
3696
3697 out:
3698         if (igt_flush_test(gt->i915))
3699                 err = -EIO;
3700
3701         for (nc = 0; nc < nctx; nc++) {
3702                 i915_request_put(request[nc]);
3703                 intel_context_unpin(ve[nc]);
3704                 intel_context_put(ve[nc]);
3705         }
3706         return err;
3707 }
3708
3709 static int live_virtual_engine(void *arg)
3710 {
3711         struct intel_gt *gt = arg;
3712         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3713         struct intel_engine_cs *engine;
3714         enum intel_engine_id id;
3715         unsigned int class, inst;
3716         int err;
3717
3718         if (intel_uc_uses_guc_submission(&gt->uc))
3719                 return 0;
3720
3721         for_each_engine(engine, gt, id) {
3722                 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3723                 if (err) {
3724                         pr_err("Failed to wrap engine %s: err=%d\n",
3725                                engine->name, err);
3726                         return err;
3727                 }
3728         }
3729
3730         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3731                 int nsibling, n;
3732
3733                 nsibling = 0;
3734                 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3735                         if (!gt->engine_class[class][inst])
3736                                 continue;
3737
3738                         siblings[nsibling++] = gt->engine_class[class][inst];
3739                 }
3740                 if (nsibling < 2)
3741                         continue;
3742
3743                 for (n = 1; n <= nsibling + 1; n++) {
3744                         err = nop_virtual_engine(gt, siblings, nsibling,
3745                                                  n, 0);
3746                         if (err)
3747                                 return err;
3748                 }
3749
3750                 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3751                 if (err)
3752                         return err;
3753         }
3754
3755         return 0;
3756 }
3757
3758 static int mask_virtual_engine(struct intel_gt *gt,
3759                                struct intel_engine_cs **siblings,
3760                                unsigned int nsibling)
3761 {
3762         struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3763         struct intel_context *ve;
3764         struct igt_live_test t;
3765         unsigned int n;
3766         int err;
3767
3768         /*
3769          * Check that by setting the execution mask on a request, we can
3770          * restrict it to our desired engine within the virtual engine.
3771          */
3772
3773         ve = intel_execlists_create_virtual(siblings, nsibling);
3774         if (IS_ERR(ve)) {
3775                 err = PTR_ERR(ve);
3776                 goto out_close;
3777         }
3778
3779         err = intel_context_pin(ve);
3780         if (err)
3781                 goto out_put;
3782
3783         err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3784         if (err)
3785                 goto out_unpin;
3786
3787         for (n = 0; n < nsibling; n++) {
3788                 request[n] = i915_request_create(ve);
3789                 if (IS_ERR(request[n])) {
3790                         err = PTR_ERR(request[n]);
3791                         nsibling = n;
3792                         goto out;
3793                 }
3794
3795                 /* Reverse order as it's more likely to be unnatural */
3796                 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3797
3798                 i915_request_get(request[n]);
3799                 i915_request_add(request[n]);
3800         }
3801
3802         for (n = 0; n < nsibling; n++) {
3803                 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3804                         pr_err("%s(%s): wait for %llx:%lld timed out\n",
3805                                __func__, ve->engine->name,
3806                                request[n]->fence.context,
3807                                request[n]->fence.seqno);
3808
3809                         GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3810                                   __func__, ve->engine->name,
3811                                   request[n]->fence.context,
3812                                   request[n]->fence.seqno);
3813                         GEM_TRACE_DUMP();
3814                         intel_gt_set_wedged(gt);
3815                         err = -EIO;
3816                         goto out;
3817                 }
3818
3819                 if (request[n]->engine != siblings[nsibling - n - 1]) {
3820                         pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3821                                request[n]->engine->name,
3822                                siblings[nsibling - n - 1]->name);
3823                         err = -EINVAL;
3824                         goto out;
3825                 }
3826         }
3827
3828         err = igt_live_test_end(&t);
3829 out:
3830         if (igt_flush_test(gt->i915))
3831                 err = -EIO;
3832
3833         for (n = 0; n < nsibling; n++)
3834                 i915_request_put(request[n]);
3835
3836 out_unpin:
3837         intel_context_unpin(ve);
3838 out_put:
3839         intel_context_put(ve);
3840 out_close:
3841         return err;
3842 }
3843
3844 static int live_virtual_mask(void *arg)
3845 {
3846         struct intel_gt *gt = arg;
3847         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3848         unsigned int class, inst;
3849         int err;
3850
3851         if (intel_uc_uses_guc_submission(&gt->uc))
3852                 return 0;
3853
3854         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3855                 unsigned int nsibling;
3856
3857                 nsibling = 0;
3858                 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3859                         if (!gt->engine_class[class][inst])
3860                                 break;
3861
3862                         siblings[nsibling++] = gt->engine_class[class][inst];
3863                 }
3864                 if (nsibling < 2)
3865                         continue;
3866
3867                 err = mask_virtual_engine(gt, siblings, nsibling);
3868                 if (err)
3869                         return err;
3870         }
3871
3872         return 0;
3873 }
3874
3875 static int preserved_virtual_engine(struct intel_gt *gt,
3876                                     struct intel_engine_cs **siblings,
3877                                     unsigned int nsibling)
3878 {
3879         struct i915_request *last = NULL;
3880         struct intel_context *ve;
3881         struct i915_vma *scratch;
3882         struct igt_live_test t;
3883         unsigned int n;
3884         int err = 0;
3885         u32 *cs;
3886
3887         scratch = create_scratch(siblings[0]->gt);
3888         if (IS_ERR(scratch))
3889                 return PTR_ERR(scratch);
3890
3891         err = i915_vma_sync(scratch);
3892         if (err)
3893                 goto out_scratch;
3894
3895         ve = intel_execlists_create_virtual(siblings, nsibling);
3896         if (IS_ERR(ve)) {
3897                 err = PTR_ERR(ve);
3898                 goto out_scratch;
3899         }
3900
3901         err = intel_context_pin(ve);
3902         if (err)
3903                 goto out_put;
3904
3905         err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3906         if (err)
3907                 goto out_unpin;
3908
3909         for (n = 0; n < NUM_GPR_DW; n++) {
3910                 struct intel_engine_cs *engine = siblings[n % nsibling];
3911                 struct i915_request *rq;
3912
3913                 rq = i915_request_create(ve);
3914                 if (IS_ERR(rq)) {
3915                         err = PTR_ERR(rq);
3916                         goto out_end;
3917                 }
3918
3919                 i915_request_put(last);
3920                 last = i915_request_get(rq);
3921
3922                 cs = intel_ring_begin(rq, 8);
3923                 if (IS_ERR(cs)) {
3924                         i915_request_add(rq);
3925                         err = PTR_ERR(cs);
3926                         goto out_end;
3927                 }
3928
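                     /*
                      * Save CS_GPR(n), primed to n by the previous request on
                      * a different sibling (or 0 from the default image for
                      * n == 0), then load n + 1 into the next GPR for the
                      * following request. The readback only matches if the
                      * GPRs are preserved in the context image as it moves
                      * between engines.
                      */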
3929                 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3930                 *cs++ = CS_GPR(engine, n);
3931                 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3932                 *cs++ = 0;
3933
3934                 *cs++ = MI_LOAD_REGISTER_IMM(1);
3935                 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
3936                 *cs++ = n + 1;
3937
3938                 *cs++ = MI_NOOP;
3939                 intel_ring_advance(rq, cs);
3940
3941                 /* Restrict this request to run on a particular engine */
3942                 rq->execution_mask = engine->mask;
3943                 i915_request_add(rq);
3944         }
3945
3946         if (i915_request_wait(last, 0, HZ / 5) < 0) {
3947                 err = -ETIME;
3948                 goto out_end;
3949         }
3950
3951         cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3952         if (IS_ERR(cs)) {
3953                 err = PTR_ERR(cs);
3954                 goto out_end;
3955         }
3956
3957         for (n = 0; n < NUM_GPR_DW; n++) {
3958                 if (cs[n] != n) {
3959                         pr_err("Incorrect value[%d] found for GPR[%d]\n",
3960                                cs[n], n);
3961                         err = -EINVAL;
3962                         break;
3963                 }
3964         }
3965
3966         i915_gem_object_unpin_map(scratch->obj);
3967
3968 out_end:
3969         if (igt_live_test_end(&t))
3970                 err = -EIO;
3971         i915_request_put(last);
3972 out_unpin:
3973         intel_context_unpin(ve);
3974 out_put:
3975         intel_context_put(ve);
3976 out_scratch:
3977         i915_vma_unpin_and_release(&scratch, 0);
3978         return err;
3979 }
3980
3981 static int live_virtual_preserved(void *arg)
3982 {
3983         struct intel_gt *gt = arg;
3984         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3985         unsigned int class, inst;
3986
3987         /*
3988          * Check that the context image retains non-privileged (user) registers
3989          * from one engine to the next. For this we check that the CS_GPR
3990          * are preserved.
3991          */
3992
3993         if (intel_uc_uses_guc_submission(&gt->uc))
3994                 return 0;
3995
3996         /* As we use CS_GPR we cannot run before they existed on all engines. */
3997         if (INTEL_GEN(gt->i915) < 9)
3998                 return 0;
3999
4000         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4001                 int nsibling, err;
4002
4003                 nsibling = 0;
4004                 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
4005                         if (!gt->engine_class[class][inst])
4006                                 continue;
4007
4008                         siblings[nsibling++] = gt->engine_class[class][inst];
4009                 }
4010                 if (nsibling < 2)
4011                         continue;
4012
4013                 err = preserved_virtual_engine(gt, siblings, nsibling);
4014                 if (err)
4015                         return err;
4016         }
4017
4018         return 0;
4019 }
4020
4021 static int bond_virtual_engine(struct intel_gt *gt,
4022                                unsigned int class,
4023                                struct intel_engine_cs **siblings,
4024                                unsigned int nsibling,
4025                                unsigned int flags)
4026 #define BOND_SCHEDULE BIT(0)
4027 {
4028         struct intel_engine_cs *master;
4029         struct i915_request *rq[16];
4030         enum intel_engine_id id;
4031         struct igt_spinner spin;
4032         unsigned long n;
4033         int err;
4034
4035         /*
4036          * A set of bonded requests is intended to be run concurrently
4037          * across a number of engines. We use one request per-engine
4038          * and a magic fence to schedule each of the bonded requests
4039          * at the same time. A consequence of our current scheduler is that
4040          * we only move requests to the HW ready queue when the request
4041          * becomes ready, that is when all of its prerequisite fences have
4042          * been signaled. As one of those fences is the master submit fence,
4043          * there is a delay on all secondary fences as the HW may be
4044          * currently busy. Equally, as all the requests are independent,
4045          * they may have other fences that delay individual request
4046          * submission to HW. Ergo, we do not guarantee that all requests are
4047          * immediately submitted to HW at the same time, just that if the
4048          * rules are abided by, they are ready at the same time as the
4049          * first is submitted. Userspace can embed semaphores in its batch
4050          * to ensure parallel execution of its phases as it requires.
4051          * Though naturally it gets requested that perhaps the scheduler should
4052          * take care of parallel execution, even across preemption events on
4053          * different HW. (The proper answer is of course "lalalala".)
4054          *
4055          * With the submit-fence, we have identified three possible phases
4056          * of synchronisation depending on the master fence: queued (not
4057          * ready), executing, and signaled. The first two are quite simple
4058          * and checked below. However, the signaled master fence handling is
4059          * contentious. Currently we do not distinguish between a signaled
4060          * fence and an expired fence, as once signaled it does not convey
4061          * any information about the previous execution. It may even be freed
4062          * and hence checking later it may not exist at all. Ergo we currently
4063          * do not apply the bonding constraint for an already signaled fence,
4064          * as our expectation is that it should not constrain the secondaries
4065          * and is outside of the scope of the bonded request API (i.e. all
4066          * userspace requests are meant to be running in parallel). As
4067          * it imposes no constraint, and is effectively a no-op, we do not
4068          * check below as normal execution flows are checked extensively above.
4069          *
4070          * XXX Is the degenerate handling of signaled submit fences the
4071          * expected behaviour for userspace?
4072          */
4073
4074         GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4075
4076         if (igt_spinner_init(&spin, gt))
4077                 return -ENOMEM;
4078
4079         err = 0;
4080         rq[0] = ERR_PTR(-ENOMEM);
4081         for_each_engine(master, gt, id) {
4082                 struct i915_sw_fence fence = {};
4083                 struct intel_context *ce;
4084
4085                 if (master->class == class)
4086                         continue;
4087
4088                 ce = intel_context_create(master);
4089                 if (IS_ERR(ce)) {
4090                         err = PTR_ERR(ce);
4091                         goto out;
4092                 }
4093
4094                 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4095
4096                 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4097                 intel_context_put(ce);
4098                 if (IS_ERR(rq[0])) {
4099                         err = PTR_ERR(rq[0]);
4100                         goto out;
4101                 }
4102                 i915_request_get(rq[0]);
4103
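                     /*
                      * For BOND_SCHEDULE, hold the master back with an
                      * external fence so the bonds are set up while the
                      * master submit fence is still queued, not yet executing.
                      */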
4104                 if (flags & BOND_SCHEDULE) {
4105                         onstack_fence_init(&fence);
4106                         err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4107                                                                &fence,
4108                                                                GFP_KERNEL);
4109                 }
4110
4111                 i915_request_add(rq[0]);
4112                 if (err < 0)
4113                         goto out;
4114
4115                 if (!(flags & BOND_SCHEDULE) &&
4116                     !igt_wait_for_spinner(&spin, rq[0])) {
4117                         err = -EIO;
4118                         goto out;
4119                 }
4120
4121                 for (n = 0; n < nsibling; n++) {
4122                         struct intel_context *ve;
4123
4124                         ve = intel_execlists_create_virtual(siblings, nsibling);
4125                         if (IS_ERR(ve)) {
4126                                 err = PTR_ERR(ve);
4127                                 onstack_fence_fini(&fence);
4128                                 goto out;
4129                         }
4130
4131                         err = intel_virtual_engine_attach_bond(ve->engine,
4132                                                                master,
4133                                                                siblings[n]);
4134                         if (err) {
4135                                 intel_context_put(ve);
4136                                 onstack_fence_fini(&fence);
4137                                 goto out;
4138                         }
4139
4140                         err = intel_context_pin(ve);
4141                         intel_context_put(ve);
4142                         if (err) {
4143                                 onstack_fence_fini(&fence);
4144                                 goto out;
4145                         }
4146
4147                         rq[n + 1] = i915_request_create(ve);
4148                         intel_context_unpin(ve);
4149                         if (IS_ERR(rq[n + 1])) {
4150                                 err = PTR_ERR(rq[n + 1]);
4151                                 onstack_fence_fini(&fence);
4152                                 goto out;
4153                         }
4154                         i915_request_get(rq[n + 1]);
4155
4156                         err = i915_request_await_execution(rq[n + 1],
4157                                                            &rq[0]->fence,
4158                                                            ve->engine->bond_execute);
4159                         i915_request_add(rq[n + 1]);
4160                         if (err < 0) {
4161                                 onstack_fence_fini(&fence);
4162                                 goto out;
4163                         }
4164                 }
4165                 onstack_fence_fini(&fence);
4166                 intel_engine_flush_submission(master);
4167                 igt_spinner_end(&spin);
4168
4169                 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4170                         pr_err("Master request did not execute (on %s)!\n",
4171                                rq[0]->engine->name);
4172                         err = -EIO;
4173                         goto out;
4174                 }
4175
4176                 for (n = 0; n < nsibling; n++) {
4177                         if (i915_request_wait(rq[n + 1], 0,
4178                                               MAX_SCHEDULE_TIMEOUT) < 0) {
4179                                 err = -EIO;
4180                                 goto out;
4181                         }
4182
4183                         if (rq[n + 1]->engine != siblings[n]) {
4184                                 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4185                                        siblings[n]->name,
4186                                        rq[n + 1]->engine->name,
4187                                        rq[0]->engine->name);
4188                                 err = -EINVAL;
4189                                 goto out;
4190                         }
4191                 }
4192
4193                 for (n = 0; !IS_ERR(rq[n]); n++)
4194                         i915_request_put(rq[n]);
4195                 rq[0] = ERR_PTR(-ENOMEM);
4196         }
4197
4198 out:
4199         for (n = 0; !IS_ERR(rq[n]); n++)
4200                 i915_request_put(rq[n]);
4201         if (igt_flush_test(gt->i915))
4202                 err = -EIO;
4203
4204         igt_spinner_fini(&spin);
4205         return err;
4206 }
4207
4208 static int live_virtual_bond(void *arg)
4209 {
4210         static const struct phase {
4211                 const char *name;
4212                 unsigned int flags;
4213         } phases[] = {
4214                 { "", 0 },
4215                 { "schedule", BOND_SCHEDULE },
4216                 { },
4217         };
4218         struct intel_gt *gt = arg;
4219         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4220         unsigned int class, inst;
4221         int err;
4222
4223         if (intel_uc_uses_guc_submission(&gt->uc))
4224                 return 0;
4225
4226         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4227                 const struct phase *p;
4228                 int nsibling;
4229
4230                 nsibling = 0;
4231                 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
4232                         if (!gt->engine_class[class][inst])
4233                                 break;
4234
4235                         GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
4236                         siblings[nsibling++] = gt->engine_class[class][inst];
4237                 }
4238                 if (nsibling < 2)
4239                         continue;
4240
4241                 for (p = phases; p->name; p++) {
4242                         err = bond_virtual_engine(gt,
4243                                                   class, siblings, nsibling,
4244                                                   p->flags);
4245                         if (err) {
4246                                 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4247                                        __func__, p->name, class, nsibling, err);
4248                                 return err;
4249                         }
4250                 }
4251         }
4252
4253         return 0;
4254 }
4255
4256 static int reset_virtual_engine(struct intel_gt *gt,
4257                                 struct intel_engine_cs **siblings,
4258                                 unsigned int nsibling)
4259 {
4260         struct intel_engine_cs *engine;
4261         struct intel_context *ve;
4262         struct igt_spinner spin;
4263         struct i915_request *rq;
4264         unsigned int n;
4265         int err = 0;
4266
4267         /*
4268          * In order to support offline error capture for fast preempt reset,
4269          * we need to decouple the guilty request and ensure that it and its
4270          * descendents are not executed while the capture is in progress.
4271          * descendants are not executed while the capture is in progress.
4272
4273         if (igt_spinner_init(&spin, gt))
4274                 return -ENOMEM;
4275
4276         ve = intel_execlists_create_virtual(siblings, nsibling);
4277         if (IS_ERR(ve)) {
4278                 err = PTR_ERR(ve);
4279                 goto out_spin;
4280         }
4281
4282         for (n = 0; n < nsibling; n++)
4283                 engine_heartbeat_disable(siblings[n]);
4284
4285         rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4286         if (IS_ERR(rq)) {
4287                 err = PTR_ERR(rq);
4288                 goto out_heartbeat;
4289         }
4290         i915_request_add(rq);
4291
4292         if (!igt_wait_for_spinner(&spin, rq)) {
4293                 intel_gt_set_wedged(gt);
4294                 err = -ETIME;
4295                 goto out_heartbeat;
4296         }
4297
4298         engine = rq->engine;
4299         GEM_BUG_ON(engine == ve->engine);
4300
4301         /* Take ownership of the reset and tasklet */
4302         if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
4303                              &gt->reset.flags)) {
4304                 intel_gt_set_wedged(gt);
4305                 err = -EBUSY;
4306                 goto out_heartbeat;
4307         }
4308         tasklet_disable(&engine->execlists.tasklet);
4309
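             /*
              * Run the submission tasklet by hand (it is disabled above) so
              * that the spinner is certainly the active request before we
              * fake the failed preemption.
              */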
4310         engine->execlists.tasklet.func(engine->execlists.tasklet.data);
4311         GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4312
4313         /* Fake a preemption event; failed of course */
4314         spin_lock_irq(&engine->active.lock);
4315         __unwind_incomplete_requests(engine);
4316         spin_unlock_irq(&engine->active.lock);
4317         GEM_BUG_ON(rq->engine != ve->engine);
4318
4319         /* Reset the engine while keeping our active request on hold */
4320         execlists_hold(engine, rq);
4321         GEM_BUG_ON(!i915_request_on_hold(rq));
4322
4323         intel_engine_reset(engine, NULL);
4324         GEM_BUG_ON(rq->fence.error != -EIO);
4325
4326         /* Release our grasp on the engine, letting CS flow again */
4327         tasklet_enable(&engine->execlists.tasklet);
4328         clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
4329
4330         /* Check that we do not resubmit the held request */
4331         i915_request_get(rq);
4332         if (!i915_request_wait(rq, 0, HZ / 5)) {
4333                 pr_err("%s: on hold request completed!\n",
4334                        engine->name);
4335                 intel_gt_set_wedged(gt);
4336                 err = -EIO;
4337                 goto out_rq;
4338         }
4339         GEM_BUG_ON(!i915_request_on_hold(rq));
4340
4341         /* But is resubmitted on release */
4342         execlists_unhold(engine, rq);
4343         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4344                 pr_err("%s: held request did not complete!\n",
4345                        engine->name);
4346                 intel_gt_set_wedged(gt);
4347                 err = -ETIME;
4348         }
4349
4350 out_rq:
4351         i915_request_put(rq);
4352 out_heartbeat:
4353         for (n = 0; n < nsibling; n++)
4354                 engine_heartbeat_enable(siblings[n]);
4355
4356         intel_context_put(ve);
4357 out_spin:
4358         igt_spinner_fini(&spin);
4359         return err;
4360 }
4361
4362 static int live_virtual_reset(void *arg)
4363 {
4364         struct intel_gt *gt = arg;
4365         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4366         unsigned int class, inst;
4367
4368         /*
4369          * Check that we handle a reset event within a virtual engine.
4370          * Only the physical engine is reset, but we have to check the flow
4371          * of the virtual requests around the reset, and make sure it is not
4372          * forgotten.
4373          */
4374
4375         if (intel_uc_uses_guc_submission(&gt->uc))
4376                 return 0;
4377
4378         if (!intel_has_reset_engine(gt))
4379                 return 0;
4380
4381         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4382                 int nsibling, err;
4383
4384                 nsibling = 0;
4385                 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
4386                         if (!gt->engine_class[class][inst])
4387                                 continue;
4388
4389                         siblings[nsibling++] = gt->engine_class[class][inst];
4390                 }
4391                 if (nsibling < 2)
4392                         continue;
4393
4394                 err = reset_virtual_engine(gt, siblings, nsibling);
4395                 if (err)
4396                         return err;
4397         }
4398
4399         return 0;
4400 }
4401
4402 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4403 {
4404         static const struct i915_subtest tests[] = {
4405                 SUBTEST(live_sanitycheck),
4406                 SUBTEST(live_unlite_switch),
4407                 SUBTEST(live_unlite_preempt),
4408                 SUBTEST(live_pin_rewind),
4409                 SUBTEST(live_hold_reset),
4410                 SUBTEST(live_error_interrupt),
4411                 SUBTEST(live_timeslice_preempt),
4412                 SUBTEST(live_timeslice_rewind),
4413                 SUBTEST(live_timeslice_queue),
4414                 SUBTEST(live_timeslice_nopreempt),
4415                 SUBTEST(live_busywait_preempt),
4416                 SUBTEST(live_preempt),
4417                 SUBTEST(live_late_preempt),
4418                 SUBTEST(live_nopreempt),
4419                 SUBTEST(live_preempt_cancel),
4420                 SUBTEST(live_suppress_self_preempt),
4421                 SUBTEST(live_suppress_wait_preempt),
4422                 SUBTEST(live_chain_preempt),
4423                 SUBTEST(live_preempt_gang),
4424                 SUBTEST(live_preempt_timeout),
4425                 SUBTEST(live_preempt_user),
4426                 SUBTEST(live_preempt_smoke),
4427                 SUBTEST(live_virtual_engine),
4428                 SUBTEST(live_virtual_mask),
4429                 SUBTEST(live_virtual_preserved),
4430                 SUBTEST(live_virtual_bond),
4431                 SUBTEST(live_virtual_reset),
4432         };
4433
4434         if (!HAS_EXECLISTS(i915))
4435                 return 0;
4436
4437         if (intel_gt_is_wedged(&i915->gt))
4438                 return 0;
4439
4440         return intel_gt_live_subtests(tests, &i915->gt);
4441 }
4442
4443 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
4444 {
4445         const u32 offset =
4446                 i915_ggtt_offset(ce->engine->status_page.vma) +
4447                 offset_in_page(slot);
4448         struct i915_request *rq;
4449         u32 *cs;
4450
4451         rq = intel_context_create_request(ce);
4452         if (IS_ERR(rq))
4453                 return PTR_ERR(rq);
4454
4455         cs = intel_ring_begin(rq, 4);
4456         if (IS_ERR(cs)) {
4457                 i915_request_add(rq);
4458                 return PTR_ERR(cs);
4459         }
4460
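             /* Write 1 into the chosen slot of the engine's status page */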
4461         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
4462         *cs++ = offset;
4463         *cs++ = 0;
4464         *cs++ = 1;
4465
4466         intel_ring_advance(rq, cs);
4467
4468         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4469         i915_request_add(rq);
4470         return 0;
4471 }
4472
4473 static int context_flush(struct intel_context *ce, long timeout)
4474 {
4475         struct i915_request *rq;
4476         struct dma_fence *fence;
4477         int err = 0;
4478
4479         rq = intel_engine_create_kernel_request(ce->engine);
4480         if (IS_ERR(rq))
4481                 return PTR_ERR(rq);
4482
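             /* Queue the flush behind the last request on this context */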
4483         fence = i915_active_fence_get(&ce->timeline->last_request);
4484         if (fence) {
4485                 i915_request_await_dma_fence(rq, fence);
4486                 dma_fence_put(fence);
4487         }
4488
4489         rq = i915_request_get(rq);
4490         i915_request_add(rq);
4491         if (i915_request_wait(rq, 0, timeout) < 0)
4492                 err = -ETIME;
4493         i915_request_put(rq);
4494
4495         rmb(); /* We know the request is written, make sure all state is too! */
4496         return err;
4497 }
4498
4499 static int live_lrc_layout(void *arg)
4500 {
4501         struct intel_gt *gt = arg;
4502         struct intel_engine_cs *engine;
4503         enum intel_engine_id id;
4504         u32 *lrc;
4505         int err;
4506
4507         /*
4508          * Check the registers offsets we use to create the initial reg state
4509          * match the layout saved by HW.
4510          */
4511
4512         lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
4513         if (!lrc)
4514                 return -ENOMEM;
4515
4516         err = 0;
4517         for_each_engine(engine, gt, id) {
4518                 u32 *hw;
4519                 int dw;
4520
4521                 if (!engine->default_state)
4522                         continue;
4523
4524                 hw = shmem_pin_map(engine->default_state);
4525                 if (IS_ERR(hw)) {
4526                         err = PTR_ERR(hw);
4527                         break;
4528                 }
4529                 hw += LRC_STATE_OFFSET / sizeof(*hw);
4530
4531                 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
4532                                          engine->kernel_context,
4533                                          engine,
4534                                          engine->kernel_context->ring,
4535                                          true);
4536
4537                 dw = 0;
4538                 do {
4539                         u32 lri = hw[dw];
4540
4541                         if (lri == 0) {
4542                                 dw++;
4543                                 continue;
4544                         }
4545
4546                         if (lrc[dw] == 0) {
4547                                 pr_debug("%s: skipped instruction %x at dword %d\n",
4548                                          engine->name, lri, dw);
4549                                 dw++;
4550                                 continue;
4551                         }
4552
4553                         if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4554                                 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4555                                        engine->name, dw, lri);
4556                                 err = -EINVAL;
4557                                 break;
4558                         }
4559
4560                         if (lrc[dw] != lri) {
4561                                 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4562                                        engine->name, dw, lri, lrc[dw]);
4563                                 err = -EINVAL;
4564                                 break;
4565                         }
4566
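                        /*
                         * The low 7 bits of the LRI header hold the command
                         * length, i.e. 2*n - 1 for n register/value pairs;
                         * adding one gives the number of payload dwords to
                         * step over (register offset + value per pair).
                         */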
4567                         lri &= 0x7f;
4568                         lri++;
4569                         dw++;
4570
4571                         while (lri) {
4572                                 if (hw[dw] != lrc[dw]) {
4573                                         pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4574                                                engine->name, dw, hw[dw], lrc[dw]);
4575                                         err = -EINVAL;
4576                                         break;
4577                                 }
4578
4579                                 /*
4580                                  * Skip over the actual register value as we
4581                                  * expect that to differ.
4582                                  */
4583                                 dw += 2;
4584                                 lri -= 2;
4585                         }
4586                 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4587
4588                 if (err) {
4589                         pr_info("%s: HW register image:\n", engine->name);
4590                         igt_hexdump(hw, PAGE_SIZE);
4591
4592                         pr_info("%s: SW register image:\n", engine->name);
4593                         igt_hexdump(lrc, PAGE_SIZE);
4594                 }
4595
4596                 shmem_unpin_map(engine->default_state, hw);
4597                 if (err)
4598                         break;
4599         }
4600
4601         kfree(lrc);
4602         return err;
4603 }
4604
4605 static int find_offset(const u32 *lri, u32 offset)
4606 {
4607         int i;
4608
4609         for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
4610                 if (lri[i] == offset)
4611                         return i;
4612
4613         return -1;
4614 }
4615
4616 static int live_lrc_fixed(void *arg)
4617 {
4618         struct intel_gt *gt = arg;
4619         struct intel_engine_cs *engine;
4620         enum intel_engine_id id;
4621         int err = 0;
4622
4623         /*
4624          * Check the assumed register offsets match the actual locations in
4625          * the context image.
4626          */
4627
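        /*
         * Note that the CTX_* indices in the table below point at the value
         * dword of each register pair in the image; the register offset
         * itself lives one dword earlier, hence the "- 1".
         */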
4628         for_each_engine(engine, gt, id) {
4629                 const struct {
4630                         u32 reg;
4631                         u32 offset;
4632                         const char *name;
4633                 } tbl[] = {
4634                         {
4635                                 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
4636                                 CTX_RING_START - 1,
4637                                 "RING_START"
4638                         },
4639                         {
4640                                 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
4641                                 CTX_RING_CTL - 1,
4642                                 "RING_CTL"
4643                         },
4644                         {
4645                                 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
4646                                 CTX_RING_HEAD - 1,
4647                                 "RING_HEAD"
4648                         },
4649                         {
4650                                 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
4651                                 CTX_RING_TAIL - 1,
4652                                 "RING_TAIL"
4653                         },
4654                         {
4655                                 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
4656                                 lrc_ring_mi_mode(engine),
4657                                 "RING_MI_MODE"
4658                         },
4659                         {
4660                                 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
4661                                 CTX_BB_STATE - 1,
4662                                 "BB_STATE"
4663                         },
4664                         {
4665                                 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
4666                                 lrc_ring_wa_bb_per_ctx(engine),
4667                                 "RING_BB_PER_CTX_PTR"
4668                         },
4669                         {
4670                                 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
4671                                 lrc_ring_indirect_ptr(engine),
4672                                 "RING_INDIRECT_CTX_PTR"
4673                         },
4674                         {
4675                                 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
4676                                 lrc_ring_indirect_offset(engine),
4677                                 "RING_INDIRECT_CTX_OFFSET"
4678                         },
4679                         {
4680                                 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
4681                                 CTX_TIMESTAMP - 1,
4682                                 "RING_CTX_TIMESTAMP"
4683                         },
4684                         {
4685                                 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
4686                                 lrc_ring_gpr0(engine),
4687                                 "RING_CS_GPR0"
4688                         },
4689                         {
4690                                 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
4691                                 lrc_ring_cmd_buf_cctl(engine),
4692                                 "RING_CMD_BUF_CCTL"
4693                         },
4694                         { },
4695                 }, *t;
4696                 u32 *hw;
4697
4698                 if (!engine->default_state)
4699                         continue;
4700
4701                 hw = shmem_pin_map(engine->default_state);
4702                 if (IS_ERR(hw)) {
4703                         err = PTR_ERR(hw);
4704                         break;
4705                 }
4706                 hw += LRC_STATE_OFFSET / sizeof(*hw);
4707
4708                 for (t = tbl; t->name; t++) {
4709                         int dw = find_offset(hw, t->reg);
4710
4711                         if (dw != t->offset) {
4712                                 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4713                                        engine->name,
4714                                        t->name,
4715                                        t->reg,
4716                                        dw,
4717                                        t->offset);
4718                                 err = -EINVAL;
4719                         }
4720                 }
4721
4722                 shmem_unpin_map(engine->default_state, hw);
4723         }
4724
4725         return err;
4726 }
4727
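/*
 * Submit a request on a fresh context that uses MI_STORE_REGISTER_MEM to
 * copy the live RING_START and RING_TAIL registers into @scratch, then
 * compare what the HW saw against the values we expect from the context's
 * ring setup.
 */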
4728 static int __live_lrc_state(struct intel_engine_cs *engine,
4729                             struct i915_vma *scratch)
4730 {
4731         struct intel_context *ce;
4732         struct i915_request *rq;
4733         enum {
4734                 RING_START_IDX = 0,
4735                 RING_TAIL_IDX,
4736                 MAX_IDX
4737         };
4738         u32 expected[MAX_IDX];
4739         u32 *cs;
4740         int err;
4741         int n;
4742
4743         ce = intel_context_create(engine);
4744         if (IS_ERR(ce))
4745                 return PTR_ERR(ce);
4746
4747         err = intel_context_pin(ce);
4748         if (err)
4749                 goto err_put;
4750
4751         rq = i915_request_create(ce);
4752         if (IS_ERR(rq)) {
4753                 err = PTR_ERR(rq);
4754                 goto err_unpin;
4755         }
4756
4757         cs = intel_ring_begin(rq, 4 * MAX_IDX);
4758         if (IS_ERR(cs)) {
4759                 err = PTR_ERR(cs);
4760                 i915_request_add(rq);
4761                 goto err_unpin;
4762         }
4763
4764         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4765         *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
4766         *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
4767         *cs++ = 0;
4768
4769         expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
4770
4771         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4772         *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
4773         *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
4774         *cs++ = 0;
4775
4776         i915_vma_lock(scratch);
4777         err = i915_request_await_object(rq, scratch->obj, true);
4778         if (!err)
4779                 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4780         i915_vma_unlock(scratch);
4781
4782         i915_request_get(rq);
4783         i915_request_add(rq);
4784         if (err)
4785                 goto err_rq;
4786
4787         intel_engine_flush_submission(engine);
4788         expected[RING_TAIL_IDX] = ce->ring->tail;
4789
4790         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4791                 err = -ETIME;
4792                 goto err_rq;
4793         }
4794
4795         cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4796         if (IS_ERR(cs)) {
4797                 err = PTR_ERR(cs);
4798                 goto err_rq;
4799         }
4800
4801         for (n = 0; n < MAX_IDX; n++) {
4802                 if (cs[n] != expected[n]) {
4803                         pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
4804                                engine->name, n, cs[n], expected[n]);
4805                         err = -EINVAL;
4806                         break;
4807                 }
4808         }
4809
4810         i915_gem_object_unpin_map(scratch->obj);
4811
4812 err_rq:
4813         i915_request_put(rq);
4814 err_unpin:
4815         intel_context_unpin(ce);
4816 err_put:
4817         intel_context_put(ce);
4818         return err;
4819 }
4820
4821 static int live_lrc_state(void *arg)
4822 {
4823         struct intel_gt *gt = arg;
4824         struct intel_engine_cs *engine;
4825         struct i915_vma *scratch;
4826         enum intel_engine_id id;
4827         int err = 0;
4828
4829         /*
4830          * Check that the live register state matches what we expect for this
4831          * intel_context.
4832          */
4833
4834         scratch = create_scratch(gt);
4835         if (IS_ERR(scratch))
4836                 return PTR_ERR(scratch);
4837
4838         for_each_engine(engine, gt, id) {
4839                 err = __live_lrc_state(engine, scratch);
4840                 if (err)
4841                         break;
4842         }
4843
4844         if (igt_flush_test(gt->i915))
4845                 err = -EIO;
4846
4847         i915_vma_unpin_and_release(&scratch, 0);
4848         return err;
4849 }
4850
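/*
 * Fill every CS_GPR register of @ce's engine with a non-zero poison value
 * (STACK_MAGIC) at barrier priority, so that any later context which fails
 * to clear the GPRs on restore will visibly leak these values.
 */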
4851 static int gpr_make_dirty(struct intel_context *ce)
4852 {
4853         struct i915_request *rq;
4854         u32 *cs;
4855         int n;
4856
4857         rq = intel_context_create_request(ce);
4858         if (IS_ERR(rq))
4859                 return PTR_ERR(rq);
4860
4861         cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
4862         if (IS_ERR(cs)) {
4863                 i915_request_add(rq);
4864                 return PTR_ERR(cs);
4865         }
4866
4867         *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
4868         for (n = 0; n < NUM_GPR_DW; n++) {
4869                 *cs++ = CS_GPR(ce->engine, n);
4870                 *cs++ = STACK_MAGIC;
4871         }
4872         *cs++ = MI_NOOP;
4873
4874         intel_ring_advance(rq, cs);
4875
4876         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4877         i915_request_add(rq);
4878
4879         return 0;
4880 }
4881
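/*
 * Build a request on @ce that first spins on a semaphore in the status page
 * (so the test controls when it runs past this point) and then dumps all of
 * the engine's CS_GPR registers into @scratch with MI_STORE_REGISTER_MEM.
 */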
4882 static struct i915_request *
4883 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
4884 {
4885         const u32 offset =
4886                 i915_ggtt_offset(ce->engine->status_page.vma) +
4887                 offset_in_page(slot);
4888         struct i915_request *rq;
4889         u32 *cs;
4890         int err;
4891         int n;
4892
4893         rq = intel_context_create_request(ce);
4894         if (IS_ERR(rq))
4895                 return rq;
4896
4897         cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
4898         if (IS_ERR(cs)) {
4899                 i915_request_add(rq);
4900                 return ERR_CAST(cs);
4901         }
4902
4903         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4904         *cs++ = MI_NOOP;
4905
4906         *cs++ = MI_SEMAPHORE_WAIT |
4907                 MI_SEMAPHORE_GLOBAL_GTT |
4908                 MI_SEMAPHORE_POLL |
4909                 MI_SEMAPHORE_SAD_NEQ_SDD;
4910         *cs++ = 0;
4911         *cs++ = offset;
4912         *cs++ = 0;
4913
4914         for (n = 0; n < NUM_GPR_DW; n++) {
4915                 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4916                 *cs++ = CS_GPR(ce->engine, n);
4917                 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4918                 *cs++ = 0;
4919         }
4920
4921         i915_vma_lock(scratch);
4922         err = i915_request_await_object(rq, scratch->obj, true);
4923         if (!err)
4924                 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4925         i915_vma_unlock(scratch);
4926
4927         i915_request_get(rq);
4928         i915_request_add(rq);
4929         if (err) {
4930                 i915_request_put(rq);
4931                 rq = ERR_PTR(err);
4932         }
4933
4934         return rq;
4935 }
4936
4937 static int __live_lrc_gpr(struct intel_engine_cs *engine,
4938                           struct i915_vma *scratch,
4939                           bool preempt)
4940 {
4941         u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
4942         struct intel_context *ce;
4943         struct i915_request *rq;
4944         u32 *cs;
4945         int err;
4946         int n;
4947
4948         if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
4949                 return 0; /* GPR only on rcs0 for gen8 */
4950
4951         err = gpr_make_dirty(engine->kernel_context);
4952         if (err)
4953                 return err;
4954
4955         ce = intel_context_create(engine);
4956         if (IS_ERR(ce))
4957                 return PTR_ERR(ce);
4958
4959         rq = __gpr_read(ce, scratch, slot);
4960         if (IS_ERR(rq)) {
4961                 err = PTR_ERR(rq);
4962                 goto err_put;
4963         }
4964
4965         err = wait_for_submit(engine, rq, HZ / 2);
4966         if (err)
4967                 goto err_rq;
4968
4969         if (preempt) {
4970                 err = gpr_make_dirty(engine->kernel_context);
4971                 if (err)
4972                         goto err_rq;
4973
4974                 err = emit_semaphore_signal(engine->kernel_context, slot);
4975                 if (err)
4976                         goto err_rq;
4977         } else {
4978                 slot[0] = 1;
4979                 wmb();
4980         }
4981
4982         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4983                 err = -ETIME;
4984                 goto err_rq;
4985         }
4986
4987         cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4988         if (IS_ERR(cs)) {
4989                 err = PTR_ERR(cs);
4990                 goto err_rq;
4991         }
4992
4993         for (n = 0; n < NUM_GPR_DW; n++) {
4994                 if (cs[n]) {
4995                         pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
4996                                engine->name,
4997                                n / 2, n & 1 ? "udw" : "ldw",
4998                                cs[n]);
4999                         err = -EINVAL;
5000                         break;
5001                 }
5002         }
5003
5004         i915_gem_object_unpin_map(scratch->obj);
5005
5006 err_rq:
5007         memset32(&slot[0], -1, 4);
5008         wmb();
5009         i915_request_put(rq);
5010 err_put:
5011         intel_context_put(ce);
5012         return err;
5013 }
5014
5015 static int live_lrc_gpr(void *arg)
5016 {
5017         struct intel_gt *gt = arg;
5018         struct intel_engine_cs *engine;
5019         struct i915_vma *scratch;
5020         enum intel_engine_id id;
5021         int err = 0;
5022
5023         /*
5024          * Check that GPR registers are cleared in new contexts as we need
5025          * to avoid leaking any information from previous contexts.
5026          */
5027
5028         scratch = create_scratch(gt);
5029         if (IS_ERR(scratch))
5030                 return PTR_ERR(scratch);
5031
5032         for_each_engine(engine, gt, id) {
5033                 engine_heartbeat_disable(engine);
5034
5035                 err = __live_lrc_gpr(engine, scratch, false);
5036                 if (err)
5037                         goto err;
5038
5039                 err = __live_lrc_gpr(engine, scratch, true);
5040                 if (err)
5041                         goto err;
5042
5043 err:
5044                 engine_heartbeat_enable(engine);
5045                 if (igt_flush_test(gt->i915))
5046                         err = -EIO;
5047                 if (err)
5048                         break;
5049         }
5050
5051         i915_vma_unpin_and_release(&scratch, 0);
5052         return err;
5053 }
5054
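/*
 * Build a request on @ce that waits on a semaphore in the status page and,
 * once released, samples RING_CTX_TIMESTAMP into slot[idx] of that same
 * page, recording the timestamp the context sees once it is running again.
 */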
5055 static struct i915_request *
5056 create_timestamp(struct intel_context *ce, void *slot, int idx)
5057 {
5058         const u32 offset =
5059                 i915_ggtt_offset(ce->engine->status_page.vma) +
5060                 offset_in_page(slot);
5061         struct i915_request *rq;
5062         u32 *cs;
5063         int err;
5064
5065         rq = intel_context_create_request(ce);
5066         if (IS_ERR(rq))
5067                 return rq;
5068
5069         cs = intel_ring_begin(rq, 10);
5070         if (IS_ERR(cs)) {
5071                 err = PTR_ERR(cs);
5072                 goto err;
5073         }
5074
5075         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5076         *cs++ = MI_NOOP;
5077
5078         *cs++ = MI_SEMAPHORE_WAIT |
5079                 MI_SEMAPHORE_GLOBAL_GTT |
5080                 MI_SEMAPHORE_POLL |
5081                 MI_SEMAPHORE_SAD_NEQ_SDD;
5082         *cs++ = 0;
5083         *cs++ = offset;
5084         *cs++ = 0;
5085
5086         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5087         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
5088         *cs++ = offset + idx * sizeof(u32);
5089         *cs++ = 0;
5090
5091         intel_ring_advance(rq, cs);
5092
5093         rq->sched.attr.priority = I915_PRIORITY_MASK;
5094         err = 0;
5095 err:
5096         i915_request_get(rq);
5097         i915_request_add(rq);
5098         if (err) {
5099                 i915_request_put(rq);
5100                 return ERR_PTR(err);
5101         }
5102
5103         return rq;
5104 }
5105
5106 struct lrc_timestamp {
5107         struct intel_engine_cs *engine;
5108         struct intel_context *ce[2];
5109         u32 poison;
5110 };
5111
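/* Wrap-safe comparison: the timestamp advanced if (end - start) is positive. */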
5112 static bool timestamp_advanced(u32 start, u32 end)
5113 {
5114         return (s32)(end - start) > 0;
5115 }
5116
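/*
 * Poison CTX_TIMESTAMP in ce[0]'s saved image, run a request that samples
 * the timestamp after the image is restored (optionally via a preemption by
 * ce[1]), then force a switch back to the kernel context so the state is
 * saved again. Both the restored value and the value written back must have
 * advanced past what came before.
 */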
5117 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
5118 {
5119         u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
5120         struct i915_request *rq;
5121         u32 timestamp;
5122         int err = 0;
5123
5124         arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
5125         rq = create_timestamp(arg->ce[0], slot, 1);
5126         if (IS_ERR(rq))
5127                 return PTR_ERR(rq);
5128
5129         err = wait_for_submit(rq->engine, rq, HZ / 2);
5130         if (err)
5131                 goto err;
5132
5133         if (preempt) {
5134                 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
5135                 err = emit_semaphore_signal(arg->ce[1], slot);
5136                 if (err)
5137                         goto err;
5138         } else {
5139                 slot[0] = 1;
5140                 wmb();
5141         }
5142
5143         /* And wait for a switch back to the kernel context (to save our context to memory) */
5144         err = context_flush(arg->ce[0], HZ / 2);
5145         if (err)
5146                 goto err;
5147
5148         if (!timestamp_advanced(arg->poison, slot[1])) {
5149                 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
5150                        arg->engine->name, preempt ? "preempt" : "simple",
5151                        arg->poison, slot[1]);
5152                 err = -EINVAL;
5153         }
5154
5155         timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
5156         if (!timestamp_advanced(slot[1], timestamp)) {
5157                 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
5158                        arg->engine->name, preempt ? "preempt" : "simple",
5159                        slot[1], timestamp);
5160                 err = -EINVAL;
5161         }
5162
5163 err:
5164         memset32(slot, -1, 4);
5165         i915_request_put(rq);
5166         return err;
5167 }
5168
5169 static int live_lrc_timestamp(void *arg)
5170 {
5171         struct lrc_timestamp data = {};
5172         struct intel_gt *gt = arg;
5173         enum intel_engine_id id;
5174         const u32 poison[] = {
5175                 0,
5176                 S32_MAX,
5177                 (u32)S32_MAX + 1,
5178                 U32_MAX,
5179         };
5180
5181         /*
5182          * We want to verify that the timestamp is saved and restored across
5183          * context switches and remains monotonic.
5184          *
5185          * So we do this with a little bit of LRC poisoning to check various
5186          * boundary conditions, and see what happens if we preempt the context
5187          * with a second request (carrying more poison into the timestamp).
5188          */
5189
5190         for_each_engine(data.engine, gt, id) {
5191                 int i, err = 0;
5192
5193                 engine_heartbeat_disable(data.engine);
5194
5195                 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5196                         struct intel_context *tmp;
5197
5198                         tmp = intel_context_create(data.engine);
5199                         if (IS_ERR(tmp)) {
5200                                 err = PTR_ERR(tmp);
5201                                 goto err;
5202                         }
5203
5204                         err = intel_context_pin(tmp);
5205                         if (err) {
5206                                 intel_context_put(tmp);
5207                                 goto err;
5208                         }
5209
5210                         data.ce[i] = tmp;
5211                 }
5212
5213                 for (i = 0; i < ARRAY_SIZE(poison); i++) {
5214                         data.poison = poison[i];
5215
5216                         err = __lrc_timestamp(&data, false);
5217                         if (err)
5218                                 break;
5219
5220                         err = __lrc_timestamp(&data, true);
5221                         if (err)
5222                                 break;
5223                 }
5224
5225 err:
5226                 engine_heartbeat_enable(data.engine);
5227                 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5228                         if (!data.ce[i])
5229                                 break;
5230
5231                         intel_context_unpin(data.ce[i]);
5232                         intel_context_put(data.ce[i]);
5233                 }
5234
5235                 if (igt_flush_test(gt->i915))
5236                         err = -EIO;
5237                 if (err)
5238                         return err;
5239         }
5240
5241         return 0;
5242 }
5243
5244 static struct i915_vma *
5245 create_user_vma(struct i915_address_space *vm, unsigned long size)
5246 {
5247         struct drm_i915_gem_object *obj;
5248         struct i915_vma *vma;
5249         int err;
5250
5251         obj = i915_gem_object_create_internal(vm->i915, size);
5252         if (IS_ERR(obj))
5253                 return ERR_CAST(obj);
5254
5255         vma = i915_vma_instance(obj, vm, NULL);
5256         if (IS_ERR(vma)) {
5257                 i915_gem_object_put(obj);
5258                 return vma;
5259         }
5260
5261         err = i915_vma_pin(vma, 0, 0, PIN_USER);
5262         if (err) {
5263                 i915_gem_object_put(obj);
5264                 return ERR_PTR(err);
5265         }
5266
5267         return vma;
5268 }
5269
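/*
 * Build a user batch that walks the LRI list of the engine's default
 * context image and emits one MI_STORE_REGISTER_MEM per listed register,
 * dumping the current value of every context register into @scratch.
 */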
5270 static struct i915_vma *
5271 store_context(struct intel_context *ce, struct i915_vma *scratch)
5272 {
5273         struct i915_vma *batch;
5274         u32 dw, x, *cs, *hw;
5275         u32 *defaults;
5276
5277         batch = create_user_vma(ce->vm, SZ_64K);
5278         if (IS_ERR(batch))
5279                 return batch;
5280
5281         cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5282         if (IS_ERR(cs)) {
5283                 i915_vma_put(batch);
5284                 return ERR_CAST(cs);
5285         }
5286
5287         defaults = shmem_pin_map(ce->engine->default_state);
5288         if (!defaults) {
5289                 i915_gem_object_unpin_map(batch->obj);
5290                 i915_vma_put(batch);
5291                 return ERR_PTR(-ENOMEM);
5292         }
5293
5294         x = 0;
5295         dw = 0;
5296         hw = defaults;
5297         hw += LRC_STATE_OFFSET / sizeof(*hw);
5298         do {
5299                 u32 len = hw[dw] & 0x7f;
5300
5301                 if (hw[dw] == 0) {
5302                         dw++;
5303                         continue;
5304                 }
5305
5306                 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5307                         dw += len + 2;
5308                         continue;
5309                 }
5310
5311                 dw++;
5312                 len = (len + 1) / 2;
5313                 while (len--) {
5314                         *cs++ = MI_STORE_REGISTER_MEM_GEN8;
5315                         *cs++ = hw[dw];
5316                         *cs++ = lower_32_bits(scratch->node.start + x);
5317                         *cs++ = upper_32_bits(scratch->node.start + x);
5318
5319                         dw += 2;
5320                         x += 4;
5321                 }
5322         } while (dw < PAGE_SIZE / sizeof(u32) &&
5323                  (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5324
5325         *cs++ = MI_BATCH_BUFFER_END;
5326
5327         shmem_unpin_map(ce->engine->default_state, defaults);
5328
5329         i915_gem_object_flush_map(batch->obj);
5330         i915_gem_object_unpin_map(batch->obj);
5331
5332         return batch;
5333 }
5334
5335 static int move_to_active(struct i915_request *rq,
5336                           struct i915_vma *vma,
5337                           unsigned int flags)
5338 {
5339         int err;
5340
5341         i915_vma_lock(vma);
5342         err = i915_request_await_object(rq, vma->obj, flags);
5343         if (!err)
5344                 err = i915_vma_move_to_active(vma, rq, flags);
5345         i915_vma_unlock(vma);
5346
5347         return err;
5348 }
5349
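/*
 * Submit a request on @ce that captures the context registers into @before
 * (with arbitration disabled), then re-enables arbitration and blocks on
 * @sema, allowing another context to run and, potentially, attempt to
 * poison our state, before capturing the registers again into @after for
 * comparison.
 */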
5350 static struct i915_request *
5351 record_registers(struct intel_context *ce,
5352                  struct i915_vma *before,
5353                  struct i915_vma *after,
5354                  u32 *sema)
5355 {
5356         struct i915_vma *b_before, *b_after;
5357         struct i915_request *rq;
5358         u32 *cs;
5359         int err;
5360
5361         b_before = store_context(ce, before);
5362         if (IS_ERR(b_before))
5363                 return ERR_CAST(b_before);
5364
5365         b_after = store_context(ce, after);
5366         if (IS_ERR(b_after)) {
5367                 rq = ERR_CAST(b_after);
5368                 goto err_before;
5369         }
5370
5371         rq = intel_context_create_request(ce);
5372         if (IS_ERR(rq))
5373                 goto err_after;
5374
5375         err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
5376         if (err)
5377                 goto err_rq;
5378
5379         err = move_to_active(rq, b_before, 0);
5380         if (err)
5381                 goto err_rq;
5382
5383         err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
5384         if (err)
5385                 goto err_rq;
5386
5387         err = move_to_active(rq, b_after, 0);
5388         if (err)
5389                 goto err_rq;
5390
5391         cs = intel_ring_begin(rq, 14);
5392         if (IS_ERR(cs)) {
5393                 err = PTR_ERR(cs);
5394                 goto err_rq;
5395         }
5396
5397         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5398         *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5399         *cs++ = lower_32_bits(b_before->node.start);
5400         *cs++ = upper_32_bits(b_before->node.start);
5401
5402         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5403         *cs++ = MI_SEMAPHORE_WAIT |
5404                 MI_SEMAPHORE_GLOBAL_GTT |
5405                 MI_SEMAPHORE_POLL |
5406                 MI_SEMAPHORE_SAD_NEQ_SDD;
5407         *cs++ = 0;
5408         *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5409                 offset_in_page(sema);
5410         *cs++ = 0;
5411         *cs++ = MI_NOOP;
5412
5413         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5414         *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5415         *cs++ = lower_32_bits(b_after->node.start);
5416         *cs++ = upper_32_bits(b_after->node.start);
5417
5418         intel_ring_advance(rq, cs);
5419
5420         WRITE_ONCE(*sema, 0);
5421         i915_request_get(rq);
5422         i915_request_add(rq);
5423 err_after:
5424         i915_vma_put(b_after);
5425 err_before:
5426         i915_vma_put(b_before);
5427         return rq;
5428
5429 err_rq:
5430         i915_request_add(rq);
5431         rq = ERR_PTR(err);
5432         goto err_after;
5433 }
5434
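/*
 * Build a user batch that replays the LRI list of the default context
 * image, writing @poison into every context register it names.
 */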
5435 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
5436 {
5437         struct i915_vma *batch;
5438         u32 dw, *cs, *hw;
5439         u32 *defaults;
5440
5441         batch = create_user_vma(ce->vm, SZ_64K);
5442         if (IS_ERR(batch))
5443                 return batch;
5444
5445         cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5446         if (IS_ERR(cs)) {
5447                 i915_vma_put(batch);
5448                 return ERR_CAST(cs);
5449         }
5450
5451         defaults = shmem_pin_map(ce->engine->default_state);
5452         if (!defaults) {
5453                 i915_gem_object_unpin_map(batch->obj);
5454                 i915_vma_put(batch);
5455                 return ERR_PTR(-ENOMEM);
5456         }
5457
5458         dw = 0;
5459         hw = defaults;
5460         hw += LRC_STATE_OFFSET / sizeof(*hw);
5461         do {
5462                 u32 len = hw[dw] & 0x7f;
5463
5464                 if (hw[dw] == 0) {
5465                         dw++;
5466                         continue;
5467                 }
5468
5469                 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5470                         dw += len + 2;
5471                         continue;
5472                 }
5473
5474                 dw++;
5475                 len = (len + 1) / 2;
5476                 *cs++ = MI_LOAD_REGISTER_IMM(len);
5477                 while (len--) {
5478                         *cs++ = hw[dw];
5479                         *cs++ = poison;
5480                         dw += 2;
5481                 }
5482         } while (dw < PAGE_SIZE / sizeof(u32) &&
5483                  (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5484
5485         *cs++ = MI_BATCH_BUFFER_END;
5486
5487         shmem_unpin_map(ce->engine->default_state, defaults);
5488
5489         i915_gem_object_flush_map(batch->obj);
5490         i915_gem_object_unpin_map(batch->obj);
5491
5492         return batch;
5493 }
5494
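/*
 * From context @ce, run the poisoning batch with arbitration disabled and
 * then signal @sema so that the victim context (blocked in
 * record_registers()) resumes and re-reads its registers.
 */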
5495 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
5496 {
5497         struct i915_request *rq;
5498         struct i915_vma *batch;
5499         u32 *cs;
5500         int err;
5501
5502         batch = load_context(ce, poison);
5503         if (IS_ERR(batch))
5504                 return PTR_ERR(batch);
5505
5506         rq = intel_context_create_request(ce);
5507         if (IS_ERR(rq)) {
5508                 err = PTR_ERR(rq);
5509                 goto err_batch;
5510         }
5511
5512         err = move_to_active(rq, batch, 0);
5513         if (err)
5514                 goto err_rq;
5515
5516         cs = intel_ring_begin(rq, 8);
5517         if (IS_ERR(cs)) {
5518                 err = PTR_ERR(cs);
5519                 goto err_rq;
5520         }
5521
5522         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5523         *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5524         *cs++ = lower_32_bits(batch->node.start);
5525         *cs++ = upper_32_bits(batch->node.start);
5526
5527         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
5528         *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5529                 offset_in_page(sema);
5530         *cs++ = 0;
5531         *cs++ = 1;
5532
5533         intel_ring_advance(rq, cs);
5534
5535         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5536 err_rq:
5537         i915_request_add(rq);
5538 err_batch:
5539         i915_vma_put(batch);
5540         return err;
5541 }
5542
5543 static bool is_moving(u32 a, u32 b)
5544 {
5545         return a != b;
5546 }
5547
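/*
 * Compare the two reference dumps (taken without interference) against the
 * two result dumps (taken around the poisoning attempt). Registers whose
 * reference values already differ are treated as naturally volatile and
 * ignored, as are RING_HEAD and RING_TAIL; anything else that changed
 * indicates the remote context leaked into ours.
 */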
5548 static int compare_isolation(struct intel_engine_cs *engine,
5549                              struct i915_vma *ref[2],
5550                              struct i915_vma *result[2],
5551                              struct intel_context *ce,
5552                              u32 poison)
5553 {
5554         u32 x, dw, *hw, *lrc;
5555         u32 *A[2], *B[2];
5556         u32 *defaults;
5557         int err = 0;
5558
5559         A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
5560         if (IS_ERR(A[0]))
5561                 return PTR_ERR(A[0]);
5562
5563         A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
5564         if (IS_ERR(A[1])) {
5565                 err = PTR_ERR(A[1]);
5566                 goto err_A0;
5567         }
5568
5569         B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
5570         if (IS_ERR(B[0])) {
5571                 err = PTR_ERR(B[0]);
5572                 goto err_A1;
5573         }
5574
5575         B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
5576         if (IS_ERR(B[1])) {
5577                 err = PTR_ERR(B[1]);
5578                 goto err_B0;
5579         }
5580
5581         lrc = i915_gem_object_pin_map(ce->state->obj,
5582                                       i915_coherent_map_type(engine->i915));
5583         if (IS_ERR(lrc)) {
5584                 err = PTR_ERR(lrc);
5585                 goto err_B1;
5586         }
5587         lrc += LRC_STATE_OFFSET / sizeof(*hw);
5588
5589         defaults = shmem_pin_map(ce->engine->default_state);
5590         if (!defaults) {
5591                 err = -ENOMEM;
5592                 goto err_lrc;
5593         }
5594
5595         x = 0;
5596         dw = 0;
5597         hw = defaults;
5598         hw += LRC_STATE_OFFSET / sizeof(*hw);
5599         do {
5600                 u32 len = hw[dw] & 0x7f;
5601
5602                 if (hw[dw] == 0) {
5603                         dw++;
5604                         continue;
5605                 }
5606
5607                 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5608                         dw += len + 2;
5609                         continue;
5610                 }
5611
5612                 dw++;
5613                 len = (len + 1) / 2;
5614                 while (len--) {
5615                         if (!is_moving(A[0][x], A[1][x]) &&
5616                             (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
5617                                 switch (hw[dw] & 4095) {
5618                                 case 0x30: /* RING_HEAD */
5619                                 case 0x34: /* RING_TAIL */
5620                                         break;
5621
5622                                 default:
5623                                         pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
5624                                                engine->name, dw,
5625                                                hw[dw], hw[dw + 1],
5626                                                A[0][x], B[0][x], B[1][x],
5627                                                poison, lrc[dw + 1]);
5628                                         err = -EINVAL;
5629                                 }
5630                         }
5631                         dw += 2;
5632                         x++;
5633                 }
5634         } while (dw < PAGE_SIZE / sizeof(u32) &&
5635                  (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5636
5637         shmem_unpin_map(ce->engine->default_state, defaults);
5638 err_lrc:
5639         i915_gem_object_unpin_map(ce->state->obj);
5640 err_B1:
5641         i915_gem_object_unpin_map(result[1]->obj);
5642 err_B0:
5643         i915_gem_object_unpin_map(result[0]->obj);
5644 err_A1:
5645         i915_gem_object_unpin_map(ref[1]->obj);
5646 err_A0:
5647         i915_gem_object_unpin_map(ref[0]->obj);
5648         return err;
5649 }
5650
5651 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
5652 {
5653         u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
5654         struct i915_vma *ref[2], *result[2];
5655         struct intel_context *A, *B;
5656         struct i915_request *rq;
5657         int err;
5658
5659         A = intel_context_create(engine);
5660         if (IS_ERR(A))
5661                 return PTR_ERR(A);
5662
5663         B = intel_context_create(engine);
5664         if (IS_ERR(B)) {
5665                 err = PTR_ERR(B);
5666                 goto err_A;
5667         }
5668
5669         ref[0] = create_user_vma(A->vm, SZ_64K);
5670         if (IS_ERR(ref[0])) {
5671                 err = PTR_ERR(ref[0]);
5672                 goto err_B;
5673         }
5674
5675         ref[1] = create_user_vma(A->vm, SZ_64K);
5676         if (IS_ERR(ref[1])) {
5677                 err = PTR_ERR(ref[1]);
5678                 goto err_ref0;
5679         }
5680
5681         rq = record_registers(A, ref[0], ref[1], sema);
5682         if (IS_ERR(rq)) {
5683                 err = PTR_ERR(rq);
5684                 goto err_ref1;
5685         }
5686
5687         WRITE_ONCE(*sema, 1);
5688         wmb();
5689
5690         if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5691                 i915_request_put(rq);
5692                 err = -ETIME;
5693                 goto err_ref1;
5694         }
5695         i915_request_put(rq);
5696
5697         result[0] = create_user_vma(A->vm, SZ_64K);
5698         if (IS_ERR(result[0])) {
5699                 err = PTR_ERR(result[0]);
5700                 goto err_ref1;
5701         }
5702
5703         result[1] = create_user_vma(A->vm, SZ_64K);
5704         if (IS_ERR(result[1])) {
5705                 err = PTR_ERR(result[1]);
5706                 goto err_result0;
5707         }
5708
5709         rq = record_registers(A, result[0], result[1], sema);
5710         if (IS_ERR(rq)) {
5711                 err = PTR_ERR(rq);
5712                 goto err_result1;
5713         }
5714
5715         err = poison_registers(B, poison, sema);
5716         if (err) {
5717                 WRITE_ONCE(*sema, -1);
5718                 i915_request_put(rq);
5719                 goto err_result1;
5720         }
5721
5722         if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5723                 i915_request_put(rq);
5724                 err = -ETIME;
5725                 goto err_result1;
5726         }
5727         i915_request_put(rq);
5728
5729         err = compare_isolation(engine, ref, result, A, poison);
5730
5731 err_result1:
5732         i915_vma_put(result[1]);
5733 err_result0:
5734         i915_vma_put(result[0]);
5735 err_ref1:
5736         i915_vma_put(ref[1]);
5737 err_ref0:
5738         i915_vma_put(ref[0]);
5739 err_B:
5740         intel_context_put(B);
5741 err_A:
5742         intel_context_put(A);
5743         return err;
5744 }
5745
5746 static bool skip_isolation(const struct intel_engine_cs *engine)
5747 {
5748         if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
5749                 return true;
5750
5751         if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
5752                 return true;
5753
5754         return false;
5755 }
5756
5757 static int live_lrc_isolation(void *arg)
5758 {
5759         struct intel_gt *gt = arg;
5760         struct intel_engine_cs *engine;
5761         enum intel_engine_id id;
5762         const u32 poison[] = {
5763                 STACK_MAGIC,
5764                 0x3a3a3a3a,
5765                 0x5c5c5c5c,
5766                 0xffffffff,
5767                 0xffff0000,
5768         };
5769         int err = 0;
5770
5771         /*
5772          * Our goal is to try to verify that per-context state cannot be
5773          * tampered with by another non-privileged client.
5774          *
5775          * We take the list of context registers from the LRI in the default
5776          * context image and attempt to modify that list from a remote context.
5777          */
5778
5779         for_each_engine(engine, gt, id) {
5780                 int i;
5781
5782                 /* Just don't even ask */
5783                 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
5784                     skip_isolation(engine))
5785                         continue;
5786
5787                 intel_engine_pm_get(engine);
5788                 for (i = 0; i < ARRAY_SIZE(poison); i++) {
5789                         int result;
5790
5791                         result = __lrc_isolation(engine, poison[i]);
5792                         if (result && !err)
5793                                 err = result;
5794
5795                         result = __lrc_isolation(engine, ~poison[i]);
5796                         if (result && !err)
5797                                 err = result;
5798                 }
5799                 intel_engine_pm_put(engine);
5800                 if (igt_flush_test(gt->i915)) {
5801                         err = -EIO;
5802                         break;
5803                 }
5804         }
5805
5806         return err;
5807 }
5808
5809 static int indirect_ctx_submit_req(struct intel_context *ce)
5810 {
5811         struct i915_request *rq;
5812         int err = 0;
5813
5814         rq = intel_context_create_request(ce);
5815         if (IS_ERR(rq))
5816                 return PTR_ERR(rq);
5817
5818         i915_request_get(rq);
5819         i915_request_add(rq);
5820
5821         if (i915_request_wait(rq, 0, HZ / 5) < 0)
5822                 err = -ETIME;
5823
5824         i915_request_put(rq);
5825
5826         return err;
5827 }
5828
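/*
 * The indirect ctx bb canary: the per-context batch stores RING_START into
 * a spare dword of the context's wa_bb page. Since RING_START is restored
 * before the indirect ctx bb runs and is unique per context, finding our
 * own RING_START in the canary slot proves the batch ran, and ran for the
 * right context.
 */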
5829 #define CTX_BB_CANARY_OFFSET (3 * 1024)
5830 #define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
5831
5832 static u32 *
5833 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
5834 {
5835         *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
5836                 MI_SRM_LRM_GLOBAL_GTT |
5837                 MI_LRI_LRM_CS_MMIO;
5838         *cs++ = i915_mmio_reg_offset(RING_START(0));
5839         *cs++ = i915_ggtt_offset(ce->state) +
5840                 context_wa_bb_offset(ce) +
5841                 CTX_BB_CANARY_OFFSET;
5842         *cs++ = 0;
5843
5844         return cs;
5845 }
5846
5847 static void
5848 indirect_ctx_bb_setup(struct intel_context *ce)
5849 {
5850         u32 *cs = context_indirect_bb(ce);
5851
5852         cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
5853
5854         setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
5855 }
5856
5857 static bool check_ring_start(struct intel_context *ce)
5858 {
5859         const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
5860                 LRC_STATE_OFFSET + context_wa_bb_offset(ce);
5861
5862         if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
5863                 return true;
5864
5865         pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
5866                ctx_bb[CTX_BB_CANARY_INDEX],
5867                ce->lrc_reg_state[CTX_RING_START]);
5868
5869         return false;
5870 }
5871
5872 static int indirect_ctx_bb_check(struct intel_context *ce)
5873 {
5874         int err;
5875
5876         err = indirect_ctx_submit_req(ce);
5877         if (err)
5878                 return err;
5879
5880         if (!check_ring_start(ce))
5881                 return -EINVAL;
5882
5883         return 0;
5884 }
5885
5886 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
5887 {
5888         struct intel_context *a, *b;
5889         int err;
5890
5891         a = intel_context_create(engine);
5892         if (IS_ERR(a))
5893                 return PTR_ERR(a);
5894         err = intel_context_pin(a);
5895         if (err)
5896                 goto put_a;
5897
5898         b = intel_context_create(engine);
5899         if (IS_ERR(b)) {
5900                 err = PTR_ERR(b);
5901                 goto unpin_a;
5902         }
5903         err = intel_context_pin(b);
5904         if (err)
5905                 goto put_b;
5906
5907         /* We use the already reserved extra page in context state */
5908         if (!a->wa_bb_page) {
5909                 GEM_BUG_ON(b->wa_bb_page);
5910                 GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
5911                 goto unpin_b;
5912         }
5913
5914         /*
5915          * In order to test that our per-context bb is truly per context, and
5916          * executes at the intended spot in the context restore process, make
5917          * the batch store the ring start value to memory.
5918          * As ring start is restored before the indirect ctx bb runs, and as
5919          * it will be different for each context, it fits this purpose.
5920          */
5921         indirect_ctx_bb_setup(a);
5922         indirect_ctx_bb_setup(b);
5923
5924         err = indirect_ctx_bb_check(a);
5925         if (err)
5926                 goto unpin_b;
5927
5928         err = indirect_ctx_bb_check(b);
5929
5930 unpin_b:
5931         intel_context_unpin(b);
5932 put_b:
5933         intel_context_put(b);
5934 unpin_a:
5935         intel_context_unpin(a);
5936 put_a:
5937         intel_context_put(a);
5938
5939         return err;
5940 }
5941
5942 static int live_lrc_indirect_ctx_bb(void *arg)
5943 {
5944         struct intel_gt *gt = arg;
5945         struct intel_engine_cs *engine;
5946         enum intel_engine_id id;
5947         int err = 0;
5948
5949         for_each_engine(engine, gt, id) {
5950                 intel_engine_pm_get(engine);
5951                 err = __live_lrc_indirect_ctx_bb(engine);
5952                 intel_engine_pm_put(engine);
5953
5954                 if (igt_flush_test(gt->i915))
5955                         err = -EIO;
5956
5957                 if (err)
5958                         break;
5959         }
5960
5961         return err;
5962 }
5963
5964 static void garbage_reset(struct intel_engine_cs *engine,
5965                           struct i915_request *rq)
5966 {
5967         const unsigned int bit = I915_RESET_ENGINE + engine->id;
5968         unsigned long *lock = &engine->gt->reset.flags;
5969
5970         if (test_and_set_bit(bit, lock))
5971                 return;
5972
5973         tasklet_disable(&engine->execlists.tasklet);
5974
5975         if (!rq->fence.error)
5976                 intel_engine_reset(engine, NULL);
5977
5978         tasklet_enable(&engine->execlists.tasklet);
5979         clear_and_wake_up_bit(bit, lock);
5980 }
5981
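/*
 * Overwrite @ce's register state with random bytes and submit a request on
 * the now-corrupted context; the caller then bans the context and resets
 * the engine, expecting the request to be flagged with an error.
 */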
5982 static struct i915_request *garbage(struct intel_context *ce,
5983                                     struct rnd_state *prng)
5984 {
5985         struct i915_request *rq;
5986         int err;
5987
5988         err = intel_context_pin(ce);
5989         if (err)
5990                 return ERR_PTR(err);
5991
5992         prandom_bytes_state(prng,
5993                             ce->lrc_reg_state,
5994                             ce->engine->context_size -
5995                             LRC_STATE_OFFSET);
5996
5997         rq = intel_context_create_request(ce);
5998         if (IS_ERR(rq)) {
5999                 err = PTR_ERR(rq);
6000                 goto err_unpin;
6001         }
6002
6003         i915_request_get(rq);
6004         i915_request_add(rq);
6005         return rq;
6006
6007 err_unpin:
6008         intel_context_unpin(ce);
6009         return ERR_PTR(err);
6010 }
6011
6012 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
6013 {
6014         struct intel_context *ce;
6015         struct i915_request *hang;
6016         int err = 0;
6017
6018         ce = intel_context_create(engine);
6019         if (IS_ERR(ce))
6020                 return PTR_ERR(ce);
6021
6022         hang = garbage(ce, prng);
6023         if (IS_ERR(hang)) {
6024                 err = PTR_ERR(hang);
6025                 goto err_ce;
6026         }
6027
6028         if (wait_for_submit(engine, hang, HZ / 2)) {
6029                 i915_request_put(hang);
6030                 err = -ETIME;
6031                 goto err_ce;
6032         }
6033
6034         intel_context_set_banned(ce);
6035         garbage_reset(engine, hang);
6036
6037         intel_engine_flush_submission(engine);
6038         if (!hang->fence.error) {
6039                 i915_request_put(hang);
6040                 pr_err("%s: corrupted context was not reset\n",
6041                        engine->name);
6042                 err = -EINVAL;
6043                 goto err_ce;
6044         }
6045
6046         if (i915_request_wait(hang, 0, HZ / 2) < 0) {
6047                 pr_err("%s: corrupted context did not recover\n",
6048                        engine->name);
6049                 i915_request_put(hang);
6050                 err = -EIO;
6051                 goto err_ce;
6052         }
6053         i915_request_put(hang);
6054
6055 err_ce:
6056         intel_context_put(ce);
6057         return err;
6058 }
6059
6060 static int live_lrc_garbage(void *arg)
6061 {
6062         struct intel_gt *gt = arg;
6063         struct intel_engine_cs *engine;
6064         enum intel_engine_id id;
6065
6066         /*
6067          * Verify that we can recover if a context's state is completely
6068          * corrupted.
6069          */
6070
6071         if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
6072                 return 0;
6073
6074         for_each_engine(engine, gt, id) {
6075                 I915_RND_STATE(prng);
6076                 int err = 0, i;
6077
6078                 if (!intel_has_reset_engine(engine->gt))
6079                         continue;
6080
6081                 intel_engine_pm_get(engine);
6082                 for (i = 0; i < 3; i++) {
6083                         err = __lrc_garbage(engine, &prng);
6084                         if (err)
6085                                 break;
6086                 }
6087                 intel_engine_pm_put(engine);
6088
6089                 if (igt_flush_test(gt->i915))
6090                         err = -EIO;
6091                 if (err)
6092                         return err;
6093         }
6094
6095         return 0;
6096 }
6097
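/*
 * Hammer the engine with batches of requests on a single context until the
 * selftest timeout, then report the accumulated PPHWSP runtime and fail
 * (-EOVERFLOW) if any underflow of the runtime counter was recorded.
 */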
6098 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
6099 {
6100         struct intel_context *ce;
6101         struct i915_request *rq;
6102         IGT_TIMEOUT(end_time);
6103         int err;
6104
6105         ce = intel_context_create(engine);
6106         if (IS_ERR(ce))
6107                 return PTR_ERR(ce);
6108
6109         ce->runtime.num_underflow = 0;
6110         ce->runtime.max_underflow = 0;
6111
6112         do {
6113                 unsigned int loop = 1024;
6114
6115                 while (loop) {
6116                         rq = intel_context_create_request(ce);
6117                         if (IS_ERR(rq)) {
6118                                 err = PTR_ERR(rq);
6119                                 goto err_rq;
6120                         }
6121
6122                         if (--loop == 0)
6123                                 i915_request_get(rq);
6124
6125                         i915_request_add(rq);
6126                 }
6127
6128                 if (__igt_timeout(end_time, NULL))
6129                         break;
6130
6131                 i915_request_put(rq);
6132         } while (1);
6133
6134         err = i915_request_wait(rq, 0, HZ / 5);
6135         if (err < 0) {
6136                 pr_err("%s: request not completed!\n", engine->name);
6137                 goto err_wait;
6138         }
6139
6140         igt_flush_test(engine->i915);
6141
6142         pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
6143                 engine->name,
6144                 intel_context_get_total_runtime_ns(ce),
6145                 intel_context_get_avg_runtime_ns(ce));
6146
6147         err = 0;
6148         if (ce->runtime.num_underflow) {
6149                 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
6150                        engine->name,
6151                        ce->runtime.num_underflow,
6152                        ce->runtime.max_underflow);
6153                 GEM_TRACE_DUMP();
6154                 err = -EOVERFLOW;
6155         }
6156
6157 err_wait:
6158         i915_request_put(rq);
6159 err_rq:
6160         intel_context_put(ce);
6161         return err;
6162 }
6163
6164 static int live_pphwsp_runtime(void *arg)
6165 {
6166         struct intel_gt *gt = arg;
6167         struct intel_engine_cs *engine;
6168         enum intel_engine_id id;
6169         int err = 0;
6170
6171         /*
6172          * Check that the cumulative context runtime, as stored in the
6173          * pphwsp[16], is monotonic.
6174          */
6175
6176         for_each_engine(engine, gt, id) {
6177                 err = __live_pphwsp_runtime(engine);
6178                 if (err)
6179                         break;
6180         }
6181
6182         if (igt_flush_test(gt->i915))
6183                 err = -EIO;
6184
6185         return err;
6186 }
6187
6188 int intel_lrc_live_selftests(struct drm_i915_private *i915)
6189 {
6190         static const struct i915_subtest tests[] = {
6191                 SUBTEST(live_lrc_layout),
6192                 SUBTEST(live_lrc_fixed),
6193                 SUBTEST(live_lrc_state),
6194                 SUBTEST(live_lrc_gpr),
6195                 SUBTEST(live_lrc_isolation),
6196                 SUBTEST(live_lrc_timestamp),
6197                 SUBTEST(live_lrc_garbage),
6198                 SUBTEST(live_pphwsp_runtime),
6199                 SUBTEST(live_lrc_indirect_ctx_bb),
6200         };
6201
6202         if (!HAS_LOGICAL_RING_CONTEXTS(i915))
6203                 return 0;
6204
6205         return intel_gt_live_subtests(tests, &i915->gt);
6206 }