2 * SPDX-License-Identifier: MIT
4 * Copyright © 2018 Intel Corporation
7 #include <linux/prime_numbers.h>
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12 #include "gt/selftest_engine_heartbeat.h"
14 #include "i915_selftest.h"
15 #include "selftests/i915_random.h"
16 #include "selftests/igt_flush_test.h"
17 #include "selftests/igt_live_test.h"
18 #include "selftests/igt_spinner.h"
19 #include "selftests/lib_sw_fence.h"
21 #include "gem/selftests/igt_gem_utils.h"
22 #include "gem/selftests/mock_context.h"
24 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
26 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
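/*
 * Sketch of the register layout these macros assume: the CS general purpose
 * registers (GPR) are an MMIO block starting at engine->mmio_base + 0x600,
 * and each GPR is 64 bits wide, i.e. two dwords, hence NUM_GPR_DW is twice
 * NUM_GPR (whose definition is elided just above).
 */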
28 static bool is_active(struct i915_request *rq)
30 if (i915_request_is_active(rq))
33 if (i915_request_on_hold(rq))
36 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
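/*
 * wait_for_submit() below waits until the HW has acknowledged the ELSP write
 * and the request is visible as active (per is_active() above), unless the
 * request has already completed.
 */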
42 static int wait_for_submit(struct intel_engine_cs *engine,
43 struct i915_request *rq,
44 unsigned long timeout)
46 /* Ignore our own attempts to suppress excess tasklets */
47 tasklet_hi_schedule(&engine->execlists.tasklet);
51 bool done = time_after(jiffies, timeout);
53 if (i915_request_completed(rq)) /* that was quick! */
56 /* Wait until the HW has acknowledged the submission (or err) */
57 intel_engine_flush_submission(engine);
58 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
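/*
 * wait_for_reset() polls, flushing submission on each pass, until the hanging
 * request either completes or is marked with -EIO by the engine reset, or the
 * timeout expires.
 */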
68 static int wait_for_reset(struct intel_engine_cs *engine,
69 struct i915_request *rq,
70 unsigned long timeout)
76 intel_engine_flush_submission(engine);
78 if (READ_ONCE(engine->execlists.pending[0]))
81 if (i915_request_completed(rq))
84 if (READ_ONCE(rq->fence.error))
86 } while (time_before(jiffies, timeout));
88 flush_scheduled_work();
90 if (rq->fence.error != -EIO) {
91 pr_err("%s: hanging request %llx:%lld not reset\n",
98 /* Give the request a jiffie to complete after flushing the worker */
99 if (i915_request_wait(rq, 0,
100 max(0l, (long)(timeout - jiffies)) + 1) < 0) {
101 pr_err("%s: hanging request %llx:%lld did not complete\n",
111 static int live_sanitycheck(void *arg)
113 struct intel_gt *gt = arg;
114 struct intel_engine_cs *engine;
115 enum intel_engine_id id;
116 struct igt_spinner spin;
119 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
122 if (igt_spinner_init(&spin, gt))
125 for_each_engine(engine, gt, id) {
126 struct intel_context *ce;
127 struct i915_request *rq;
129 ce = intel_context_create(engine);
135 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
141 i915_request_add(rq);
142 if (!igt_wait_for_spinner(&spin, rq)) {
143 GEM_TRACE("spinner failed to start\n");
145 intel_gt_set_wedged(gt);
150 igt_spinner_end(&spin);
151 if (igt_flush_test(gt->i915)) {
157 intel_context_put(ce);
162 igt_spinner_fini(&spin);
166 static int live_unlite_restore(struct intel_gt *gt, int prio)
168 struct intel_engine_cs *engine;
169 enum intel_engine_id id;
170 struct igt_spinner spin;
174 * Check that we can correctly context switch between 2 instances
175 * on the same engine from the same parent context.
178 if (igt_spinner_init(&spin, gt))
182 for_each_engine(engine, gt, id) {
183 struct intel_context *ce[2] = {};
184 struct i915_request *rq[2];
185 struct igt_live_test t;
188 if (prio && !intel_engine_has_preemption(engine))
191 if (!intel_engine_can_store_dword(engine))
194 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
198 st_engine_heartbeat_disable(engine);
200 for (n = 0; n < ARRAY_SIZE(ce); n++) {
201 struct intel_context *tmp;
203 tmp = intel_context_create(engine);
209 err = intel_context_pin(tmp);
211 intel_context_put(tmp);
216 * Setup the pair of contexts such that if we
217 * lite-restore using the RING_TAIL from ce[1] it
218 * will execute garbage from ce[0]->ring.
220 memset(tmp->ring->vaddr,
221 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
222 tmp->ring->vma->size);
226 GEM_BUG_ON(!ce[1]->ring->size);
227 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
228 lrc_update_regs(ce[1], engine, ce[1]->ring->head);
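/*
 * ce[1]'s ring has been wound on to its halfway point, so its RING_TAIL sits
 * ahead of what ce[0] emits (see the GEM_BUG_ON checks below). If the HW were
 * to lite-restore using that RING_TAIL (the bug being checked for), it would
 * run into the POISON_INUSE dwords written above and hang with
 * IPEHR == 0x5a5a5a5a.
 */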
230 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
232 err = PTR_ERR(rq[0]);
236 i915_request_get(rq[0]);
237 i915_request_add(rq[0]);
238 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
240 if (!igt_wait_for_spinner(&spin, rq[0])) {
241 i915_request_put(rq[0]);
245 rq[1] = i915_request_create(ce[1]);
247 err = PTR_ERR(rq[1]);
248 i915_request_put(rq[0]);
254 * Ensure we do the switch to ce[1] on completion.
256 * rq[0] is already submitted, so this should reduce
257 * to a no-op (a wait on a request on the same engine
258 * uses the submit fence, not the completion fence),
259 * but it will install a dependency on rq[1] for rq[0]
260 * that will prevent the pair being reordered by
263 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
266 i915_request_get(rq[1]);
267 i915_request_add(rq[1]);
268 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
269 i915_request_put(rq[0]);
272 struct i915_sched_attr attr = {
276 /* Alternatively preempt the spinner with ce[1] */
277 engine->schedule(rq[1], &attr);
280 /* And switch back to ce[0] for good measure */
281 rq[0] = i915_request_create(ce[0]);
283 err = PTR_ERR(rq[0]);
284 i915_request_put(rq[1]);
288 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
289 i915_request_get(rq[0]);
290 i915_request_add(rq[0]);
291 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
292 i915_request_put(rq[1]);
293 i915_request_put(rq[0]);
296 intel_engine_flush_submission(engine);
297 igt_spinner_end(&spin);
298 for (n = 0; n < ARRAY_SIZE(ce); n++) {
299 if (IS_ERR_OR_NULL(ce[n]))
302 intel_context_unpin(ce[n]);
303 intel_context_put(ce[n]);
306 st_engine_heartbeat_enable(engine);
307 if (igt_live_test_end(&t))
313 igt_spinner_fini(&spin);
317 static int live_unlite_switch(void *arg)
319 return live_unlite_restore(arg, 0);
322 static int live_unlite_preempt(void *arg)
324 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
327 static int live_unlite_ring(void *arg)
329 struct intel_gt *gt = arg;
330 struct intel_engine_cs *engine;
331 struct igt_spinner spin;
332 enum intel_engine_id id;
336 * Setup a preemption event that will cause almost the entire ring
337 * to be unwound, potentially fooling our intel_ring_direction()
338 * into emitting a forward lite-restore instead of the rollback.
341 if (igt_spinner_init(&spin, gt))
344 for_each_engine(engine, gt, id) {
345 struct intel_context *ce[2] = {};
346 struct i915_request *rq;
347 struct igt_live_test t;
350 if (!intel_engine_has_preemption(engine))
353 if (!intel_engine_can_store_dword(engine))
356 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
360 st_engine_heartbeat_disable(engine);
362 for (n = 0; n < ARRAY_SIZE(ce); n++) {
363 struct intel_context *tmp;
365 tmp = intel_context_create(engine);
371 err = intel_context_pin(tmp);
373 intel_context_put(tmp);
377 memset32(tmp->ring->vaddr,
378 0xdeadbeef, /* trigger a hang if executed */
379 tmp->ring->vma->size / sizeof(u32));
384 /* Create max prio spinner, followed by N low prio nops */
385 rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
391 i915_request_get(rq);
392 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
393 i915_request_add(rq);
395 if (!igt_wait_for_spinner(&spin, rq)) {
396 intel_gt_set_wedged(gt);
397 i915_request_put(rq);
402 /* Fill the ring until we cause a wrap */
404 while (intel_ring_direction(ce[0]->ring,
406 ce[0]->ring->tail) <= 0) {
407 struct i915_request *tmp;
409 tmp = intel_context_create_request(ce[0]);
412 i915_request_put(rq);
416 i915_request_add(tmp);
417 intel_engine_flush_submission(engine);
420 intel_engine_flush_submission(engine);
421 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
427 GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
429 ce[0]->ring->tail) <= 0);
430 i915_request_put(rq);
432 /* Create a second ring to preempt the first ring after rq[0] */
433 rq = intel_context_create_request(ce[1]);
439 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
440 i915_request_get(rq);
441 i915_request_add(rq);
443 err = wait_for_submit(engine, rq, HZ / 2);
444 i915_request_put(rq);
446 pr_err("%s: preemption request was not submitted\n",
451 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
453 ce[0]->ring->tail, ce[0]->ring->emit,
454 ce[1]->ring->tail, ce[1]->ring->emit);
457 intel_engine_flush_submission(engine);
458 igt_spinner_end(&spin);
459 for (n = 0; n < ARRAY_SIZE(ce); n++) {
460 if (IS_ERR_OR_NULL(ce[n]))
463 intel_context_unpin(ce[n]);
464 intel_context_put(ce[n]);
466 st_engine_heartbeat_enable(engine);
467 if (igt_live_test_end(&t))
473 igt_spinner_fini(&spin);
477 static int live_pin_rewind(void *arg)
479 struct intel_gt *gt = arg;
480 struct intel_engine_cs *engine;
481 enum intel_engine_id id;
485 * We have to be careful not to trust intel_ring too much, for example
486 * ring->head is updated upon retire which is out of sync with pinning
487 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
488 * or else we risk writing an older, stale value.
490 * To simulate this, let's apply a bit of deliberate sabotage.
493 for_each_engine(engine, gt, id) {
494 struct intel_context *ce;
495 struct i915_request *rq;
496 struct intel_ring *ring;
497 struct igt_live_test t;
499 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
504 ce = intel_context_create(engine);
510 err = intel_context_pin(ce);
512 intel_context_put(ce);
516 /* Keep the context awake while we play games */
517 err = i915_active_acquire(&ce->active);
519 intel_context_unpin(ce);
520 intel_context_put(ce);
525 /* Poison the ring, and offset the next request from HEAD */
526 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
527 ring->emit = ring->size / 2;
528 ring->tail = ring->emit;
529 GEM_BUG_ON(ring->head);
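/*
 * ring->emit/tail now point halfway into the poisoned ring while ring->head
 * is still 0, mimicking a stale head. When the context is re-pinned below,
 * CTX_RING_HEAD must be derived from where the new request actually starts;
 * reusing the stale head would send the CS through the STACK_MAGIC poison
 * first.
 */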
531 intel_context_unpin(ce);
533 /* Submit a simple nop request */
534 GEM_BUG_ON(intel_context_is_pinned(ce));
535 rq = intel_context_create_request(ce);
536 i915_active_release(&ce->active); /* e.g. async retire */
537 intel_context_put(ce);
542 GEM_BUG_ON(!rq->head);
543 i915_request_add(rq);
545 /* Expect not to hang! */
546 if (igt_live_test_end(&t)) {
555 static int live_hold_reset(void *arg)
557 struct intel_gt *gt = arg;
558 struct intel_engine_cs *engine;
559 enum intel_engine_id id;
560 struct igt_spinner spin;
564 * In order to support offline error capture for fast preempt reset,
565 * we need to decouple the guilty request and ensure that it and its
566 * descendants are not executed while the capture is in progress.
569 if (!intel_has_reset_engine(gt))
572 if (igt_spinner_init(&spin, gt))
575 for_each_engine(engine, gt, id) {
576 struct intel_context *ce;
577 struct i915_request *rq;
579 ce = intel_context_create(engine);
585 st_engine_heartbeat_disable(engine);
587 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
592 i915_request_add(rq);
594 if (!igt_wait_for_spinner(&spin, rq)) {
595 intel_gt_set_wedged(gt);
600 /* We have our request executing, now remove it and reset */
603 if (test_and_set_bit(I915_RESET_ENGINE + id,
606 intel_gt_set_wedged(gt);
610 tasklet_disable(&engine->execlists.tasklet);
612 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
613 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
615 i915_request_get(rq);
616 execlists_hold(engine, rq);
617 GEM_BUG_ON(!i915_request_on_hold(rq));
619 __intel_engine_reset_bh(engine, NULL);
620 GEM_BUG_ON(rq->fence.error != -EIO);
622 tasklet_enable(&engine->execlists.tasklet);
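/* Release the per-engine reset bit taken with test_and_set_bit() above */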
623 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
627 /* Check that we do not resubmit the held request */
628 if (!i915_request_wait(rq, 0, HZ / 5)) {
629 pr_err("%s: on hold request completed!\n",
631 i915_request_put(rq);
635 GEM_BUG_ON(!i915_request_on_hold(rq));
637 /* But is resubmitted on release */
638 execlists_unhold(engine, rq);
639 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
640 pr_err("%s: held request did not complete!\n",
642 intel_gt_set_wedged(gt);
645 i915_request_put(rq);
648 st_engine_heartbeat_enable(engine);
649 intel_context_put(ce);
654 igt_spinner_fini(&spin);
658 static const char *error_repr(int err)
660 return err ? "bad" : "good";
663 static int live_error_interrupt(void *arg)
665 static const struct error_phase {
666 enum { GOOD = 0, BAD = -EIO } error[2];
671 { { GOOD, GOOD } }, /* sentinel */
673 struct intel_gt *gt = arg;
674 struct intel_engine_cs *engine;
675 enum intel_engine_id id;
678 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
679 * of invalid commands in user batches that will cause a GPU hang.
680 * This is a faster mechanism than using hangcheck/heartbeats, but
681 * only detects problems the HW knows about -- it will not warn when
684 * To verify our detection and reset, we throw some invalid commands
685 * at the HW and wait for the interrupt.
688 if (!intel_has_reset_engine(gt))
691 for_each_engine(engine, gt, id) {
692 const struct error_phase *p;
695 st_engine_heartbeat_disable(engine);
697 for (p = phases; p->error[0] != GOOD; p++) {
698 struct i915_request *client[ARRAY_SIZE(phases->error)];
702 memset(client, 0, sizeof(*client));
703 for (i = 0; i < ARRAY_SIZE(client); i++) {
704 struct intel_context *ce;
705 struct i915_request *rq;
707 ce = intel_context_create(engine);
713 rq = intel_context_create_request(ce);
714 intel_context_put(ce);
720 if (rq->engine->emit_init_breadcrumb) {
721 err = rq->engine->emit_init_breadcrumb(rq);
723 i915_request_add(rq);
728 cs = intel_ring_begin(rq, 2);
730 i915_request_add(rq);
743 client[i] = i915_request_get(rq);
744 i915_request_add(rq);
747 err = wait_for_submit(engine, client[0], HZ / 2);
749 pr_err("%s: first request did not start within time!\n",
755 for (i = 0; i < ARRAY_SIZE(client); i++) {
756 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
757 pr_debug("%s: %s request incomplete!\n",
759 error_repr(p->error[i]));
761 if (!i915_request_started(client[i])) {
762 pr_err("%s: %s request not started!\n",
764 error_repr(p->error[i]));
769 /* Kick the tasklet to process the error */
770 intel_engine_flush_submission(engine);
771 if (client[i]->fence.error != p->error[i]) {
772 pr_err("%s: %s request (%s) with wrong error code: %d\n",
774 error_repr(p->error[i]),
775 i915_request_completed(client[i]) ? "completed" : "running",
776 client[i]->fence.error);
783 for (i = 0; i < ARRAY_SIZE(client); i++)
785 i915_request_put(client[i]);
787 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
788 engine->name, p - phases,
789 p->error[0], p->error[1]);
794 st_engine_heartbeat_enable(engine);
796 intel_gt_set_wedged(gt);
805 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
809 cs = intel_ring_begin(rq, 10);
813 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
815 *cs++ = MI_SEMAPHORE_WAIT |
816 MI_SEMAPHORE_GLOBAL_GTT |
818 MI_SEMAPHORE_SAD_NEQ_SDD;
820 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
824 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
825 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
835 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
837 intel_ring_advance(rq, cs);
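/*
 * Each link of the chain busy-waits (MI_SEMAPHORE_SAD_NEQ_SDD) on its own
 * dword in the semaphore buffer and, once released, writes into the dword of
 * the previous link (idx - 1), so the chain unwinds from the most recently
 * queued request back towards the head.
 */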
841 static struct i915_request *
842 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
844 struct intel_context *ce;
845 struct i915_request *rq;
848 ce = intel_context_create(engine);
852 rq = intel_context_create_request(ce);
857 if (rq->engine->emit_init_breadcrumb)
858 err = rq->engine->emit_init_breadcrumb(rq);
860 err = emit_semaphore_chain(rq, vma, idx);
862 i915_request_get(rq);
863 i915_request_add(rq);
868 intel_context_put(ce);
873 release_queue(struct intel_engine_cs *engine,
874 struct i915_vma *vma,
877 struct i915_sched_attr attr = {
880 struct i915_request *rq;
883 rq = intel_engine_create_kernel_request(engine);
887 cs = intel_ring_begin(rq, 4);
889 i915_request_add(rq);
893 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
894 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
898 intel_ring_advance(rq, cs);
900 i915_request_get(rq);
901 i915_request_add(rq);
904 engine->schedule(rq, &attr);
905 local_bh_enable(); /* kick tasklet */
907 i915_request_put(rq);
913 slice_semaphore_queue(struct intel_engine_cs *outer,
914 struct i915_vma *vma,
917 struct intel_engine_cs *engine;
918 struct i915_request *head;
919 enum intel_engine_id id;
922 head = semaphore_queue(outer, vma, n++);
924 return PTR_ERR(head);
926 for_each_engine(engine, outer->gt, id) {
927 if (!intel_engine_has_preemption(engine))
930 for (i = 0; i < count; i++) {
931 struct i915_request *rq;
933 rq = semaphore_queue(engine, vma, n++);
939 i915_request_put(rq);
943 err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
947 if (i915_request_wait(head, 0,
948 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
949 pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
950 outer->name, count, n);
952 intel_gt_set_wedged(outer->gt);
957 i915_request_put(head);
961 static int live_timeslice_preempt(void *arg)
963 struct intel_gt *gt = arg;
964 struct drm_i915_gem_object *obj;
965 struct intel_engine_cs *engine;
966 enum intel_engine_id id;
967 struct i915_vma *vma;
972 * If a request takes too long, we would like to give other users
973 * a fair go on the GPU. In particular, users may create batches
974 * that wait upon external input, where that input may even be
975 * supplied by another GPU job. To avoid blocking forever, we
976 * need to preempt the current task and replace it with another
979 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
982 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
986 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
992 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
994 err = PTR_ERR(vaddr);
998 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1002 err = i915_vma_sync(vma);
1006 for_each_engine(engine, gt, id) {
1007 if (!intel_engine_has_preemption(engine))
1010 memset(vaddr, 0, PAGE_SIZE);
1012 st_engine_heartbeat_disable(engine);
1013 err = slice_semaphore_queue(engine, vma, 5);
1014 st_engine_heartbeat_enable(engine);
1018 if (igt_flush_test(gt->i915)) {
1025 i915_vma_unpin(vma);
1027 i915_gem_object_unpin_map(obj);
1029 i915_gem_object_put(obj);
1033 static struct i915_request *
1034 create_rewinder(struct intel_context *ce,
1035 struct i915_request *wait,
1036 void *slot, int idx)
1039 i915_ggtt_offset(ce->engine->status_page.vma) +
1040 offset_in_page(slot);
1041 struct i915_request *rq;
1045 rq = intel_context_create_request(ce);
1050 err = i915_request_await_dma_fence(rq, &wait->fence);
1055 cs = intel_ring_begin(rq, 14);
1061 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1064 *cs++ = MI_SEMAPHORE_WAIT |
1065 MI_SEMAPHORE_GLOBAL_GTT |
1067 MI_SEMAPHORE_SAD_GTE_SDD;
1072 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1073 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1074 *cs++ = offset + idx * sizeof(u32);
1077 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1082 intel_ring_advance(rq, cs);
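/*
 * The rewinder waits on the shared slot (MI_SEMAPHORE_SAD_GTE_SDD), samples
 * RING_TIMESTAMP into slot[idx] with the SRM and, in the elided dwords,
 * bumps the shared slot to release the next rewinder, so the recorded
 * timestamps expose the order in which the requests really ran.
 */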
1084 rq->sched.attr.priority = I915_PRIORITY_MASK;
1087 i915_request_get(rq);
1088 i915_request_add(rq);
1090 i915_request_put(rq);
1091 return ERR_PTR(err);
1097 static int live_timeslice_rewind(void *arg)
1099 struct intel_gt *gt = arg;
1100 struct intel_engine_cs *engine;
1101 enum intel_engine_id id;
1104 * The usual presumption on timeslice expiration is that we replace
1105 * the active context with another. However, given a chain of
1106 * dependencies we may end up replacing the context with itself,
1107 * but with only a few of its requests resubmitted, forcing us to rewind the
1108 * RING_TAIL of the original request.
1110 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1113 for_each_engine(engine, gt, id) {
1114 enum { A1, A2, B1 };
1115 enum { X = 1, Z, Y };
1116 struct i915_request *rq[3] = {};
1117 struct intel_context *ce;
1118 unsigned long timeslice;
1122 if (!intel_engine_has_timeslices(engine))
1126 * A:rq1 -- semaphore wait, timestamp X
1127 * A:rq2 -- write timestamp Y
1129 * B:rq1 [await A:rq1] -- write timestamp Z
1131 * Force timeslice, release semaphore.
1133 * Expect execution/evaluation order XZY
1136 st_engine_heartbeat_disable(engine);
1137 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1139 slot = memset32(engine->status_page.addr + 1000, 0, 4);
1141 ce = intel_context_create(engine);
1147 rq[A1] = create_rewinder(ce, NULL, slot, X);
1148 if (IS_ERR(rq[A1])) {
1149 intel_context_put(ce);
1153 rq[A2] = create_rewinder(ce, NULL, slot, Y);
1154 intel_context_put(ce);
1158 err = wait_for_submit(engine, rq[A2], HZ / 2);
1160 pr_err("%s: failed to submit first context\n",
1165 ce = intel_context_create(engine);
1171 rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1172 intel_context_put(ce);
1176 err = wait_for_submit(engine, rq[B1], HZ / 2);
1178 pr_err("%s: failed to submit second context\n",
1183 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1184 ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1185 while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1186 /* Wait for the timeslice to kick in */
1187 del_timer(&engine->execlists.timer);
1188 tasklet_hi_schedule(&engine->execlists.tasklet);
1189 intel_engine_flush_submission(engine);
1191 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1192 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1193 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1194 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1196 /* Release the hounds! */
1198 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1200 for (i = 1; i <= 3; i++) {
1201 unsigned long timeout = jiffies + HZ / 2;
1203 while (!READ_ONCE(slot[i]) &&
1204 time_before(jiffies, timeout))
1207 if (!time_before(jiffies, timeout)) {
1208 pr_err("%s: rq[%d] timed out\n",
1209 engine->name, i - 1);
1214 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1218 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1219 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1227 memset32(&slot[0], -1, 4);
1230 engine->props.timeslice_duration_ms = timeslice;
1231 st_engine_heartbeat_enable(engine);
1232 for (i = 0; i < 3; i++)
1233 i915_request_put(rq[i]);
1234 if (igt_flush_test(gt->i915))
1243 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1245 struct i915_request *rq;
1247 rq = intel_engine_create_kernel_request(engine);
1251 i915_request_get(rq);
1252 i915_request_add(rq);
1257 static long slice_timeout(struct intel_engine_cs *engine)
1261 /* Enough time for a timeslice to kick in, and kick out */
1262 timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1264 /* Enough time for the nop request to complete */
1270 static int live_timeslice_queue(void *arg)
1272 struct intel_gt *gt = arg;
1273 struct drm_i915_gem_object *obj;
1274 struct intel_engine_cs *engine;
1275 enum intel_engine_id id;
1276 struct i915_vma *vma;
1281 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
1282 * timeslicing between them disabled, we *do* enable timeslicing
1283 * if the queue demands it. (Normally, we do not submit if
1284 * ELSP[1] is already occupied, so must rely on timeslicing to
1285 * eject ELSP[0] in favour of the queue.)
1287 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1290 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1292 return PTR_ERR(obj);
1294 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1300 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1301 if (IS_ERR(vaddr)) {
1302 err = PTR_ERR(vaddr);
1306 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1310 err = i915_vma_sync(vma);
1314 for_each_engine(engine, gt, id) {
1315 struct i915_sched_attr attr = {
1316 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1318 struct i915_request *rq, *nop;
1320 if (!intel_engine_has_preemption(engine))
1323 st_engine_heartbeat_disable(engine);
1324 memset(vaddr, 0, PAGE_SIZE);
1326 /* ELSP[0]: semaphore wait */
1327 rq = semaphore_queue(engine, vma, 0);
1332 engine->schedule(rq, &attr);
1333 err = wait_for_submit(engine, rq, HZ / 2);
1335 pr_err("%s: Timed out trying to submit semaphores\n",
1340 /* ELSP[1]: nop request */
1341 nop = nop_request(engine);
1346 err = wait_for_submit(engine, nop, HZ / 2);
1347 i915_request_put(nop);
1349 pr_err("%s: Timed out trying to submit nop\n",
1354 GEM_BUG_ON(i915_request_completed(rq));
1355 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1357 /* Queue: semaphore signal, at the same priority as the semaphore */
1358 err = release_queue(engine, vma, 1, effective_prio(rq));
1362 /* Wait until we ack the release_queue and start timeslicing */
1365 intel_engine_flush_submission(engine);
1366 } while (READ_ONCE(engine->execlists.pending[0]));
1368 /* Timeslice every jiffy, so within 2 we should signal */
1369 if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1370 struct drm_printer p =
1371 drm_info_printer(gt->i915->drm.dev);
1373 pr_err("%s: Failed to timeslice into queue\n",
1375 intel_engine_dump(engine, &p,
1376 "%s\n", engine->name);
1378 memset(vaddr, 0xff, PAGE_SIZE);
1382 i915_request_put(rq);
1384 st_engine_heartbeat_enable(engine);
1390 i915_vma_unpin(vma);
1392 i915_gem_object_unpin_map(obj);
1394 i915_gem_object_put(obj);
1398 static int live_timeslice_nopreempt(void *arg)
1400 struct intel_gt *gt = arg;
1401 struct intel_engine_cs *engine;
1402 enum intel_engine_id id;
1403 struct igt_spinner spin;
1407 * We should not timeslice into a request that is marked with
1408 * I915_FENCE_FLAG_NOPREEMPT.
1410 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1413 if (igt_spinner_init(&spin, gt))
1416 for_each_engine(engine, gt, id) {
1417 struct intel_context *ce;
1418 struct i915_request *rq;
1419 unsigned long timeslice;
1421 if (!intel_engine_has_preemption(engine))
1424 ce = intel_context_create(engine);
1430 st_engine_heartbeat_disable(engine);
1431 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1433 /* Create an unpreemptible spinner */
1435 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1436 intel_context_put(ce);
1442 i915_request_get(rq);
1443 i915_request_add(rq);
1445 if (!igt_wait_for_spinner(&spin, rq)) {
1446 i915_request_put(rq);
1451 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1452 i915_request_put(rq);
1454 /* Followed by a maximum priority barrier (heartbeat) */
1456 ce = intel_context_create(engine);
1462 rq = intel_context_create_request(ce);
1463 intel_context_put(ce);
1469 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1470 i915_request_get(rq);
1471 i915_request_add(rq);
1474 * Wait until the barrier is in ELSP, and we know timeslicing
1475 * will have been activated.
1477 if (wait_for_submit(engine, rq, HZ / 2)) {
1478 i915_request_put(rq);
1484 * Since the ELSP[0] request is unpreemptible, it should not
1485 * allow the maximum priority barrier through. Wait long
1486 * enough to see if it is timesliced in by mistake.
1488 if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1489 pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1493 i915_request_put(rq);
1496 igt_spinner_end(&spin);
1498 xchg(&engine->props.timeslice_duration_ms, timeslice);
1499 st_engine_heartbeat_enable(engine);
1503 if (igt_flush_test(gt->i915)) {
1509 igt_spinner_fini(&spin);
1513 static int live_busywait_preempt(void *arg)
1515 struct intel_gt *gt = arg;
1516 struct i915_gem_context *ctx_hi, *ctx_lo;
1517 struct intel_engine_cs *engine;
1518 struct drm_i915_gem_object *obj;
1519 struct i915_vma *vma;
1520 enum intel_engine_id id;
1525 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1526 * preempt the busywaits used to synchronise between rings.
1529 ctx_hi = kernel_context(gt->i915);
1532 ctx_hi->sched.priority =
1533 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1535 ctx_lo = kernel_context(gt->i915);
1538 ctx_lo->sched.priority =
1539 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1541 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1547 map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1553 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1559 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1563 err = i915_vma_sync(vma);
1567 for_each_engine(engine, gt, id) {
1568 struct i915_request *lo, *hi;
1569 struct igt_live_test t;
1572 if (!intel_engine_has_preemption(engine))
1575 if (!intel_engine_can_store_dword(engine))
1578 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1584 * We create two requests. The low priority request
1585 * busywaits on a semaphore (inside the ringbuffer where
1586 * it should be preemptible) and the high priority request
1587 * uses an MI_STORE_DWORD_IMM to update the semaphore value
1588 * allowing the first request to complete. If preemption
1589 * fails, we hang instead.
1592 lo = igt_request_alloc(ctx_lo, engine);
1598 cs = intel_ring_begin(lo, 8);
1601 i915_request_add(lo);
1605 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1606 *cs++ = i915_ggtt_offset(vma);
1610 /* XXX Do we need a flush + invalidate here? */
1612 *cs++ = MI_SEMAPHORE_WAIT |
1613 MI_SEMAPHORE_GLOBAL_GTT |
1615 MI_SEMAPHORE_SAD_EQ_SDD;
1617 *cs++ = i915_ggtt_offset(vma);
1620 intel_ring_advance(lo, cs);
1622 i915_request_get(lo);
1623 i915_request_add(lo);
1625 if (wait_for(READ_ONCE(*map), 10)) {
1626 i915_request_put(lo);
1631 /* Low priority request should be busywaiting now */
1632 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1633 i915_request_put(lo);
1634 pr_err("%s: Busywaiting request did not!\n",
1640 hi = igt_request_alloc(ctx_hi, engine);
1643 i915_request_put(lo);
1647 cs = intel_ring_begin(hi, 4);
1650 i915_request_add(hi);
1651 i915_request_put(lo);
1655 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1656 *cs++ = i915_ggtt_offset(vma);
1660 intel_ring_advance(hi, cs);
1661 i915_request_add(hi);
1663 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1664 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1666 pr_err("%s: Failed to preempt semaphore busywait!\n",
1669 intel_engine_dump(engine, &p, "%s\n", engine->name);
1672 i915_request_put(lo);
1673 intel_gt_set_wedged(gt);
1677 GEM_BUG_ON(READ_ONCE(*map));
1678 i915_request_put(lo);
1680 if (igt_live_test_end(&t)) {
1688 i915_vma_unpin(vma);
1690 i915_gem_object_unpin_map(obj);
1692 i915_gem_object_put(obj);
1694 kernel_context_close(ctx_lo);
1696 kernel_context_close(ctx_hi);
1700 static struct i915_request *
1701 spinner_create_request(struct igt_spinner *spin,
1702 struct i915_gem_context *ctx,
1703 struct intel_engine_cs *engine,
1706 struct intel_context *ce;
1707 struct i915_request *rq;
1709 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1711 return ERR_CAST(ce);
1713 rq = igt_spinner_create_request(spin, ce, arb);
1714 intel_context_put(ce);
1718 static int live_preempt(void *arg)
1720 struct intel_gt *gt = arg;
1721 struct i915_gem_context *ctx_hi, *ctx_lo;
1722 struct igt_spinner spin_hi, spin_lo;
1723 struct intel_engine_cs *engine;
1724 enum intel_engine_id id;
1727 if (igt_spinner_init(&spin_hi, gt))
1730 if (igt_spinner_init(&spin_lo, gt))
1733 ctx_hi = kernel_context(gt->i915);
1736 ctx_hi->sched.priority =
1737 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1739 ctx_lo = kernel_context(gt->i915);
1742 ctx_lo->sched.priority =
1743 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1745 for_each_engine(engine, gt, id) {
1746 struct igt_live_test t;
1747 struct i915_request *rq;
1749 if (!intel_engine_has_preemption(engine))
1752 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1757 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1764 i915_request_add(rq);
1765 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1766 GEM_TRACE("lo spinner failed to start\n");
1768 intel_gt_set_wedged(gt);
1773 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1776 igt_spinner_end(&spin_lo);
1781 i915_request_add(rq);
1782 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1783 GEM_TRACE("hi spinner failed to start\n");
1785 intel_gt_set_wedged(gt);
1790 igt_spinner_end(&spin_hi);
1791 igt_spinner_end(&spin_lo);
1793 if (igt_live_test_end(&t)) {
1801 kernel_context_close(ctx_lo);
1803 kernel_context_close(ctx_hi);
1805 igt_spinner_fini(&spin_lo);
1807 igt_spinner_fini(&spin_hi);
1811 static int live_late_preempt(void *arg)
1813 struct intel_gt *gt = arg;
1814 struct i915_gem_context *ctx_hi, *ctx_lo;
1815 struct igt_spinner spin_hi, spin_lo;
1816 struct intel_engine_cs *engine;
1817 struct i915_sched_attr attr = {};
1818 enum intel_engine_id id;
1821 if (igt_spinner_init(&spin_hi, gt))
1824 if (igt_spinner_init(&spin_lo, gt))
1827 ctx_hi = kernel_context(gt->i915);
1831 ctx_lo = kernel_context(gt->i915);
1835 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1836 ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1838 for_each_engine(engine, gt, id) {
1839 struct igt_live_test t;
1840 struct i915_request *rq;
1842 if (!intel_engine_has_preemption(engine))
1845 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1850 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1857 i915_request_add(rq);
1858 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1859 pr_err("First context failed to start\n");
1863 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1866 igt_spinner_end(&spin_lo);
1871 i915_request_add(rq);
1872 if (igt_wait_for_spinner(&spin_hi, rq)) {
1873 pr_err("Second context overtook first?\n");
1877 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1878 engine->schedule(rq, &attr);
1880 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1881 pr_err("High priority context failed to preempt the low priority context\n");
1886 igt_spinner_end(&spin_hi);
1887 igt_spinner_end(&spin_lo);
1889 if (igt_live_test_end(&t)) {
1897 kernel_context_close(ctx_lo);
1899 kernel_context_close(ctx_hi);
1901 igt_spinner_fini(&spin_lo);
1903 igt_spinner_fini(&spin_hi);
1907 igt_spinner_end(&spin_hi);
1908 igt_spinner_end(&spin_lo);
1909 intel_gt_set_wedged(gt);
1914 struct preempt_client {
1915 struct igt_spinner spin;
1916 struct i915_gem_context *ctx;
1919 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1921 c->ctx = kernel_context(gt->i915);
1925 if (igt_spinner_init(&c->spin, gt))
1931 kernel_context_close(c->ctx);
1935 static void preempt_client_fini(struct preempt_client *c)
1937 igt_spinner_fini(&c->spin);
1938 kernel_context_close(c->ctx);
1941 static int live_nopreempt(void *arg)
1943 struct intel_gt *gt = arg;
1944 struct intel_engine_cs *engine;
1945 struct preempt_client a, b;
1946 enum intel_engine_id id;
1950 * Verify that we can disable preemption for an individual request
1951 * that may be being observed and does not want to be interrupted.
1954 if (preempt_client_init(gt, &a))
1956 if (preempt_client_init(gt, &b))
1958 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1960 for_each_engine(engine, gt, id) {
1961 struct i915_request *rq_a, *rq_b;
1963 if (!intel_engine_has_preemption(engine))
1966 engine->execlists.preempt_hang.count = 0;
1968 rq_a = spinner_create_request(&a.spin,
1972 err = PTR_ERR(rq_a);
1976 /* Low priority client, but unpreemptable! */
1977 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1979 i915_request_add(rq_a);
1980 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1981 pr_err("First client failed to start\n");
1985 rq_b = spinner_create_request(&b.spin,
1989 err = PTR_ERR(rq_b);
1993 i915_request_add(rq_b);
1995 /* B is much more important than A! (But A is unpreemptable.) */
1996 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1998 /* Wait long enough for preemption and timeslicing */
1999 if (igt_wait_for_spinner(&b.spin, rq_b)) {
2000 pr_err("Second client started too early!\n");
2004 igt_spinner_end(&a.spin);
2006 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2007 pr_err("Second client failed to start\n");
2011 igt_spinner_end(&b.spin);
2013 if (engine->execlists.preempt_hang.count) {
2014 pr_err("Preemption recorded x%d; should have been suppressed!\n",
2015 engine->execlists.preempt_hang.count);
2020 if (igt_flush_test(gt->i915))
2026 preempt_client_fini(&b);
2028 preempt_client_fini(&a);
2032 igt_spinner_end(&b.spin);
2033 igt_spinner_end(&a.spin);
2034 intel_gt_set_wedged(gt);
2039 struct live_preempt_cancel {
2040 struct intel_engine_cs *engine;
2041 struct preempt_client a, b;
2044 static int __cancel_active0(struct live_preempt_cancel *arg)
2046 struct i915_request *rq;
2047 struct igt_live_test t;
2050 /* Preempt cancel of ELSP0 */
2051 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2052 if (igt_live_test_begin(&t, arg->engine->i915,
2053 __func__, arg->engine->name))
2056 rq = spinner_create_request(&arg->a.spin,
2057 arg->a.ctx, arg->engine,
2062 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2063 i915_request_get(rq);
2064 i915_request_add(rq);
2065 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2070 intel_context_set_banned(rq->context);
2071 err = intel_engine_pulse(arg->engine);
2075 err = wait_for_reset(arg->engine, rq, HZ / 2);
2077 pr_err("Cancelled inflight0 request did not reset\n");
2082 i915_request_put(rq);
2083 if (igt_live_test_end(&t))
2088 static int __cancel_active1(struct live_preempt_cancel *arg)
2090 struct i915_request *rq[2] = {};
2091 struct igt_live_test t;
2094 /* Preempt cancel of ELSP1 */
2095 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2096 if (igt_live_test_begin(&t, arg->engine->i915,
2097 __func__, arg->engine->name))
2100 rq[0] = spinner_create_request(&arg->a.spin,
2101 arg->a.ctx, arg->engine,
2102 MI_NOOP); /* no preemption */
2104 return PTR_ERR(rq[0]);
2106 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2107 i915_request_get(rq[0]);
2108 i915_request_add(rq[0]);
2109 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2114 rq[1] = spinner_create_request(&arg->b.spin,
2115 arg->b.ctx, arg->engine,
2117 if (IS_ERR(rq[1])) {
2118 err = PTR_ERR(rq[1]);
2122 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2123 i915_request_get(rq[1]);
2124 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2125 i915_request_add(rq[1]);
2129 intel_context_set_banned(rq[1]->context);
2130 err = intel_engine_pulse(arg->engine);
2134 igt_spinner_end(&arg->a.spin);
2135 err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2139 if (rq[0]->fence.error != 0) {
2140 pr_err("Normal inflight0 request did not complete\n");
2145 if (rq[1]->fence.error != -EIO) {
2146 pr_err("Cancelled inflight1 request did not report -EIO\n");
2152 i915_request_put(rq[1]);
2153 i915_request_put(rq[0]);
2154 if (igt_live_test_end(&t))
2159 static int __cancel_queued(struct live_preempt_cancel *arg)
2161 struct i915_request *rq[3] = {};
2162 struct igt_live_test t;
2165 /* Full ELSP and one in the wings */
2166 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2167 if (igt_live_test_begin(&t, arg->engine->i915,
2168 __func__, arg->engine->name))
2171 rq[0] = spinner_create_request(&arg->a.spin,
2172 arg->a.ctx, arg->engine,
2175 return PTR_ERR(rq[0]);
2177 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2178 i915_request_get(rq[0]);
2179 i915_request_add(rq[0]);
2180 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2185 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2186 if (IS_ERR(rq[1])) {
2187 err = PTR_ERR(rq[1]);
2191 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2192 i915_request_get(rq[1]);
2193 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2194 i915_request_add(rq[1]);
2198 rq[2] = spinner_create_request(&arg->b.spin,
2199 arg->a.ctx, arg->engine,
2201 if (IS_ERR(rq[2])) {
2202 err = PTR_ERR(rq[2]);
2206 i915_request_get(rq[2]);
2207 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2208 i915_request_add(rq[2]);
2212 intel_context_set_banned(rq[2]->context);
2213 err = intel_engine_pulse(arg->engine);
2217 err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2221 if (rq[0]->fence.error != -EIO) {
2222 pr_err("Cancelled inflight0 request did not report -EIO\n");
2227 if (rq[1]->fence.error != 0) {
2228 pr_err("Normal inflight1 request did not complete\n");
2233 if (rq[2]->fence.error != -EIO) {
2234 pr_err("Cancelled queued request did not report -EIO\n");
2240 i915_request_put(rq[2]);
2241 i915_request_put(rq[1]);
2242 i915_request_put(rq[0]);
2243 if (igt_live_test_end(&t))
2248 static int __cancel_hostile(struct live_preempt_cancel *arg)
2250 struct i915_request *rq;
2253 /* Preempt cancel non-preemptible spinner in ELSP0 */
2254 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2257 if (!intel_has_reset_engine(arg->engine->gt))
2260 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2261 rq = spinner_create_request(&arg->a.spin,
2262 arg->a.ctx, arg->engine,
2263 MI_NOOP); /* preemption disabled */
2267 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2268 i915_request_get(rq);
2269 i915_request_add(rq);
2270 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2275 intel_context_set_banned(rq->context);
2276 err = intel_engine_pulse(arg->engine); /* force reset */
2280 err = wait_for_reset(arg->engine, rq, HZ / 2);
2282 pr_err("Cancelled inflight0 request did not reset\n");
2287 i915_request_put(rq);
2288 if (igt_flush_test(arg->engine->i915))
2293 static void force_reset_timeout(struct intel_engine_cs *engine)
2295 engine->reset_timeout.probability = 999;
2296 atomic_set(&engine->reset_timeout.times, -1);
2299 static void cancel_reset_timeout(struct intel_engine_cs *engine)
2301 memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
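/*
 * force_reset_timeout()/cancel_reset_timeout() poke what appears to be a
 * selftest-only fault-injection attribute: with probability 999 and
 * times == -1, every preempt-timeout reset attempted inside the window is
 * presumably made to fail, letting __cancel_fail() exercise the fallback
 * where only a later heartbeat can reset the device.
 */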
2304 static int __cancel_fail(struct live_preempt_cancel *arg)
2306 struct intel_engine_cs *engine = arg->engine;
2307 struct i915_request *rq;
2310 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2313 if (!intel_has_reset_engine(engine->gt))
2316 GEM_TRACE("%s(%s)\n", __func__, engine->name);
2317 rq = spinner_create_request(&arg->a.spin,
2319 MI_NOOP); /* preemption disabled */
2323 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2324 i915_request_get(rq);
2325 i915_request_add(rq);
2326 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2331 intel_context_set_banned(rq->context);
2333 err = intel_engine_pulse(engine);
2337 force_reset_timeout(engine);
2339 /* force preempt reset [failure] */
2340 while (!engine->execlists.pending[0])
2341 intel_engine_flush_submission(engine);
2342 del_timer_sync(&engine->execlists.preempt);
2343 intel_engine_flush_submission(engine);
2345 cancel_reset_timeout(engine);
2347 /* after failure, require heartbeats to reset device */
2348 intel_engine_set_heartbeat(engine, 1);
2349 err = wait_for_reset(engine, rq, HZ / 2);
2350 intel_engine_set_heartbeat(engine,
2351 engine->defaults.heartbeat_interval_ms);
2353 pr_err("Cancelled inflight0 request did not reset\n");
2358 i915_request_put(rq);
2359 if (igt_flush_test(engine->i915))
2364 static int live_preempt_cancel(void *arg)
2366 struct intel_gt *gt = arg;
2367 struct live_preempt_cancel data;
2368 enum intel_engine_id id;
2372 * To cancel an inflight context, we need to first remove it from the
2373 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2376 if (preempt_client_init(gt, &data.a))
2378 if (preempt_client_init(gt, &data.b))
2381 for_each_engine(data.engine, gt, id) {
2382 if (!intel_engine_has_preemption(data.engine))
2385 err = __cancel_active0(&data);
2389 err = __cancel_active1(&data);
2393 err = __cancel_queued(&data);
2397 err = __cancel_hostile(&data);
2401 err = __cancel_fail(&data);
2408 preempt_client_fini(&data.b);
2410 preempt_client_fini(&data.a);
2415 igt_spinner_end(&data.b.spin);
2416 igt_spinner_end(&data.a.spin);
2417 intel_gt_set_wedged(gt);
2421 static int live_suppress_self_preempt(void *arg)
2423 struct intel_gt *gt = arg;
2424 struct intel_engine_cs *engine;
2425 struct i915_sched_attr attr = {
2426 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2428 struct preempt_client a, b;
2429 enum intel_engine_id id;
2433 * Verify that if a preemption request does not cause a change in
2434 * the current execution order, the preempt-to-idle injection is
2435 * skipped and that we do not accidentally apply it after the CS
2439 if (intel_uc_uses_guc_submission(&gt->uc))
2440 return 0; /* presume black box */
2442 if (intel_vgpu_active(gt->i915))
2443 return 0; /* GVT forces single port & request submission */
2445 if (preempt_client_init(gt, &a))
2447 if (preempt_client_init(gt, &b))
2450 for_each_engine(engine, gt, id) {
2451 struct i915_request *rq_a, *rq_b;
2454 if (!intel_engine_has_preemption(engine))
2457 if (igt_flush_test(gt->i915))
2460 st_engine_heartbeat_disable(engine);
2461 engine->execlists.preempt_hang.count = 0;
2463 rq_a = spinner_create_request(&a.spin,
2467 err = PTR_ERR(rq_a);
2468 st_engine_heartbeat_enable(engine);
2472 i915_request_add(rq_a);
2473 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2474 pr_err("First client failed to start\n");
2475 st_engine_heartbeat_enable(engine);
2479 /* Keep postponing the timer to avoid premature slicing */
2480 mod_timer(&engine->execlists.timer, jiffies + HZ);
2481 for (depth = 0; depth < 8; depth++) {
2482 rq_b = spinner_create_request(&b.spin,
2486 err = PTR_ERR(rq_b);
2487 st_engine_heartbeat_enable(engine);
2490 i915_request_add(rq_b);
2492 GEM_BUG_ON(i915_request_completed(rq_a));
2493 engine->schedule(rq_a, &attr);
2494 igt_spinner_end(&a.spin);
2496 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2497 pr_err("Second client failed to start\n");
2498 st_engine_heartbeat_enable(engine);
2505 igt_spinner_end(&a.spin);
2507 if (engine->execlists.preempt_hang.count) {
2508 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2510 engine->execlists.preempt_hang.count,
2512 st_engine_heartbeat_enable(engine);
2517 st_engine_heartbeat_enable(engine);
2518 if (igt_flush_test(gt->i915))
2524 preempt_client_fini(&b);
2526 preempt_client_fini(&a);
2530 igt_spinner_end(&b.spin);
2531 igt_spinner_end(&a.spin);
2532 intel_gt_set_wedged(gt);
2537 static int live_chain_preempt(void *arg)
2539 struct intel_gt *gt = arg;
2540 struct intel_engine_cs *engine;
2541 struct preempt_client hi, lo;
2542 enum intel_engine_id id;
2546 * Build a chain AB...BA between two contexts (A, B) and request
2547 * preemption of the last request. It should then complete before
2548 * the previously submitted spinner in B.
2551 if (preempt_client_init(gt, &hi))
2554 if (preempt_client_init(gt, &lo))
2557 for_each_engine(engine, gt, id) {
2558 struct i915_sched_attr attr = {
2559 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2561 struct igt_live_test t;
2562 struct i915_request *rq;
2563 int ring_size, count, i;
2565 if (!intel_engine_has_preemption(engine))
2568 rq = spinner_create_request(&lo.spin,
2574 i915_request_get(rq);
2575 i915_request_add(rq);
2577 ring_size = rq->wa_tail - rq->head;
2579 ring_size += rq->ring->size;
2580 ring_size = rq->ring->size / ring_size;
2581 pr_debug("%s(%s): Using maximum of %d requests\n",
2582 __func__, engine->name, ring_size);
2584 igt_spinner_end(&lo.spin);
2585 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2586 pr_err("Timed out waiting to flush %s\n", engine->name);
2587 i915_request_put(rq);
2590 i915_request_put(rq);
2592 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2597 for_each_prime_number_from(count, 1, ring_size) {
2598 rq = spinner_create_request(&hi.spin,
2603 i915_request_add(rq);
2604 if (!igt_wait_for_spinner(&hi.spin, rq))
2607 rq = spinner_create_request(&lo.spin,
2612 i915_request_add(rq);
2614 for (i = 0; i < count; i++) {
2615 rq = igt_request_alloc(lo.ctx, engine);
2618 i915_request_add(rq);
2621 rq = igt_request_alloc(hi.ctx, engine);
2625 i915_request_get(rq);
2626 i915_request_add(rq);
2627 engine->schedule(rq, &attr);
2629 igt_spinner_end(&hi.spin);
2630 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2631 struct drm_printer p =
2632 drm_info_printer(gt->i915->drm.dev);
2634 pr_err("Failed to preempt over chain of %d\n",
2636 intel_engine_dump(engine, &p,
2637 "%s\n", engine->name);
2638 i915_request_put(rq);
2641 igt_spinner_end(&lo.spin);
2642 i915_request_put(rq);
2644 rq = igt_request_alloc(lo.ctx, engine);
2648 i915_request_get(rq);
2649 i915_request_add(rq);
2651 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2652 struct drm_printer p =
2653 drm_info_printer(gt->i915->drm.dev);
2655 pr_err("Failed to flush low priority chain of %d requests\n",
2657 intel_engine_dump(engine, &p,
2658 "%s\n", engine->name);
2660 i915_request_put(rq);
2663 i915_request_put(rq);
2666 if (igt_live_test_end(&t)) {
2674 preempt_client_fini(&lo);
2676 preempt_client_fini(&hi);
2680 igt_spinner_end(&hi.spin);
2681 igt_spinner_end(&lo.spin);
2682 intel_gt_set_wedged(gt);
2687 static int create_gang(struct intel_engine_cs *engine,
2688 struct i915_request **prev)
2690 struct drm_i915_gem_object *obj;
2691 struct intel_context *ce;
2692 struct i915_request *rq;
2693 struct i915_vma *vma;
2697 ce = intel_context_create(engine);
2701 obj = i915_gem_object_create_internal(engine->i915, 4096);
2707 vma = i915_vma_instance(obj, ce->vm, NULL);
2713 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2717 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2723 /* Semaphore target: spin until zero */
2724 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2726 *cs++ = MI_SEMAPHORE_WAIT |
2728 MI_SEMAPHORE_SAD_EQ_SDD;
2730 *cs++ = lower_32_bits(vma->node.start);
2731 *cs++ = upper_32_bits(vma->node.start);
2734 u64 offset = (*prev)->batch->node.start;
2736 /* Terminate the spinner in the next lower priority batch. */
2737 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2738 *cs++ = lower_32_bits(offset);
2739 *cs++ = upper_32_bits(offset);
2743 *cs++ = MI_BATCH_BUFFER_END;
2744 i915_gem_object_flush_map(obj);
2745 i915_gem_object_unpin_map(obj);
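/*
 * Each gang batch spins (MI_SEMAPHORE_SAD_EQ_SDD) on the first dword of its
 * own buffer waiting for it to become zero; each later, higher priority
 * member emits an MI_STORE_DWORD_IMM that zeroes its predecessor's buffer,
 * and the final batch is released by the CPU write in live_preempt_gang().
 */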
2747 rq = intel_context_create_request(ce);
2753 rq->batch = i915_vma_get(vma);
2754 i915_request_get(rq);
2757 err = i915_request_await_object(rq, vma->obj, false);
2759 err = i915_vma_move_to_active(vma, rq, 0);
2761 err = rq->engine->emit_bb_start(rq,
2764 i915_vma_unlock(vma);
2765 i915_request_add(rq);
2769 i915_gem_object_put(obj);
2770 intel_context_put(ce);
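/*
 * Thread the gang through the (otherwise unused) mock.link so that
 * live_preempt_gang() can walk from the newest request back down the chain.
 */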
2772 rq->mock.link.next = &(*prev)->mock.link;
2777 i915_vma_put(rq->batch);
2778 i915_request_put(rq);
2780 i915_gem_object_put(obj);
2782 intel_context_put(ce);
2786 static int __live_preempt_ring(struct intel_engine_cs *engine,
2787 struct igt_spinner *spin,
2788 int queue_sz, int ring_sz)
2790 struct intel_context *ce[2] = {};
2791 struct i915_request *rq;
2792 struct igt_live_test t;
2796 if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2799 for (n = 0; n < ARRAY_SIZE(ce); n++) {
2800 struct intel_context *tmp;
2802 tmp = intel_context_create(engine);
2808 tmp->ring = __intel_context_ring_size(ring_sz);
2810 err = intel_context_pin(tmp);
2812 intel_context_put(tmp);
2816 memset32(tmp->ring->vaddr,
2817 0xdeadbeef, /* trigger a hang if executed */
2818 tmp->ring->vma->size / sizeof(u32));
2823 rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2829 i915_request_get(rq);
2830 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2831 i915_request_add(rq);
2833 if (!igt_wait_for_spinner(spin, rq)) {
2834 intel_gt_set_wedged(engine->gt);
2835 i915_request_put(rq);
2840 /* Fill the ring, until we will cause a wrap */
2842 while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2843 struct i915_request *tmp;
2845 tmp = intel_context_create_request(ce[0]);
2848 i915_request_put(rq);
2852 i915_request_add(tmp);
2853 intel_engine_flush_submission(engine);
2856 intel_engine_flush_submission(engine);
2857 pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2858 engine->name, queue_sz, n,
2863 i915_request_put(rq);
2865 /* Create a second request to preempt the first ring */
2866 rq = intel_context_create_request(ce[1]);
2872 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2873 i915_request_get(rq);
2874 i915_request_add(rq);
2876 err = wait_for_submit(engine, rq, HZ / 2);
2877 i915_request_put(rq);
2879 pr_err("%s: preemption request was not submited\n",
2884 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2886 ce[0]->ring->tail, ce[0]->ring->emit,
2887 ce[1]->ring->tail, ce[1]->ring->emit);
2890 intel_engine_flush_submission(engine);
2891 igt_spinner_end(spin);
2892 for (n = 0; n < ARRAY_SIZE(ce); n++) {
2893 if (IS_ERR_OR_NULL(ce[n]))
2896 intel_context_unpin(ce[n]);
2897 intel_context_put(ce[n]);
2899 if (igt_live_test_end(&t))
2904 static int live_preempt_ring(void *arg)
2906 struct intel_gt *gt = arg;
2907 struct intel_engine_cs *engine;
2908 struct igt_spinner spin;
2909 enum intel_engine_id id;
2913 * Check that we rollback large chunks of a ring in order to do a
2914 * preemption event. Similar to live_unlite_ring, but looking at
2915 * ring size rather than the impact of intel_ring_direction().
2918 if (igt_spinner_init(&spin, gt))
2921 for_each_engine(engine, gt, id) {
2924 if (!intel_engine_has_preemption(engine))
2927 if (!intel_engine_can_store_dword(engine))
2930 st_engine_heartbeat_disable(engine);
2932 for (n = 0; n <= 3; n++) {
2933 err = __live_preempt_ring(engine, &spin,
2934 n * SZ_4K / 4, SZ_4K);
2939 st_engine_heartbeat_enable(engine);
2944 igt_spinner_fini(&spin);
2948 static int live_preempt_gang(void *arg)
2950 struct intel_gt *gt = arg;
2951 struct intel_engine_cs *engine;
2952 enum intel_engine_id id;
2955 * Build as long a chain of preempters as we can, with each
2956 * request higher priority than the last. Once we are ready, we release
2957 * the last batch which then percolates down the chain, each releasing
2958 * the next oldest in turn. The intent is to simply push as hard as we
2959 * can with the number of preemptions, trying to exceed narrow HW
2960 * limits. At a minimum, we insist that we can sort all the user
2961 * high priority levels into execution order.
2964 for_each_engine(engine, gt, id) {
2965 struct i915_request *rq = NULL;
2966 struct igt_live_test t;
2967 IGT_TIMEOUT(end_time);
2972 if (!intel_engine_has_preemption(engine))
2975 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2979 struct i915_sched_attr attr = {
2980 .priority = I915_USER_PRIORITY(prio++),
2983 err = create_gang(engine, &rq);
2987 /* Submit each spinner at increasing priority */
2988 engine->schedule(rq, &attr);
2989 } while (prio <= I915_PRIORITY_MAX &&
2990 !__igt_timeout(end_time, NULL));
2991 pr_debug("%s: Preempt chain of %d requests\n",
2992 engine->name, prio);
2995 * Such that the last spinner is the highest priority and
2996 * should execute first. When that spinner completes,
2997 * it will terminate the next lowest spinner until there
2998 * are no more spinners and the gang is complete.
3000 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
3003 i915_gem_object_unpin_map(rq->batch->obj);
3006 intel_gt_set_wedged(gt);
3009 while (rq) { /* wait for each rq from highest to lowest prio */
3010 struct i915_request *n = list_next_entry(rq, mock.link);
3012 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
3013 struct drm_printer p =
3014 drm_info_printer(engine->i915->drm.dev);
3016 pr_err("Failed to flush chain of %d requests, at %d\n",
3017 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
3018 intel_engine_dump(engine, &p,
3019 "%s\n", engine->name);
3024 i915_vma_put(rq->batch);
3025 i915_request_put(rq);
3029 if (igt_live_test_end(&t))
3038 static struct i915_vma *
3039 create_gpr_user(struct intel_engine_cs *engine,
3040 struct i915_vma *result,
3041 unsigned int offset)
3043 struct drm_i915_gem_object *obj;
3044 struct i915_vma *vma;
3049 obj = i915_gem_object_create_internal(engine->i915, 4096);
3051 return ERR_CAST(obj);
3053 vma = i915_vma_instance(obj, result->vm, NULL);
3055 i915_gem_object_put(obj);
3059 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3062 return ERR_PTR(err);
3065 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
3068 return ERR_CAST(cs);
3071 /* All GPRs are clear for new contexts. We use GPR(0) as a constant */
3072 *cs++ = MI_LOAD_REGISTER_IMM(1);
3073 *cs++ = CS_GPR(engine, 0);
3076 for (i = 1; i < NUM_GPR; i++) {
3082 * As we read and write into the context saved GPR[i], if
3083 * we restart this batch buffer from an earlier point, we
3084 * will repeat the increment and store a value > 1.
3087 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3088 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3089 *cs++ = MI_MATH_ADD;
3090 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3092 addr = result->node.start + offset + i * sizeof(*cs);
3093 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
3094 *cs++ = CS_GPR(engine, 2 * i);
3095 *cs++ = lower_32_bits(addr);
3096 *cs++ = upper_32_bits(addr);
3098 *cs++ = MI_SEMAPHORE_WAIT |
3100 MI_SEMAPHORE_SAD_GTE_SDD;
3102 *cs++ = lower_32_bits(result->node.start);
3103 *cs++ = upper_32_bits(result->node.start);
3106 *cs++ = MI_BATCH_BUFFER_END;
3107 i915_gem_object_flush_map(obj);
3108 i915_gem_object_unpin_map(obj);
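/*
 * In summary, the user batch above loads GPR0 with a constant, then for each
 * remaining GPR adds GPR0 into GPR(i) with MI_MATH, stores the low dword to
 * this client's slot in the result buffer, and waits on a semaphore before
 * moving on. If the batch ever restarted from an earlier arbitration point
 * after preemption, the increment would repeat and, as the comment above
 * notes, a value > 1 would land in the result.
 */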
3113 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3115 struct drm_i915_gem_object *obj;
3116 struct i915_vma *vma;
3119 obj = i915_gem_object_create_internal(gt->i915, sz);
3121 return ERR_CAST(obj);
3123 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3125 i915_gem_object_put(obj);
3129 err = i915_ggtt_pin(vma, NULL, 0, 0);
3132 return ERR_PTR(err);
3138 static struct i915_request *
3139 create_gpr_client(struct intel_engine_cs *engine,
3140 struct i915_vma *global,
3141 unsigned int offset)
3143 struct i915_vma *batch, *vma;
3144 struct intel_context *ce;
3145 struct i915_request *rq;
3148 ce = intel_context_create(engine);
3150 return ERR_CAST(ce);
3152 vma = i915_vma_instance(global->obj, ce->vm, NULL);
3158 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3162 batch = create_gpr_user(engine, vma, offset);
3163 if (IS_ERR(batch)) {
3164 err = PTR_ERR(batch);
3168 rq = intel_context_create_request(ce);
3175 err = i915_request_await_object(rq, vma->obj, false);
3177 err = i915_vma_move_to_active(vma, rq, 0);
3178 i915_vma_unlock(vma);
3180 i915_vma_lock(batch);
3182 err = i915_request_await_object(rq, batch->obj, false);
3184 err = i915_vma_move_to_active(batch, rq, 0);
3186 err = rq->engine->emit_bb_start(rq,
3189 i915_vma_unlock(batch);
3190 i915_vma_unpin(batch);
3193 i915_request_get(rq);
3194 i915_request_add(rq);
3197 i915_vma_put(batch);
3199 i915_vma_unpin(vma);
3201 intel_context_put(ce);
3202 return err ? ERR_PTR(err) : rq;
3205 static int preempt_user(struct intel_engine_cs *engine,
3206 struct i915_vma *global,
3209 struct i915_sched_attr attr = {
3210 .priority = I915_PRIORITY_MAX
3212 struct i915_request *rq;
3216 rq = intel_engine_create_kernel_request(engine);
3220 cs = intel_ring_begin(rq, 4);
3222 i915_request_add(rq);
3226 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3227 *cs++ = i915_ggtt_offset(global);
3231 intel_ring_advance(rq, cs);
3233 i915_request_get(rq);
3234 i915_request_add(rq);
3236 engine->schedule(rq, &attr);
3238 if (i915_request_wait(rq, 0, HZ / 2) < 0)
3240 i915_request_put(rq);
3245 static int live_preempt_user(void *arg)
3247 struct intel_gt *gt = arg;
3248 struct intel_engine_cs *engine;
3249 struct i915_vma *global;
3250 enum intel_engine_id id;
3255 * In our other tests, we look at preemption in carefully
3256 * controlled conditions in the ringbuffer. Since most of the
3257 * time is spent in user batches, most of our preemptions naturally
3258 * occur there. We want to verify that when we preempt inside a batch
3259 * we continue on from the current instruction and do not roll back
3260 * to the start, or another earlier arbitration point.
3262 * To verify this, we create a batch which is a mixture of
3263 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3264 * a few preempting contexts thrown into the mix, we look for any
3265 * repeated instructions (which show up as incorrect values).
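*
* Concretely: each client batch bumps GPR[1..NUM_GPR-1] and stores
* them into its own slice of a shared result page, polling a
* semaphore at the start of that page between steps. preempt_user()
* injects a maximum priority request that advances the semaphore
* value, forcing a preemption each time. Any stored value other than
* 1 means an instruction was replayed after a preemption.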
3268 global = create_global(gt, 4096);
3270 return PTR_ERR(global);
3272 result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
3273 if (IS_ERR(result)) {
3274 i915_vma_unpin_and_release(&global, 0);
3275 return PTR_ERR(result);
3278 for_each_engine(engine, gt, id) {
3279 struct i915_request *client[3] = {};
3280 struct igt_live_test t;
3283 if (!intel_engine_has_preemption(engine))
3286 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3287 continue; /* we need per-context GPR */
3289 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3294 memset(result, 0, 4096);
3296 for (i = 0; i < ARRAY_SIZE(client); i++) {
3297 struct i915_request *rq;
3299 rq = create_gpr_client(engine, global,
3300 NUM_GPR * i * sizeof(u32));
3309 /* Continuously preempt the set of 3 running contexts */
3310 for (i = 1; i <= NUM_GPR; i++) {
3311 err = preempt_user(engine, global, i);
3316 if (READ_ONCE(result[0]) != NUM_GPR) {
3317 pr_err("%s: Failed to release semaphore\n",
3323 for (i = 0; i < ARRAY_SIZE(client); i++) {
3326 if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3331 for (gpr = 1; gpr < NUM_GPR; gpr++) {
3332 if (result[NUM_GPR * i + gpr] != 1) {
3333 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3335 i, gpr, result[NUM_GPR * i + gpr]);
3343 for (i = 0; i < ARRAY_SIZE(client); i++) {
3347 i915_request_put(client[i]);
3350 /* Flush the semaphores on error */
3351 smp_store_mb(result[0], -1);
3352 if (igt_live_test_end(&t))
3358 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3362 static int live_preempt_timeout(void *arg)
3364 struct intel_gt *gt = arg;
3365 struct i915_gem_context *ctx_hi, *ctx_lo;
3366 struct igt_spinner spin_lo;
3367 struct intel_engine_cs *engine;
3368 enum intel_engine_id id;
3372 * Check that we force preemption to occur by cancelling the previous
3373 * context if it refuses to yield the GPU.
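*
* A low priority spinner is submitted without any arbitration points
* (MI_NOOP), so it cannot be preempted gracefully. A maximum priority
* request is then queued behind it with preempt_timeout_ms temporarily
* dropped to 1ms; the forced preemption should reset the hog and let
* the high priority request complete promptly.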
3375 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3378 if (!intel_has_reset_engine(gt))
3381 if (igt_spinner_init(&spin_lo, gt))
3384 ctx_hi = kernel_context(gt->i915);
3387 ctx_hi->sched.priority =
3388 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
3390 ctx_lo = kernel_context(gt->i915);
3393 ctx_lo->sched.priority =
3394 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
3396 for_each_engine(engine, gt, id) {
3397 unsigned long saved_timeout;
3398 struct i915_request *rq;
3400 if (!intel_engine_has_preemption(engine))
3403 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3404 MI_NOOP); /* preemption disabled */
3410 i915_request_add(rq);
3411 if (!igt_wait_for_spinner(&spin_lo, rq)) {
3412 intel_gt_set_wedged(gt);
3417 rq = igt_request_alloc(ctx_hi, engine);
3419 igt_spinner_end(&spin_lo);
3424 /* Flush the previous CS ack before changing timeouts */
3425 while (READ_ONCE(engine->execlists.pending[0]))
3428 saved_timeout = engine->props.preempt_timeout_ms;
3429 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */
3431 i915_request_get(rq);
3432 i915_request_add(rq);
3434 intel_engine_flush_submission(engine);
3435 engine->props.preempt_timeout_ms = saved_timeout;
3437 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3438 intel_gt_set_wedged(gt);
3439 i915_request_put(rq);
3444 igt_spinner_end(&spin_lo);
3445 i915_request_put(rq);
3450 kernel_context_close(ctx_lo);
3452 kernel_context_close(ctx_hi);
3454 igt_spinner_fini(&spin_lo);
3458 static int random_range(struct rnd_state *rnd, int min, int max)
3460 return i915_prandom_u32_max_state(max - min, rnd) + min;
3463 static int random_priority(struct rnd_state *rnd)
3465 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3468 struct preempt_smoke {
3469 struct intel_gt *gt;
3470 struct i915_gem_context **contexts;
3471 struct intel_engine_cs *engine;
3472 struct drm_i915_gem_object *batch;
3473 unsigned int ncontext;
3474 struct rnd_state prng;
3475 unsigned long count;
3478 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3480 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3484 static int smoke_submit(struct preempt_smoke *smoke,
3485 struct i915_gem_context *ctx, int prio,
3486 struct drm_i915_gem_object *batch)
3488 struct i915_request *rq;
3489 struct i915_vma *vma = NULL;
3493 struct i915_address_space *vm;
3495 vm = i915_gem_context_get_vm_rcu(ctx);
3496 vma = i915_vma_instance(batch, vm, NULL);
3499 return PTR_ERR(vma);
3501 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3506 ctx->sched.priority = prio;
3508 rq = igt_request_alloc(ctx, smoke->engine);
3516 err = i915_request_await_object(rq, vma->obj, false);
3518 err = i915_vma_move_to_active(vma, rq, 0);
3520 err = rq->engine->emit_bb_start(rq,
3523 i915_vma_unlock(vma);
3526 i915_request_add(rq);
3530 i915_vma_unpin(vma);
3535 static int smoke_crescendo_thread(void *arg)
3537 struct preempt_smoke *smoke = arg;
3538 IGT_TIMEOUT(end_time);
3539 unsigned long count;
3543 struct i915_gem_context *ctx = smoke_context(smoke);
3546 err = smoke_submit(smoke,
3547 ctx, count % I915_PRIORITY_MAX,
3553 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3555 smoke->count = count;
3559 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3560 #define BATCH BIT(0)
3562 struct task_struct *tsk[I915_NUM_ENGINES] = {};
3563 struct preempt_smoke arg[I915_NUM_ENGINES];
3564 struct intel_engine_cs *engine;
3565 enum intel_engine_id id;
3566 unsigned long count;
3569 for_each_engine(engine, smoke->gt, id) {
3571 arg[id].engine = engine;
3572 if (!(flags & BATCH))
3573 arg[id].batch = NULL;
3576 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
3577 "igt/smoke:%d", id);
3578 if (IS_ERR(tsk[id])) {
3579 err = PTR_ERR(tsk[id]);
3582 get_task_struct(tsk[id]);
3585 yield(); /* start all threads before we kthread_stop() */
3588 for_each_engine(engine, smoke->gt, id) {
3591 if (IS_ERR_OR_NULL(tsk[id]))
3594 status = kthread_stop(tsk[id]);
3598 count += arg[id].count;
3600 put_task_struct(tsk[id]);
3603 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3604 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3608 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3610 enum intel_engine_id id;
3611 IGT_TIMEOUT(end_time);
3612 unsigned long count;
3616 for_each_engine(smoke->engine, smoke->gt, id) {
3617 struct i915_gem_context *ctx = smoke_context(smoke);
3620 err = smoke_submit(smoke,
3621 ctx, random_priority(&smoke->prng),
3622 flags & BATCH ? smoke->batch : NULL);
3628 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3630 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3631 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3635 static int live_preempt_smoke(void *arg)
3637 struct preempt_smoke smoke = {
3639 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3642 const unsigned int phase[] = { 0, BATCH };
3643 struct igt_live_test t;
3648 smoke.contexts = kmalloc_array(smoke.ncontext,
3649 sizeof(*smoke.contexts),
3651 if (!smoke.contexts)
3655 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3656 if (IS_ERR(smoke.batch)) {
3657 err = PTR_ERR(smoke.batch);
3661 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
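/*
* Fill the smoke batch with MI_ARB_CHECK so there is an arbitration
* point at almost every instruction, allowing the batch to be
* preempted at any point during its execution.
*/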
3666 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3667 cs[n] = MI_ARB_CHECK;
3668 cs[n] = MI_BATCH_BUFFER_END;
3669 i915_gem_object_flush_map(smoke.batch);
3670 i915_gem_object_unpin_map(smoke.batch);
3672 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3677 for (n = 0; n < smoke.ncontext; n++) {
3678 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3679 if (!smoke.contexts[n])
3683 for (n = 0; n < ARRAY_SIZE(phase); n++) {
3684 err = smoke_crescendo(&smoke, phase[n]);
3688 err = smoke_random(&smoke, phase[n]);
3694 if (igt_live_test_end(&t))
3697 for (n = 0; n < smoke.ncontext; n++) {
3698 if (!smoke.contexts[n])
3700 kernel_context_close(smoke.contexts[n]);
3704 i915_gem_object_put(smoke.batch);
3706 kfree(smoke.contexts);
3711 static int nop_virtual_engine(struct intel_gt *gt,
3712 struct intel_engine_cs **siblings,
3713 unsigned int nsibling,
3716 #define CHAIN BIT(0)
3718 IGT_TIMEOUT(end_time);
3719 struct i915_request *request[16] = {};
3720 struct intel_context *ve[16];
3721 unsigned long n, prime, nc;
3722 struct igt_live_test t;
3723 ktime_t times[2] = {};
3726 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3728 for (n = 0; n < nctx; n++) {
3729 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3730 if (IS_ERR(ve[n])) {
3731 err = PTR_ERR(ve[n]);
3736 err = intel_context_pin(ve[n]);
3738 intel_context_put(ve[n]);
3744 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3748 for_each_prime_number_from(prime, 1, 8192) {
3749 times[1] = ktime_get_raw();
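/*
* With CHAIN, all prime requests are queued on one virtual context
* before moving on to the next; otherwise submission alternates
* round-robin, one request per context at a time.
*/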
3751 if (flags & CHAIN) {
3752 for (nc = 0; nc < nctx; nc++) {
3753 for (n = 0; n < prime; n++) {
3754 struct i915_request *rq;
3756 rq = i915_request_create(ve[nc]);
3763 i915_request_put(request[nc]);
3764 request[nc] = i915_request_get(rq);
3765 i915_request_add(rq);
3769 for (n = 0; n < prime; n++) {
3770 for (nc = 0; nc < nctx; nc++) {
3771 struct i915_request *rq;
3773 rq = i915_request_create(ve[nc]);
3780 i915_request_put(request[nc]);
3781 request[nc] = i915_request_get(rq);
3782 i915_request_add(rq);
3787 for (nc = 0; nc < nctx; nc++) {
3788 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3789 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3790 __func__, ve[0]->engine->name,
3791 request[nc]->fence.context,
3792 request[nc]->fence.seqno);
3794 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3795 __func__, ve[0]->engine->name,
3796 request[nc]->fence.context,
3797 request[nc]->fence.seqno);
3799 intel_gt_set_wedged(gt);
3804 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3806 times[0] = times[1];
3808 for (nc = 0; nc < nctx; nc++) {
3809 i915_request_put(request[nc]);
3813 if (__igt_timeout(end_time, NULL))
3817 err = igt_live_test_end(&t);
3821 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3822 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3823 prime, div64_u64(ktime_to_ns(times[1]), prime));
3826 if (igt_flush_test(gt->i915))
3829 for (nc = 0; nc < nctx; nc++) {
3830 i915_request_put(request[nc]);
3831 intel_context_unpin(ve[nc]);
3832 intel_context_put(ve[nc]);
3838 __select_siblings(struct intel_gt *gt,
3840 struct intel_engine_cs **siblings,
3841 bool (*filter)(const struct intel_engine_cs *))
3846 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3847 if (!gt->engine_class[class][inst])
3850 if (filter && !filter(gt->engine_class[class][inst]))
3853 siblings[n++] = gt->engine_class[class][inst];
3860 select_siblings(struct intel_gt *gt,
3862 struct intel_engine_cs **siblings)
3864 return __select_siblings(gt, class, siblings, NULL);
3867 static int live_virtual_engine(void *arg)
3869 struct intel_gt *gt = arg;
3870 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3871 struct intel_engine_cs *engine;
3872 enum intel_engine_id id;
3876 if (intel_uc_uses_guc_submission(&gt->uc))
3879 for_each_engine(engine, gt, id) {
3880 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3882 pr_err("Failed to wrap engine %s: err=%d\n",
3888 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3891 nsibling = select_siblings(gt, class, siblings);
3895 for (n = 1; n <= nsibling + 1; n++) {
3896 err = nop_virtual_engine(gt, siblings, nsibling,
3902 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3910 static int mask_virtual_engine(struct intel_gt *gt,
3911 struct intel_engine_cs **siblings,
3912 unsigned int nsibling)
3914 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3915 struct intel_context *ve;
3916 struct igt_live_test t;
3921 * Check that by setting the execution mask on a request, we can
3922 * restrict it to our desired engine within the virtual engine.
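*
* One request is created per sibling, each with its execution_mask
* limited to a single physical engine (assigned in reverse order to
* make the mapping deliberately unnatural), and rq->engine is checked
* against that engine once the request has completed.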
3925 ve = intel_execlists_create_virtual(siblings, nsibling);
3931 err = intel_context_pin(ve);
3935 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3939 for (n = 0; n < nsibling; n++) {
3940 request[n] = i915_request_create(ve);
3941 if (IS_ERR(request[n])) {
3942 err = PTR_ERR(request[n]);
3947 /* Reverse order as it's more likely to be unnatural */
3948 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3950 i915_request_get(request[n]);
3951 i915_request_add(request[n]);
3954 for (n = 0; n < nsibling; n++) {
3955 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3956 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3957 __func__, ve->engine->name,
3958 request[n]->fence.context,
3959 request[n]->fence.seqno);
3961 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3962 __func__, ve->engine->name,
3963 request[n]->fence.context,
3964 request[n]->fence.seqno);
3966 intel_gt_set_wedged(gt);
3971 if (request[n]->engine != siblings[nsibling - n - 1]) {
3972 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3973 request[n]->engine->name,
3974 siblings[nsibling - n - 1]->name);
3980 err = igt_live_test_end(&t);
3982 if (igt_flush_test(gt->i915))
3985 for (n = 0; n < nsibling; n++)
3986 i915_request_put(request[n]);
3989 intel_context_unpin(ve);
3991 intel_context_put(ve);
3996 static int live_virtual_mask(void *arg)
3998 struct intel_gt *gt = arg;
3999 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4003 if (intel_uc_uses_guc_submission(&gt->uc))
4006 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4007 unsigned int nsibling;
4009 nsibling = select_siblings(gt, class, siblings);
4013 err = mask_virtual_engine(gt, siblings, nsibling);
4021 static int slicein_virtual_engine(struct intel_gt *gt,
4022 struct intel_engine_cs **siblings,
4023 unsigned int nsibling)
4025 const long timeout = slice_timeout(siblings[0]);
4026 struct intel_context *ce;
4027 struct i915_request *rq;
4028 struct igt_spinner spin;
4033 * Virtual requests must take part in timeslicing on the target engines.
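*
* Every physical sibling is first occupied by a spinner (created with
* MI_ARB_CHECK so it can be preempted), then a plain request is
* submitted on a virtual engine spanning those siblings. Timeslicing
* alone should let it execute within the slice_timeout() window.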
4036 if (igt_spinner_init(&spin, gt))
4039 for (n = 0; n < nsibling; n++) {
4040 ce = intel_context_create(siblings[n]);
4046 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4047 intel_context_put(ce);
4053 i915_request_add(rq);
4056 ce = intel_execlists_create_virtual(siblings, nsibling);
4062 rq = intel_context_create_request(ce);
4063 intel_context_put(ce);
4069 i915_request_get(rq);
4070 i915_request_add(rq);
4071 if (i915_request_wait(rq, 0, timeout) < 0) {
4072 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4073 __func__, rq->engine->name);
4075 intel_gt_set_wedged(gt);
4078 i915_request_put(rq);
4081 igt_spinner_end(&spin);
4082 if (igt_flush_test(gt->i915))
4084 igt_spinner_fini(&spin);
4088 static int sliceout_virtual_engine(struct intel_gt *gt,
4089 struct intel_engine_cs **siblings,
4090 unsigned int nsibling)
4092 const long timeout = slice_timeout(siblings[0]);
4093 struct intel_context *ce;
4094 struct i915_request *rq;
4095 struct igt_spinner spin;
4100 * Virtual requests must allow others a fair timeslice.
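*
* The inverse of slicein: the spinners are submitted on virtual
* engines, oversubscribing the siblings, and a plain request on each
* physical sibling must still receive a timeslice within the
* slice_timeout() window.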
4103 if (igt_spinner_init(&spin, gt))
4106 /* XXX We do not handle oversubscription and fairness with normal rq */
4107 for (n = 0; n < nsibling; n++) {
4108 ce = intel_execlists_create_virtual(siblings, nsibling);
4114 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4115 intel_context_put(ce);
4121 i915_request_add(rq);
4124 for (n = 0; !err && n < nsibling; n++) {
4125 ce = intel_context_create(siblings[n]);
4131 rq = intel_context_create_request(ce);
4132 intel_context_put(ce);
4138 i915_request_get(rq);
4139 i915_request_add(rq);
4140 if (i915_request_wait(rq, 0, timeout) < 0) {
4141 GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4142 __func__, siblings[n]->name);
4144 intel_gt_set_wedged(gt);
4147 i915_request_put(rq);
4151 igt_spinner_end(&spin);
4152 if (igt_flush_test(gt->i915))
4154 igt_spinner_fini(&spin);
4158 static int live_virtual_slice(void *arg)
4160 struct intel_gt *gt = arg;
4161 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4165 if (intel_uc_uses_guc_submission(&gt->uc))
4168 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4169 unsigned int nsibling;
4171 nsibling = __select_siblings(gt, class, siblings,
4172 intel_engine_has_timeslices);
4176 err = slicein_virtual_engine(gt, siblings, nsibling);
4180 err = sliceout_virtual_engine(gt, siblings, nsibling);
4188 static int preserved_virtual_engine(struct intel_gt *gt,
4189 struct intel_engine_cs **siblings,
4190 unsigned int nsibling)
4192 struct i915_request *last = NULL;
4193 struct intel_context *ve;
4194 struct i915_vma *scratch;
4195 struct igt_live_test t;
4200 scratch = __vm_create_scratch_for_read(&siblings[0]->gt->ggtt->vm,
4202 if (IS_ERR(scratch))
4203 return PTR_ERR(scratch);
4205 err = i915_vma_sync(scratch);
4209 ve = intel_execlists_create_virtual(siblings, nsibling);
4215 err = intel_context_pin(ve);
4219 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
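/*
* Submit a chain of requests on the virtual engine, each pinned to a
* different physical sibling via its execution_mask. Request n stores
* CS_GPR(n), written by the previous request in the chain (typically
* on another engine), into the scratch page and then preloads the
* next GPR for its successor; the values only come out right if the
* user registers in the context image survive the engine switches.
*/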
4223 for (n = 0; n < NUM_GPR_DW; n++) {
4224 struct intel_engine_cs *engine = siblings[n % nsibling];
4225 struct i915_request *rq;
4227 rq = i915_request_create(ve);
4233 i915_request_put(last);
4234 last = i915_request_get(rq);
4236 cs = intel_ring_begin(rq, 8);
4238 i915_request_add(rq);
4243 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4244 *cs++ = CS_GPR(engine, n);
4245 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4248 *cs++ = MI_LOAD_REGISTER_IMM(1);
4249 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4253 intel_ring_advance(rq, cs);
4255 /* Restrict this request to run on a particular engine */
4256 rq->execution_mask = engine->mask;
4257 i915_request_add(rq);
4260 if (i915_request_wait(last, 0, HZ / 5) < 0) {
4265 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4271 for (n = 0; n < NUM_GPR_DW; n++) {
4273 pr_err("Incorrect value[%d] found for GPR[%d]\n",
4280 i915_gem_object_unpin_map(scratch->obj);
4283 if (igt_live_test_end(&t))
4285 i915_request_put(last);
4287 intel_context_unpin(ve);
4289 intel_context_put(ve);
4291 i915_vma_unpin_and_release(&scratch, 0);
4295 static int live_virtual_preserved(void *arg)
4297 struct intel_gt *gt = arg;
4298 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4302 * Check that the context image retains non-privileged (user) registers
4303 * from one engine to the next. For this we check that the CS_GPR registers are preserved.
4307 if (intel_uc_uses_guc_submission(&gt->uc))
4310 /* As we use CS_GPR we cannot run before they existed on all engines. */
4311 if (INTEL_GEN(gt->i915) < 9)
4314 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4317 nsibling = select_siblings(gt, class, siblings);
4321 err = preserved_virtual_engine(gt, siblings, nsibling);
4329 static int bond_virtual_engine(struct intel_gt *gt,
4331 struct intel_engine_cs **siblings,
4332 unsigned int nsibling,
4334 #define BOND_SCHEDULE BIT(0)
4336 struct intel_engine_cs *master;
4337 struct i915_request *rq[16];
4338 enum intel_engine_id id;
4339 struct igt_spinner spin;
4344 * A set of bonded requests is intended to be run concurrently
4345 * across a number of engines. We use one request per-engine
4346 * and a magic fence to schedule each of the bonded requests
4347 * at the same time. A consequence of our current scheduler is that
4348 * we only move requests to the HW ready queue when the request
4349 * becomes ready, that is when all of its prerequisite fences have
4350 * been signaled. As one of those fences is the master submit fence,
4351 * there is a delay on all secondary fences as the HW may be
4352 * currently busy. Equally, as all the requests are independent,
4353 * they may have other fences that delay individual request
4354 * submission to HW. Ergo, we do not guarantee that all requests are
4355 * immediately submitted to HW at the same time, just that if the
4356 * rules are abided by, they are ready at the same time as the
4357 * first is submitted. Userspace can embed semaphores in its batch
4358 * to ensure parallel execution of its phases as it requires.
4359 * Though naturally it has been suggested that the scheduler should
4360 * take care of parallel execution, even across preemption events on
4361 * different HW. (The proper answer is of course "lalalala".)
4363 * With the submit-fence, we have identified three possible phases
4364 * of synchronisation depending on the master fence: queued (not
4365 * ready), executing, and signaled. The first two are quite simple
4366 * and checked below. However, the signaled master fence handling is
4367 * contentious. Currently we do not distinguish between a signaled
4368 * fence and an expired fence, as once signaled it does not convey
4369 * any information about the previous execution. It may even be freed
4370 * and hence checking later it may not exist at all. Ergo we currently
4371 * do not apply the bonding constraint for an already signaled fence,
4372 * as our expectation is that it should not constrain the secondaries
4373 * and is outside of the scope of the bonded request API (i.e. all
4374 * userspace requests are meant to be running in parallel). As
4375 * it imposes no constraint, and is effectively a no-op, we do not
4376 * check below as normal execution flows are checked extensively above.
4378 * XXX Is the degenerate handling of signaled submit fences the
4379 * expected behaviour for userspace?
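*
* The flow below: a spinner ("master") is run on an engine of a
* different class; for BOND_SCHEDULE the master is additionally held
* back behind an onstack fence so that the bonds are set up while it
* is still queued. One bonded virtual request is then created per
* sibling, coupled to the master through the bond_execute hook, and
* each is finally checked to have executed on its intended sibling.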
4382 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4384 if (igt_spinner_init(&spin, gt))
4388 rq[0] = ERR_PTR(-ENOMEM);
4389 for_each_engine(master, gt, id) {
4390 struct i915_sw_fence fence = {};
4391 struct intel_context *ce;
4393 if (master->class == class)
4396 ce = intel_context_create(master);
4402 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4404 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4405 intel_context_put(ce);
4406 if (IS_ERR(rq[0])) {
4407 err = PTR_ERR(rq[0]);
4410 i915_request_get(rq[0]);
4412 if (flags & BOND_SCHEDULE) {
4413 onstack_fence_init(&fence);
4414 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4419 i915_request_add(rq[0]);
4423 if (!(flags & BOND_SCHEDULE) &&
4424 !igt_wait_for_spinner(&spin, rq[0])) {
4429 for (n = 0; n < nsibling; n++) {
4430 struct intel_context *ve;
4432 ve = intel_execlists_create_virtual(siblings, nsibling);
4435 onstack_fence_fini(&fence);
4439 err = intel_virtual_engine_attach_bond(ve->engine,
4443 intel_context_put(ve);
4444 onstack_fence_fini(&fence);
4448 err = intel_context_pin(ve);
4449 intel_context_put(ve);
4451 onstack_fence_fini(&fence);
4455 rq[n + 1] = i915_request_create(ve);
4456 intel_context_unpin(ve);
4457 if (IS_ERR(rq[n + 1])) {
4458 err = PTR_ERR(rq[n + 1]);
4459 onstack_fence_fini(&fence);
4462 i915_request_get(rq[n + 1]);
4464 err = i915_request_await_execution(rq[n + 1],
4466 ve->engine->bond_execute);
4467 i915_request_add(rq[n + 1]);
4469 onstack_fence_fini(&fence);
4473 onstack_fence_fini(&fence);
4474 intel_engine_flush_submission(master);
4475 igt_spinner_end(&spin);
4477 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4478 pr_err("Master request did not execute (on %s)!\n",
4479 rq[0]->engine->name);
4484 for (n = 0; n < nsibling; n++) {
4485 if (i915_request_wait(rq[n + 1], 0,
4486 MAX_SCHEDULE_TIMEOUT) < 0) {
4491 if (rq[n + 1]->engine != siblings[n]) {
4492 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4494 rq[n + 1]->engine->name,
4495 rq[0]->engine->name);
4501 for (n = 0; !IS_ERR(rq[n]); n++)
4502 i915_request_put(rq[n]);
4503 rq[0] = ERR_PTR(-ENOMEM);
4507 for (n = 0; !IS_ERR(rq[n]); n++)
4508 i915_request_put(rq[n]);
4509 if (igt_flush_test(gt->i915))
4512 igt_spinner_fini(&spin);
4516 static int live_virtual_bond(void *arg)
4518 static const struct phase {
4523 { "schedule", BOND_SCHEDULE },
4526 struct intel_gt *gt = arg;
4527 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4531 if (intel_uc_uses_guc_submission(&gt->uc))
4534 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4535 const struct phase *p;
4538 nsibling = select_siblings(gt, class, siblings);
4542 for (p = phases; p->name; p++) {
4543 err = bond_virtual_engine(gt,
4544 class, siblings, nsibling,
4547 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4548 __func__, p->name, class, nsibling, err);
4557 static int reset_virtual_engine(struct intel_gt *gt,
4558 struct intel_engine_cs **siblings,
4559 unsigned int nsibling)
4561 struct intel_engine_cs *engine;
4562 struct intel_context *ve;
4563 struct igt_spinner spin;
4564 struct i915_request *rq;
4569 * In order to support offline error capture for fast preempt reset,
4570 * we need to decouple the guilty request and ensure that it and its
4571 * descendants are not executed while the capture is in progress.
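*
* To exercise that, a spinner is run on the virtual engine and the
* test then takes over whichever physical engine it landed on: it
* grabs the per-engine reset bit, disables the submission tasklet,
* unwinds the incomplete request and parks it with execlists_hold()
* before resetting the engine. The request must not be resubmitted
* (and so must not complete) until execlists_unhold() releases it.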
4574 if (igt_spinner_init(&spin, gt))
4577 ve = intel_execlists_create_virtual(siblings, nsibling);
4583 for (n = 0; n < nsibling; n++)
4584 st_engine_heartbeat_disable(siblings[n]);
4586 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4591 i915_request_add(rq);
4593 if (!igt_wait_for_spinner(&spin, rq)) {
4594 intel_gt_set_wedged(gt);
4599 engine = rq->engine;
4600 GEM_BUG_ON(engine == ve->engine);
4602 /* Take ownership of the reset and tasklet */
4604 if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
4605 &gt->reset.flags)) {
4607 intel_gt_set_wedged(gt);
4611 tasklet_disable(&engine->execlists.tasklet);
4613 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
4614 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4616 /* Fake a preemption event; failed of course */
4617 spin_lock_irq(&engine->active.lock);
4618 __unwind_incomplete_requests(engine);
4619 spin_unlock_irq(&engine->active.lock);
4620 GEM_BUG_ON(rq->engine != engine);
4622 /* Reset the engine while keeping our active request on hold */
4623 execlists_hold(engine, rq);
4624 GEM_BUG_ON(!i915_request_on_hold(rq));
4626 __intel_engine_reset_bh(engine, NULL);
4627 GEM_BUG_ON(rq->fence.error != -EIO);
4629 /* Release our grasp on the engine, letting CS flow again */
4630 tasklet_enable(&engine->execlists.tasklet);
4631 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
4634 /* Check that we do not resubmit the held request */
4635 i915_request_get(rq);
4636 if (i915_request_wait(rq, 0, HZ / 5) >= 0) {
4637 pr_err("%s: on hold request completed!\n",
4639 intel_gt_set_wedged(gt);
4643 GEM_BUG_ON(!i915_request_on_hold(rq));
4645 /* But is resubmitted on release */
4646 execlists_unhold(engine, rq);
4647 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4648 pr_err("%s: held request did not complete!\n",
4650 intel_gt_set_wedged(gt);
4655 i915_request_put(rq);
4657 for (n = 0; n < nsibling; n++)
4658 st_engine_heartbeat_enable(siblings[n]);
4660 intel_context_put(ve);
4662 igt_spinner_fini(&spin);
4666 static int live_virtual_reset(void *arg)
4668 struct intel_gt *gt = arg;
4669 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4673 * Check that we handle a reset event within a virtual engine.
4674 * Only the physical engine is reset, but we have to check the flow
4675 * of the virtual requests around the reset, and make sure it is not lost.
4679 if (intel_uc_uses_guc_submission(&gt->uc))
4682 if (!intel_has_reset_engine(gt))
4685 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4688 nsibling = select_siblings(gt, class, siblings);
4692 err = reset_virtual_engine(gt, siblings, nsibling);
4700 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4702 static const struct i915_subtest tests[] = {
4703 SUBTEST(live_sanitycheck),
4704 SUBTEST(live_unlite_switch),
4705 SUBTEST(live_unlite_preempt),
4706 SUBTEST(live_unlite_ring),
4707 SUBTEST(live_pin_rewind),
4708 SUBTEST(live_hold_reset),
4709 SUBTEST(live_error_interrupt),
4710 SUBTEST(live_timeslice_preempt),
4711 SUBTEST(live_timeslice_rewind),
4712 SUBTEST(live_timeslice_queue),
4713 SUBTEST(live_timeslice_nopreempt),
4714 SUBTEST(live_busywait_preempt),
4715 SUBTEST(live_preempt),
4716 SUBTEST(live_late_preempt),
4717 SUBTEST(live_nopreempt),
4718 SUBTEST(live_preempt_cancel),
4719 SUBTEST(live_suppress_self_preempt),
4720 SUBTEST(live_chain_preempt),
4721 SUBTEST(live_preempt_ring),
4722 SUBTEST(live_preempt_gang),
4723 SUBTEST(live_preempt_timeout),
4724 SUBTEST(live_preempt_user),
4725 SUBTEST(live_preempt_smoke),
4726 SUBTEST(live_virtual_engine),
4727 SUBTEST(live_virtual_mask),
4728 SUBTEST(live_virtual_preserved),
4729 SUBTEST(live_virtual_slice),
4730 SUBTEST(live_virtual_bond),
4731 SUBTEST(live_virtual_reset),
4734 if (!HAS_EXECLISTS(i915))
4737 if (intel_gt_is_wedged(&i915->gt))
4740 return intel_gt_live_subtests(tests, &i915->gt);