1 // SPDX-License-Identifier: MIT
3 * Copyright © 2018 Intel Corporation
6 #include <linux/prime_numbers.h>
8 #include "gem/i915_gem_pm.h"
9 #include "gt/intel_engine_heartbeat.h"
10 #include "gt/intel_reset.h"
11 #include "gt/selftest_engine_heartbeat.h"
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
25 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
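/*
 * CS_GPR() addresses dword n of the engine's command-streamer GPR file
 * (based at mmio_base + 0x600); each 64-bit GPR spans two dwords, so the
 * lower half of GPR i is CS_GPR(engine, 2 * i) and the file holds
 * NUM_GPR_DW dwords in total.
 */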
27 static bool is_active(struct i915_request *rq)
29 if (i915_request_is_active(rq))
32 if (i915_request_on_hold(rq))
35 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
41 static int wait_for_submit(struct intel_engine_cs *engine,
42 struct i915_request *rq,
43 unsigned long timeout)
45 /* Ignore our own attempts to suppress excess tasklets */
46 tasklet_hi_schedule(&engine->execlists.tasklet);
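/*
 * Poll until the request either completes or is seen active on the HW
 * with no ELSP[] write still pending, i.e. the submission has been
 * acknowledged by the hardware.
 */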
50 bool done = time_after(jiffies, timeout);
52 if (i915_request_completed(rq)) /* that was quick! */
55 /* Wait until the HW has acknowledged the submission (or err) */
56 intel_engine_flush_submission(engine);
57 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
67 static int wait_for_reset(struct intel_engine_cs *engine,
68 struct i915_request *rq,
69 unsigned long timeout)
75 intel_engine_flush_submission(engine);
77 if (READ_ONCE(engine->execlists.pending[0]))
80 if (i915_request_completed(rq))
83 if (READ_ONCE(rq->fence.error))
85 } while (time_before(jiffies, timeout));
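/*
 * The reset of a banned/hung request may be deferred to a worker, so
 * flush the system workqueue before inspecting the fence status below.
 */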
87 flush_scheduled_work();
89 if (rq->fence.error != -EIO) {
90 pr_err("%s: hanging request %llx:%lld not reset\n",
97 /* Give the request a jiffy to complete after flushing the worker */
98 if (i915_request_wait(rq, 0,
99 max(0l, (long)(timeout - jiffies)) + 1) < 0) {
100 pr_err("%s: hanging request %llx:%lld did not complete\n",
110 static int live_sanitycheck(void *arg)
112 struct intel_gt *gt = arg;
113 struct intel_engine_cs *engine;
114 enum intel_engine_id id;
115 struct igt_spinner spin;
118 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
121 if (igt_spinner_init(&spin, gt))
124 for_each_engine(engine, gt, id) {
125 struct intel_context *ce;
126 struct i915_request *rq;
128 ce = intel_context_create(engine);
134 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
140 i915_request_add(rq);
141 if (!igt_wait_for_spinner(&spin, rq)) {
142 GEM_TRACE("spinner failed to start\n");
144 intel_gt_set_wedged(gt);
149 igt_spinner_end(&spin);
150 if (igt_flush_test(gt->i915)) {
156 intel_context_put(ce);
161 igt_spinner_fini(&spin);
165 static int live_unlite_restore(struct intel_gt *gt, int prio)
167 struct intel_engine_cs *engine;
168 enum intel_engine_id id;
169 struct igt_spinner spin;
173 * Check that we can correctly context switch between 2 instances
174 * on the same engine from the same parent context.
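*
* A lite-restore occurs when the HW is asked to continue with the context
* it is already running: only RING_TAIL is rewritten and the rest of the
* context image is not reloaded, so the tail must point into the ring we
* actually intend to execute.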
177 if (igt_spinner_init(&spin, gt))
181 for_each_engine(engine, gt, id) {
182 struct intel_context *ce[2] = {};
183 struct i915_request *rq[2];
184 struct igt_live_test t;
187 if (prio && !intel_engine_has_preemption(engine))
190 if (!intel_engine_can_store_dword(engine))
193 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
197 st_engine_heartbeat_disable(engine);
199 for (n = 0; n < ARRAY_SIZE(ce); n++) {
200 struct intel_context *tmp;
202 tmp = intel_context_create(engine);
208 err = intel_context_pin(tmp);
210 intel_context_put(tmp);
215 * Set up the pair of contexts such that if we
216 * lite-restore using the RING_TAIL from ce[1] it
217 * will execute garbage from ce[0]->ring.
219 memset(tmp->ring->vaddr,
220 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
221 tmp->ring->vma->size);
225 GEM_BUG_ON(!ce[1]->ring->size);
226 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
227 lrc_update_regs(ce[1], engine, ce[1]->ring->head);
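/*
 * ce[1] now restarts halfway into its poisoned ring, ahead of anything
 * ce[0] will emit, so a mistaken lite-restore would execute the poison.
 */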
229 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
231 err = PTR_ERR(rq[0]);
235 i915_request_get(rq[0]);
236 i915_request_add(rq[0]);
237 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
239 if (!igt_wait_for_spinner(&spin, rq[0])) {
240 i915_request_put(rq[0]);
244 rq[1] = i915_request_create(ce[1]);
246 err = PTR_ERR(rq[1]);
247 i915_request_put(rq[0]);
253 * Ensure we do the switch to ce[1] on completion.
255 * rq[0] is already submitted, so this should reduce
256 * to a no-op (a wait on a request on the same engine
257 * uses the submit fence, not the completion fence),
258 * but it will install a dependency on rq[1] for rq[0]
259 * that will prevent the pair being reordered by
262 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
265 i915_request_get(rq[1]);
266 i915_request_add(rq[1]);
267 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
268 i915_request_put(rq[0]);
271 struct i915_sched_attr attr = {
275 /* Alternatively preempt the spinner with ce[1] */
276 engine->schedule(rq[1], &attr);
279 /* And switch back to ce[0] for good measure */
280 rq[0] = i915_request_create(ce[0]);
282 err = PTR_ERR(rq[0]);
283 i915_request_put(rq[1]);
287 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
288 i915_request_get(rq[0]);
289 i915_request_add(rq[0]);
290 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
291 i915_request_put(rq[1]);
292 i915_request_put(rq[0]);
295 intel_engine_flush_submission(engine);
296 igt_spinner_end(&spin);
297 for (n = 0; n < ARRAY_SIZE(ce); n++) {
298 if (IS_ERR_OR_NULL(ce[n]))
301 intel_context_unpin(ce[n]);
302 intel_context_put(ce[n]);
305 st_engine_heartbeat_enable(engine);
306 if (igt_live_test_end(&t))
312 igt_spinner_fini(&spin);
316 static int live_unlite_switch(void *arg)
318 return live_unlite_restore(arg, 0);
321 static int live_unlite_preempt(void *arg)
323 return live_unlite_restore(arg, I915_PRIORITY_MAX);
326 static int live_unlite_ring(void *arg)
328 struct intel_gt *gt = arg;
329 struct intel_engine_cs *engine;
330 struct igt_spinner spin;
331 enum intel_engine_id id;
335 * Set up a preemption event that will cause almost the entire ring
336 * to be unwound, potentially fooling our intel_ring_direction()
337 * into emitting a forward lite-restore instead of the rollback.
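* (intel_ring_direction() compares two ring offsets and reports which one
* lies ahead of the other, taking wrap-around into account.)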
340 if (igt_spinner_init(&spin, gt))
343 for_each_engine(engine, gt, id) {
344 struct intel_context *ce[2] = {};
345 struct i915_request *rq;
346 struct igt_live_test t;
349 if (!intel_engine_has_preemption(engine))
352 if (!intel_engine_can_store_dword(engine))
355 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
359 st_engine_heartbeat_disable(engine);
361 for (n = 0; n < ARRAY_SIZE(ce); n++) {
362 struct intel_context *tmp;
364 tmp = intel_context_create(engine);
370 err = intel_context_pin(tmp);
372 intel_context_put(tmp);
376 memset32(tmp->ring->vaddr,
377 0xdeadbeef, /* trigger a hang if executed */
378 tmp->ring->vma->size / sizeof(u32));
383 /* Create max prio spinner, followed by N low prio nops */
384 rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
390 i915_request_get(rq);
391 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
392 i915_request_add(rq);
394 if (!igt_wait_for_spinner(&spin, rq)) {
395 intel_gt_set_wedged(gt);
396 i915_request_put(rq);
401 /* Fill the ring until we cause a wrap */
403 while (intel_ring_direction(ce[0]->ring,
405 ce[0]->ring->tail) <= 0) {
406 struct i915_request *tmp;
408 tmp = intel_context_create_request(ce[0]);
411 i915_request_put(rq);
415 i915_request_add(tmp);
416 intel_engine_flush_submission(engine);
419 intel_engine_flush_submission(engine);
420 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
426 GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
428 ce[0]->ring->tail) <= 0);
429 i915_request_put(rq);
431 /* Create a request on the second context to preempt the first ring after rq[0] */
432 rq = intel_context_create_request(ce[1]);
438 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
439 i915_request_get(rq);
440 i915_request_add(rq);
442 err = wait_for_submit(engine, rq, HZ / 2);
443 i915_request_put(rq);
445 pr_err("%s: preemption request was not submitted\n",
450 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
452 ce[0]->ring->tail, ce[0]->ring->emit,
453 ce[1]->ring->tail, ce[1]->ring->emit);
456 intel_engine_flush_submission(engine);
457 igt_spinner_end(&spin);
458 for (n = 0; n < ARRAY_SIZE(ce); n++) {
459 if (IS_ERR_OR_NULL(ce[n]))
462 intel_context_unpin(ce[n]);
463 intel_context_put(ce[n]);
465 st_engine_heartbeat_enable(engine);
466 if (igt_live_test_end(&t))
472 igt_spinner_fini(&spin);
476 static int live_pin_rewind(void *arg)
478 struct intel_gt *gt = arg;
479 struct intel_engine_cs *engine;
480 enum intel_engine_id id;
484 * We have to be careful not to trust intel_ring too much, for example
485 * ring->head is updated upon retire which is out of sync with pinning
486 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
487 * or else we risk writing an older, stale value.
489 * To simulate this, let's apply a bit of deliberate sabotage.
492 for_each_engine(engine, gt, id) {
493 struct intel_context *ce;
494 struct i915_request *rq;
495 struct intel_ring *ring;
496 struct igt_live_test t;
498 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
503 ce = intel_context_create(engine);
509 err = intel_context_pin(ce);
511 intel_context_put(ce);
515 /* Keep the context awake while we play games */
516 err = i915_active_acquire(&ce->active);
518 intel_context_unpin(ce);
519 intel_context_put(ce);
524 /* Poison the ring, and offset the next request from HEAD */
525 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
526 ring->emit = ring->size / 2;
527 ring->tail = ring->emit;
528 GEM_BUG_ON(ring->head);
530 intel_context_unpin(ce);
532 /* Submit a simple nop request */
533 GEM_BUG_ON(intel_context_is_pinned(ce));
534 rq = intel_context_create_request(ce);
535 i915_active_release(&ce->active); /* e.g. async retire */
536 intel_context_put(ce);
541 GEM_BUG_ON(!rq->head);
542 i915_request_add(rq);
544 /* Expect not to hang! */
545 if (igt_live_test_end(&t)) {
554 static int live_hold_reset(void *arg)
556 struct intel_gt *gt = arg;
557 struct intel_engine_cs *engine;
558 enum intel_engine_id id;
559 struct igt_spinner spin;
563 * In order to support offline error capture for fast preempt reset,
564 * we need to decouple the guilty request and ensure that it and its
565 * descendants are not executed while the capture is in progress.
568 if (!intel_has_reset_engine(gt))
571 if (igt_spinner_init(&spin, gt))
574 for_each_engine(engine, gt, id) {
575 struct intel_context *ce;
576 struct i915_request *rq;
578 ce = intel_context_create(engine);
584 st_engine_heartbeat_disable(engine);
586 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
591 i915_request_add(rq);
593 if (!igt_wait_for_spinner(&spin, rq)) {
594 intel_gt_set_wedged(gt);
599 /* We have our request executing, now remove it and reset */
602 if (test_and_set_bit(I915_RESET_ENGINE + id,
605 intel_gt_set_wedged(gt);
609 tasklet_disable(&engine->execlists.tasklet);
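/*
 * With the tasklet disabled, run the submission callback by hand so that
 * the spinner is guaranteed to be the active ELSP[0] request before we
 * place it on hold and reset the engine.
 */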
611 engine->execlists.tasklet.callback(&engine->execlists.tasklet);
612 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
614 i915_request_get(rq);
615 execlists_hold(engine, rq);
616 GEM_BUG_ON(!i915_request_on_hold(rq));
618 __intel_engine_reset_bh(engine, NULL);
619 GEM_BUG_ON(rq->fence.error != -EIO);
621 tasklet_enable(&engine->execlists.tasklet);
622 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
626 /* Check that we do not resubmit the held request */
627 if (!i915_request_wait(rq, 0, HZ / 5)) {
628 pr_err("%s: on hold request completed!\n",
630 i915_request_put(rq);
634 GEM_BUG_ON(!i915_request_on_hold(rq));
636 /* But is resubmitted on release */
637 execlists_unhold(engine, rq);
638 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
639 pr_err("%s: held request did not complete!\n",
641 intel_gt_set_wedged(gt);
644 i915_request_put(rq);
647 st_engine_heartbeat_enable(engine);
648 intel_context_put(ce);
653 igt_spinner_fini(&spin);
657 static const char *error_repr(int err)
659 return err ? "bad" : "good";
662 static int live_error_interrupt(void *arg)
664 static const struct error_phase {
665 enum { GOOD = 0, BAD = -EIO } error[2];
670 { { GOOD, GOOD } }, /* sentinel */
672 struct intel_gt *gt = arg;
673 struct intel_engine_cs *engine;
674 enum intel_engine_id id;
677 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
678 * of invalid commands in user batches that will cause a GPU hang.
679 * This is a faster mechanism than using hangcheck/heartbeats, but
680 * only detects problems the HW knows about -- it will not warn when
683 * To verify our detection and reset, we throw some invalid commands
684 * at the HW and wait for the interrupt.
687 if (!intel_has_reset_engine(gt))
690 for_each_engine(engine, gt, id) {
691 const struct error_phase *p;
694 st_engine_heartbeat_disable(engine);
696 for (p = phases; p->error[0] != GOOD; p++) {
697 struct i915_request *client[ARRAY_SIZE(phases->error)];
701 memset(client, 0, sizeof(*client));
702 for (i = 0; i < ARRAY_SIZE(client); i++) {
703 struct intel_context *ce;
704 struct i915_request *rq;
706 ce = intel_context_create(engine);
712 rq = intel_context_create_request(ce);
713 intel_context_put(ce);
719 if (rq->engine->emit_init_breadcrumb) {
720 err = rq->engine->emit_init_breadcrumb(rq);
722 i915_request_add(rq);
727 cs = intel_ring_begin(rq, 2);
729 i915_request_add(rq);
742 client[i] = i915_request_get(rq);
743 i915_request_add(rq);
746 err = wait_for_submit(engine, client[0], HZ / 2);
748 pr_err("%s: first request did not start within time!\n",
754 for (i = 0; i < ARRAY_SIZE(client); i++) {
755 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
756 pr_debug("%s: %s request incomplete!\n",
758 error_repr(p->error[i]));
760 if (!i915_request_started(client[i])) {
761 pr_err("%s: %s request not started!\n",
763 error_repr(p->error[i]));
768 /* Kick the tasklet to process the error */
769 intel_engine_flush_submission(engine);
770 if (client[i]->fence.error != p->error[i]) {
771 pr_err("%s: %s request (%s) with wrong error code: %d\n",
773 error_repr(p->error[i]),
774 i915_request_completed(client[i]) ? "completed" : "running",
775 client[i]->fence.error);
782 for (i = 0; i < ARRAY_SIZE(client); i++)
784 i915_request_put(client[i]);
786 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
787 engine->name, p - phases,
788 p->error[0], p->error[1]);
793 st_engine_heartbeat_enable(engine);
795 intel_gt_set_wedged(gt);
804 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
808 cs = intel_ring_begin(rq, 10);
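/*
 * Link idx of the chain: spin (preemptibly, thanks to MI_ARB_ENABLE)
 * until slot[idx] turns non-zero, then poke slot[idx - 1] to release the
 * previous link.
 */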
812 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
814 *cs++ = MI_SEMAPHORE_WAIT |
815 MI_SEMAPHORE_GLOBAL_GTT |
817 MI_SEMAPHORE_SAD_NEQ_SDD;
819 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
823 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
824 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
834 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
836 intel_ring_advance(rq, cs);
840 static struct i915_request *
841 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
843 struct intel_context *ce;
844 struct i915_request *rq;
847 ce = intel_context_create(engine);
851 rq = intel_context_create_request(ce);
856 if (rq->engine->emit_init_breadcrumb)
857 err = rq->engine->emit_init_breadcrumb(rq);
859 err = emit_semaphore_chain(rq, vma, idx);
861 i915_request_get(rq);
862 i915_request_add(rq);
867 intel_context_put(ce);
872 release_queue(struct intel_engine_cs *engine,
873 struct i915_vma *vma,
876 struct i915_sched_attr attr = {
879 struct i915_request *rq;
882 rq = intel_engine_create_kernel_request(engine);
886 cs = intel_ring_begin(rq, 4);
888 i915_request_add(rq);
892 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
893 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
897 intel_ring_advance(rq, cs);
899 i915_request_get(rq);
900 i915_request_add(rq);
903 engine->schedule(rq, &attr);
904 local_bh_enable(); /* kick tasklet */
906 i915_request_put(rq);
912 slice_semaphore_queue(struct intel_engine_cs *outer,
913 struct i915_vma *vma,
916 struct intel_engine_cs *engine;
917 struct i915_request *head;
918 enum intel_engine_id id;
921 head = semaphore_queue(outer, vma, n++);
923 return PTR_ERR(head);
925 for_each_engine(engine, outer->gt, id) {
926 if (!intel_engine_has_preemption(engine))
929 for (i = 0; i < count; i++) {
930 struct i915_request *rq;
932 rq = semaphore_queue(engine, vma, n++);
938 i915_request_put(rq);
942 err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
946 if (i915_request_wait(head, 0,
947 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
948 pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
949 outer->name, count, n);
951 intel_gt_set_wedged(outer->gt);
956 i915_request_put(head);
960 static int live_timeslice_preempt(void *arg)
962 struct intel_gt *gt = arg;
963 struct drm_i915_gem_object *obj;
964 struct intel_engine_cs *engine;
965 enum intel_engine_id id;
966 struct i915_vma *vma;
971 * If a request takes too long, we would like to give other users
972 * a fair go on the GPU. In particular, users may create batches
973 * that wait upon external input, where that input may even be
974 * supplied by another GPU job. To avoid blocking forever, we
975 * need to preempt the current task and replace it with another
978 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
981 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
985 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
991 vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
993 err = PTR_ERR(vaddr);
997 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1001 err = i915_vma_sync(vma);
1005 for_each_engine(engine, gt, id) {
1006 if (!intel_engine_has_preemption(engine))
1009 memset(vaddr, 0, PAGE_SIZE);
1011 st_engine_heartbeat_disable(engine);
1012 err = slice_semaphore_queue(engine, vma, 5);
1013 st_engine_heartbeat_enable(engine);
1017 if (igt_flush_test(gt->i915)) {
1024 i915_vma_unpin(vma);
1026 i915_gem_object_unpin_map(obj);
1028 i915_gem_object_put(obj);
1032 static struct i915_request *
1033 create_rewinder(struct intel_context *ce,
1034 struct i915_request *wait,
1035 void *slot, int idx)
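/*
 * Each rewinder batch waits for the semaphore in slot[0] to reach idx,
 * records RING_TIMESTAMP into slot[idx] and then advances the semaphore
 * for the next batch, so the stored timestamps expose the actual order
 * of execution on the HW.
 */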
1038 i915_ggtt_offset(ce->engine->status_page.vma) +
1039 offset_in_page(slot);
1040 struct i915_request *rq;
1044 rq = intel_context_create_request(ce);
1049 err = i915_request_await_dma_fence(rq, &wait->fence);
1054 cs = intel_ring_begin(rq, 14);
1060 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1063 *cs++ = MI_SEMAPHORE_WAIT |
1064 MI_SEMAPHORE_GLOBAL_GTT |
1066 MI_SEMAPHORE_SAD_GTE_SDD;
1071 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1072 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1073 *cs++ = offset + idx * sizeof(u32);
1076 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1081 intel_ring_advance(rq, cs);
1085 i915_request_get(rq);
1086 i915_request_add(rq);
1088 i915_request_put(rq);
1089 return ERR_PTR(err);
1095 static int live_timeslice_rewind(void *arg)
1097 struct intel_gt *gt = arg;
1098 struct intel_engine_cs *engine;
1099 enum intel_engine_id id;
1102 * The usual presumption on timeslice expiration is that we replace
1103 * the active context with another. However, given a chain of
1104 * dependencies we may end up replacing the context with itself,
1105 * but with only a few of its requests, forcing us to rewind the
1106 * RING_TAIL of the original request.
1108 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1111 for_each_engine(engine, gt, id) {
1112 enum { A1, A2, B1 };
1113 enum { X = 1, Z, Y };
1114 struct i915_request *rq[3] = {};
1115 struct intel_context *ce;
1116 unsigned long timeslice;
1120 if (!intel_engine_has_timeslices(engine))
1124 * A:rq1 -- semaphore wait, timestamp X
1125 * A:rq2 -- write timestamp Y
1127 * B:rq1 [await A:rq1] -- write timestamp Z
1129 * Force timeslice, release semaphore.
1131 * Expect execution/evaluation order XZY
1134 st_engine_heartbeat_disable(engine);
1135 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1137 slot = memset32(engine->status_page.addr + 1000, 0, 4);
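/*
 * Four scratch dwords in the status page: slot[0] is the semaphore the
 * batches poll on, slots 1..3 receive the X/Z/Y timestamps.
 */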
1139 ce = intel_context_create(engine);
1145 rq[A1] = create_rewinder(ce, NULL, slot, X);
1146 if (IS_ERR(rq[A1])) {
1147 intel_context_put(ce);
1151 rq[A2] = create_rewinder(ce, NULL, slot, Y);
1152 intel_context_put(ce);
1156 err = wait_for_submit(engine, rq[A2], HZ / 2);
1158 pr_err("%s: failed to submit first context\n",
1163 ce = intel_context_create(engine);
1169 rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1170 intel_context_put(ce);
1174 err = wait_for_submit(engine, rq[B1], HZ / 2);
1176 pr_err("%s: failed to submit second context\n",
1181 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1182 ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1183 while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1184 /* Wait for the timeslice to kick in */
1185 del_timer(&engine->execlists.timer);
1186 tasklet_hi_schedule(&engine->execlists.tasklet);
1187 intel_engine_flush_submission(engine);
1189 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1190 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1191 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1192 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1194 /* Release the hounds! */
1196 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1198 for (i = 1; i <= 3; i++) {
1199 unsigned long timeout = jiffies + HZ / 2;
1201 while (!READ_ONCE(slot[i]) &&
1202 time_before(jiffies, timeout))
1205 if (!time_before(jiffies, timeout)) {
1206 pr_err("%s: rq[%d] timed out\n",
1207 engine->name, i - 1);
1212 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1216 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1217 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1225 memset32(&slot[0], -1, 4);
1228 engine->props.timeslice_duration_ms = timeslice;
1229 st_engine_heartbeat_enable(engine);
1230 for (i = 0; i < 3; i++)
1231 i915_request_put(rq[i]);
1232 if (igt_flush_test(gt->i915))
1241 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1243 struct i915_request *rq;
1245 rq = intel_engine_create_kernel_request(engine);
1249 i915_request_get(rq);
1250 i915_request_add(rq);
1255 static long slice_timeout(struct intel_engine_cs *engine)
1259 /* Enough time for a timeslice to kick in, and kick out */
1260 timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1262 /* Enough time for the nop request to complete */
1268 static int live_timeslice_queue(void *arg)
1270 struct intel_gt *gt = arg;
1271 struct drm_i915_gem_object *obj;
1272 struct intel_engine_cs *engine;
1273 enum intel_engine_id id;
1274 struct i915_vma *vma;
1279 * Make sure that even if ELSP[0] and ELSP[1] are both filled and
1280 * timeslicing between them is disabled, we *do* enable timeslicing
1281 * if the queue demands it. (Normally, we do not submit if
1282 * ELSP[1] is already occupied, so must rely on timeslicing to
1283 * eject ELSP[0] in favour of the queue.)
1285 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1288 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1290 return PTR_ERR(obj);
1292 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
1298 vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1299 if (IS_ERR(vaddr)) {
1300 err = PTR_ERR(vaddr);
1304 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1308 err = i915_vma_sync(vma);
1312 for_each_engine(engine, gt, id) {
1313 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
1314 struct i915_request *rq, *nop;
1316 if (!intel_engine_has_preemption(engine))
1319 st_engine_heartbeat_disable(engine);
1320 memset(vaddr, 0, PAGE_SIZE);
1322 /* ELSP[0]: semaphore wait */
1323 rq = semaphore_queue(engine, vma, 0);
1328 engine->schedule(rq, &attr);
1329 err = wait_for_submit(engine, rq, HZ / 2);
1331 pr_err("%s: Timed out trying to submit semaphores\n",
1336 /* ELSP[1]: nop request */
1337 nop = nop_request(engine);
1342 err = wait_for_submit(engine, nop, HZ / 2);
1343 i915_request_put(nop);
1345 pr_err("%s: Timed out trying to submit nop\n",
1350 GEM_BUG_ON(i915_request_completed(rq));
1351 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1353 /* Queue: semaphore signal, matching priority as semaphore */
1354 err = release_queue(engine, vma, 1, effective_prio(rq));
1358 /* Wait until we ack the release_queue and start timeslicing */
1361 intel_engine_flush_submission(engine);
1362 } while (READ_ONCE(engine->execlists.pending[0]));
1364 /* Timeslice every jiffy, so within 2 we should signal */
1365 if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1366 struct drm_printer p =
1367 drm_info_printer(gt->i915->drm.dev);
1369 pr_err("%s: Failed to timeslice into queue\n",
1371 intel_engine_dump(engine, &p,
1372 "%s\n", engine->name);
1374 memset(vaddr, 0xff, PAGE_SIZE);
1378 i915_request_put(rq);
1380 st_engine_heartbeat_enable(engine);
1386 i915_vma_unpin(vma);
1388 i915_gem_object_unpin_map(obj);
1390 i915_gem_object_put(obj);
1394 static int live_timeslice_nopreempt(void *arg)
1396 struct intel_gt *gt = arg;
1397 struct intel_engine_cs *engine;
1398 enum intel_engine_id id;
1399 struct igt_spinner spin;
1403 * We should not timeslice into a request that is marked with
1404 * I915_REQUEST_NOPREEMPT.
1406 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1409 if (igt_spinner_init(&spin, gt))
1412 for_each_engine(engine, gt, id) {
1413 struct intel_context *ce;
1414 struct i915_request *rq;
1415 unsigned long timeslice;
1417 if (!intel_engine_has_preemption(engine))
1420 ce = intel_context_create(engine);
1426 st_engine_heartbeat_disable(engine);
1427 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1429 /* Create an unpreemptible spinner */
1431 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1432 intel_context_put(ce);
1438 i915_request_get(rq);
1439 i915_request_add(rq);
1441 if (!igt_wait_for_spinner(&spin, rq)) {
1442 i915_request_put(rq);
1447 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1448 i915_request_put(rq);
1450 /* Followed by a maximum priority barrier (heartbeat) */
1452 ce = intel_context_create(engine);
1458 rq = intel_context_create_request(ce);
1459 intel_context_put(ce);
1465 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1466 i915_request_get(rq);
1467 i915_request_add(rq);
1470 * Wait until the barrier is in ELSP, and we know timeslicing
1471 * will have been activated.
1473 if (wait_for_submit(engine, rq, HZ / 2)) {
1474 i915_request_put(rq);
1480 * Since the ELSP[0] request is unpreemptible, it should not
1481 * allow the maximum priority barrier through. Wait long
1482 * enough to see if it is timesliced in by mistake.
1484 if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1485 pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1489 i915_request_put(rq);
1492 igt_spinner_end(&spin);
1494 xchg(&engine->props.timeslice_duration_ms, timeslice);
1495 st_engine_heartbeat_enable(engine);
1499 if (igt_flush_test(gt->i915)) {
1505 igt_spinner_fini(&spin);
1509 static int live_busywait_preempt(void *arg)
1511 struct intel_gt *gt = arg;
1512 struct i915_gem_context *ctx_hi, *ctx_lo;
1513 struct intel_engine_cs *engine;
1514 struct drm_i915_gem_object *obj;
1515 struct i915_vma *vma;
1516 enum intel_engine_id id;
1521 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1522 * preempt the busywaits used to synchronise between rings.
1525 ctx_hi = kernel_context(gt->i915);
1528 ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1530 ctx_lo = kernel_context(gt->i915);
1533 ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1535 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1541 map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1547 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
1553 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1557 err = i915_vma_sync(vma);
1561 for_each_engine(engine, gt, id) {
1562 struct i915_request *lo, *hi;
1563 struct igt_live_test t;
1566 if (!intel_engine_has_preemption(engine))
1569 if (!intel_engine_can_store_dword(engine))
1572 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1578 * We create two requests. The low priority request
1579 * busywaits on a semaphore (inside the ringbuffer where
1580 * it should be preemptible) and the high priority request
1581 * uses a MI_STORE_DWORD_IMM to update the semaphore value
1582 * allowing the first request to complete. If preemption
1583 * fails, we hang instead.
1586 lo = igt_request_alloc(ctx_lo, engine);
1592 cs = intel_ring_begin(lo, 8);
1595 i915_request_add(lo);
1599 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1600 *cs++ = i915_ggtt_offset(vma);
1604 /* XXX Do we need a flush + invalidate here? */
1606 *cs++ = MI_SEMAPHORE_WAIT |
1607 MI_SEMAPHORE_GLOBAL_GTT |
1609 MI_SEMAPHORE_SAD_EQ_SDD;
1611 *cs++ = i915_ggtt_offset(vma);
1614 intel_ring_advance(lo, cs);
1616 i915_request_get(lo);
1617 i915_request_add(lo);
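/*
 * Give the GPU a few milliseconds to start lo and write its initial
 * semaphore value before checking that it is busywaiting.
 */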
1619 if (wait_for(READ_ONCE(*map), 10)) {
1620 i915_request_put(lo);
1625 /* Low priority request should be busywaiting now */
1626 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1627 i915_request_put(lo);
1628 pr_err("%s: Busywaiting request did not!\n",
1634 hi = igt_request_alloc(ctx_hi, engine);
1637 i915_request_put(lo);
1641 cs = intel_ring_begin(hi, 4);
1644 i915_request_add(hi);
1645 i915_request_put(lo);
1649 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1650 *cs++ = i915_ggtt_offset(vma);
1654 intel_ring_advance(hi, cs);
1655 i915_request_add(hi);
1657 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1658 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1660 pr_err("%s: Failed to preempt semaphore busywait!\n",
1663 intel_engine_dump(engine, &p, "%s\n", engine->name);
1666 i915_request_put(lo);
1667 intel_gt_set_wedged(gt);
1671 GEM_BUG_ON(READ_ONCE(*map));
1672 i915_request_put(lo);
1674 if (igt_live_test_end(&t)) {
1682 i915_vma_unpin(vma);
1684 i915_gem_object_unpin_map(obj);
1686 i915_gem_object_put(obj);
1688 kernel_context_close(ctx_lo);
1690 kernel_context_close(ctx_hi);
1694 static struct i915_request *
1695 spinner_create_request(struct igt_spinner *spin,
1696 struct i915_gem_context *ctx,
1697 struct intel_engine_cs *engine,
1700 struct intel_context *ce;
1701 struct i915_request *rq;
1703 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1705 return ERR_CAST(ce);
1707 rq = igt_spinner_create_request(spin, ce, arb);
1708 intel_context_put(ce);
1712 static int live_preempt(void *arg)
1714 struct intel_gt *gt = arg;
1715 struct i915_gem_context *ctx_hi, *ctx_lo;
1716 struct igt_spinner spin_hi, spin_lo;
1717 struct intel_engine_cs *engine;
1718 enum intel_engine_id id;
1721 if (igt_spinner_init(&spin_hi, gt))
1724 if (igt_spinner_init(&spin_lo, gt))
1727 ctx_hi = kernel_context(gt->i915);
1730 ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1732 ctx_lo = kernel_context(gt->i915);
1735 ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1737 for_each_engine(engine, gt, id) {
1738 struct igt_live_test t;
1739 struct i915_request *rq;
1741 if (!intel_engine_has_preemption(engine))
1744 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1749 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1756 i915_request_add(rq);
1757 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1758 GEM_TRACE("lo spinner failed to start\n");
1760 intel_gt_set_wedged(gt);
1765 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1768 igt_spinner_end(&spin_lo);
1773 i915_request_add(rq);
1774 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1775 GEM_TRACE("hi spinner failed to start\n");
1777 intel_gt_set_wedged(gt);
1782 igt_spinner_end(&spin_hi);
1783 igt_spinner_end(&spin_lo);
1785 if (igt_live_test_end(&t)) {
1793 kernel_context_close(ctx_lo);
1795 kernel_context_close(ctx_hi);
1797 igt_spinner_fini(&spin_lo);
1799 igt_spinner_fini(&spin_hi);
1803 static int live_late_preempt(void *arg)
1805 struct intel_gt *gt = arg;
1806 struct i915_gem_context *ctx_hi, *ctx_lo;
1807 struct igt_spinner spin_hi, spin_lo;
1808 struct intel_engine_cs *engine;
1809 struct i915_sched_attr attr = {};
1810 enum intel_engine_id id;
1813 if (igt_spinner_init(&spin_hi, gt))
1816 if (igt_spinner_init(&spin_lo, gt))
1819 ctx_hi = kernel_context(gt->i915);
1823 ctx_lo = kernel_context(gt->i915);
1827 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1828 ctx_lo->sched.priority = 1;
1830 for_each_engine(engine, gt, id) {
1831 struct igt_live_test t;
1832 struct i915_request *rq;
1834 if (!intel_engine_has_preemption(engine))
1837 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1842 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1849 i915_request_add(rq);
1850 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1851 pr_err("First context failed to start\n");
1855 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1858 igt_spinner_end(&spin_lo);
1863 i915_request_add(rq);
1864 if (igt_wait_for_spinner(&spin_hi, rq)) {
1865 pr_err("Second context overtook first?\n");
1869 attr.priority = I915_PRIORITY_MAX;
1870 engine->schedule(rq, &attr);
1872 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1873 pr_err("High priority context failed to preempt the low priority context\n");
1878 igt_spinner_end(&spin_hi);
1879 igt_spinner_end(&spin_lo);
1881 if (igt_live_test_end(&t)) {
1889 kernel_context_close(ctx_lo);
1891 kernel_context_close(ctx_hi);
1893 igt_spinner_fini(&spin_lo);
1895 igt_spinner_fini(&spin_hi);
1899 igt_spinner_end(&spin_hi);
1900 igt_spinner_end(&spin_lo);
1901 intel_gt_set_wedged(gt);
1906 struct preempt_client {
1907 struct igt_spinner spin;
1908 struct i915_gem_context *ctx;
1911 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1913 c->ctx = kernel_context(gt->i915);
1917 if (igt_spinner_init(&c->spin, gt))
1923 kernel_context_close(c->ctx);
1927 static void preempt_client_fini(struct preempt_client *c)
1929 igt_spinner_fini(&c->spin);
1930 kernel_context_close(c->ctx);
1933 static int live_nopreempt(void *arg)
1935 struct intel_gt *gt = arg;
1936 struct intel_engine_cs *engine;
1937 struct preempt_client a, b;
1938 enum intel_engine_id id;
1942 * Verify that we can disable preemption for an individual request
1943 * that may be under observation and must not be interrupted.
1946 if (preempt_client_init(gt, &a))
1948 if (preempt_client_init(gt, &b))
1950 b.ctx->sched.priority = I915_PRIORITY_MAX;
1952 for_each_engine(engine, gt, id) {
1953 struct i915_request *rq_a, *rq_b;
1955 if (!intel_engine_has_preemption(engine))
1958 engine->execlists.preempt_hang.count = 0;
1960 rq_a = spinner_create_request(&a.spin,
1964 err = PTR_ERR(rq_a);
1968 /* Low priority client, but unpreemptable! */
1969 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1971 i915_request_add(rq_a);
1972 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1973 pr_err("First client failed to start\n");
1977 rq_b = spinner_create_request(&b.spin,
1981 err = PTR_ERR(rq_b);
1985 i915_request_add(rq_b);
1987 /* B is much more important than A! (But A is unpreemptable.) */
1988 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1990 /* Wait long enough for preemption and timeslicing */
1991 if (igt_wait_for_spinner(&b.spin, rq_b)) {
1992 pr_err("Second client started too early!\n");
1996 igt_spinner_end(&a.spin);
1998 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1999 pr_err("Second client failed to start\n");
2003 igt_spinner_end(&b.spin);
2005 if (engine->execlists.preempt_hang.count) {
2006 pr_err("Preemption recorded x%d; should have been suppressed!\n",
2007 engine->execlists.preempt_hang.count);
2012 if (igt_flush_test(gt->i915))
2018 preempt_client_fini(&b);
2020 preempt_client_fini(&a);
2024 igt_spinner_end(&b.spin);
2025 igt_spinner_end(&a.spin);
2026 intel_gt_set_wedged(gt);
2031 struct live_preempt_cancel {
2032 struct intel_engine_cs *engine;
2033 struct preempt_client a, b;
2036 static int __cancel_active0(struct live_preempt_cancel *arg)
2038 struct i915_request *rq;
2039 struct igt_live_test t;
2042 /* Preempt cancel of ELSP0 */
2043 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2044 if (igt_live_test_begin(&t, arg->engine->i915,
2045 __func__, arg->engine->name))
2048 rq = spinner_create_request(&arg->a.spin,
2049 arg->a.ctx, arg->engine,
2054 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2055 i915_request_get(rq);
2056 i915_request_add(rq);
2057 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2062 intel_context_set_banned(rq->context);
2063 err = intel_engine_pulse(arg->engine);
2067 err = wait_for_reset(arg->engine, rq, HZ / 2);
2069 pr_err("Cancelled inflight0 request did not reset\n");
2074 i915_request_put(rq);
2075 if (igt_live_test_end(&t))
2080 static int __cancel_active1(struct live_preempt_cancel *arg)
2082 struct i915_request *rq[2] = {};
2083 struct igt_live_test t;
2086 /* Preempt cancel of ELSP1 */
2087 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2088 if (igt_live_test_begin(&t, arg->engine->i915,
2089 __func__, arg->engine->name))
2092 rq[0] = spinner_create_request(&arg->a.spin,
2093 arg->a.ctx, arg->engine,
2094 MI_NOOP); /* no preemption */
2096 return PTR_ERR(rq[0]);
2098 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2099 i915_request_get(rq[0]);
2100 i915_request_add(rq[0]);
2101 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2106 rq[1] = spinner_create_request(&arg->b.spin,
2107 arg->b.ctx, arg->engine,
2109 if (IS_ERR(rq[1])) {
2110 err = PTR_ERR(rq[1]);
2114 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2115 i915_request_get(rq[1]);
2116 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2117 i915_request_add(rq[1]);
2121 intel_context_set_banned(rq[1]->context);
2122 err = intel_engine_pulse(arg->engine);
2126 igt_spinner_end(&arg->a.spin);
2127 err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2131 if (rq[0]->fence.error != 0) {
2132 pr_err("Normal inflight0 request did not complete\n");
2137 if (rq[1]->fence.error != -EIO) {
2138 pr_err("Cancelled inflight1 request did not report -EIO\n");
2144 i915_request_put(rq[1]);
2145 i915_request_put(rq[0]);
2146 if (igt_live_test_end(&t))
2151 static int __cancel_queued(struct live_preempt_cancel *arg)
2153 struct i915_request *rq[3] = {};
2154 struct igt_live_test t;
2157 /* Full ELSP and one in the wings */
2158 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2159 if (igt_live_test_begin(&t, arg->engine->i915,
2160 __func__, arg->engine->name))
2163 rq[0] = spinner_create_request(&arg->a.spin,
2164 arg->a.ctx, arg->engine,
2167 return PTR_ERR(rq[0]);
2169 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2170 i915_request_get(rq[0]);
2171 i915_request_add(rq[0]);
2172 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2177 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2178 if (IS_ERR(rq[1])) {
2179 err = PTR_ERR(rq[1]);
2183 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2184 i915_request_get(rq[1]);
2185 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2186 i915_request_add(rq[1]);
2190 rq[2] = spinner_create_request(&arg->b.spin,
2191 arg->a.ctx, arg->engine,
2193 if (IS_ERR(rq[2])) {
2194 err = PTR_ERR(rq[2]);
2198 i915_request_get(rq[2]);
2199 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2200 i915_request_add(rq[2]);
2204 intel_context_set_banned(rq[2]->context);
2205 err = intel_engine_pulse(arg->engine);
2209 err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2213 if (rq[0]->fence.error != -EIO) {
2214 pr_err("Cancelled inflight0 request did not report -EIO\n");
2219 if (rq[1]->fence.error != 0) {
2220 pr_err("Normal inflight1 request did not complete\n");
2225 if (rq[2]->fence.error != -EIO) {
2226 pr_err("Cancelled queued request did not report -EIO\n");
2232 i915_request_put(rq[2]);
2233 i915_request_put(rq[1]);
2234 i915_request_put(rq[0]);
2235 if (igt_live_test_end(&t))
2240 static int __cancel_hostile(struct live_preempt_cancel *arg)
2242 struct i915_request *rq;
2245 /* Preempt cancel non-preemptible spinner in ELSP0 */
2246 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2249 if (!intel_has_reset_engine(arg->engine->gt))
2252 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2253 rq = spinner_create_request(&arg->a.spin,
2254 arg->a.ctx, arg->engine,
2255 MI_NOOP); /* preemption disabled */
2259 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2260 i915_request_get(rq);
2261 i915_request_add(rq);
2262 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2267 intel_context_set_banned(rq->context);
2268 err = intel_engine_pulse(arg->engine); /* force reset */
2272 err = wait_for_reset(arg->engine, rq, HZ / 2);
2274 pr_err("Cancelled inflight0 request did not reset\n");
2279 i915_request_put(rq);
2280 if (igt_flush_test(arg->engine->i915))
2285 static void force_reset_timeout(struct intel_engine_cs *engine)
2287 engine->reset_timeout.probability = 999;
2288 atomic_set(&engine->reset_timeout.times, -1);
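/*
 * reset_timeout drives the selftest fault injection: an always-on
 * probability with unlimited repeats makes every injected reset-timeout
 * check fire, so the forthcoming preempt reset is forced to fail.
 */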
2291 static void cancel_reset_timeout(struct intel_engine_cs *engine)
2293 memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2296 static int __cancel_fail(struct live_preempt_cancel *arg)
2298 struct intel_engine_cs *engine = arg->engine;
2299 struct i915_request *rq;
2302 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2305 if (!intel_has_reset_engine(engine->gt))
2308 GEM_TRACE("%s(%s)\n", __func__, engine->name);
2309 rq = spinner_create_request(&arg->a.spin,
2311 MI_NOOP); /* preemption disabled */
2315 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2316 i915_request_get(rq);
2317 i915_request_add(rq);
2318 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2323 intel_context_set_banned(rq->context);
2325 err = intel_engine_pulse(engine);
2329 force_reset_timeout(engine);
2331 /* force preempt reset [failure] */
2332 while (!engine->execlists.pending[0])
2333 intel_engine_flush_submission(engine);
2334 del_timer_sync(&engine->execlists.preempt);
2335 intel_engine_flush_submission(engine);
2337 cancel_reset_timeout(engine);
2339 /* after failure, require heartbeats to reset device */
2340 intel_engine_set_heartbeat(engine, 1);
2341 err = wait_for_reset(engine, rq, HZ / 2);
2342 intel_engine_set_heartbeat(engine,
2343 engine->defaults.heartbeat_interval_ms);
2345 pr_err("Cancelled inflight0 request did not reset\n");
2350 i915_request_put(rq);
2351 if (igt_flush_test(engine->i915))
2356 static int live_preempt_cancel(void *arg)
2358 struct intel_gt *gt = arg;
2359 struct live_preempt_cancel data;
2360 enum intel_engine_id id;
2364 * To cancel an inflight context, we need to first remove it from the
2365 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2368 if (preempt_client_init(gt, &data.a))
2370 if (preempt_client_init(gt, &data.b))
2373 for_each_engine(data.engine, gt, id) {
2374 if (!intel_engine_has_preemption(data.engine))
2377 err = __cancel_active0(&data);
2381 err = __cancel_active1(&data);
2385 err = __cancel_queued(&data);
2389 err = __cancel_hostile(&data);
2393 err = __cancel_fail(&data);
2400 preempt_client_fini(&data.b);
2402 preempt_client_fini(&data.a);
2407 igt_spinner_end(&data.b.spin);
2408 igt_spinner_end(&data.a.spin);
2409 intel_gt_set_wedged(gt);
2413 static int live_suppress_self_preempt(void *arg)
2415 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2416 struct intel_gt *gt = arg;
2417 struct intel_engine_cs *engine;
2418 struct preempt_client a, b;
2419 enum intel_engine_id id;
2423 * Verify that if a preemption request does not cause a change in
2424 * the current execution order, the preempt-to-idle injection is
2425 * skipped and that we do not accidentally apply it after the CS
2429 if (intel_uc_uses_guc_submission(>->uc))
2430 return 0; /* presume black box */
2432 if (intel_vgpu_active(gt->i915))
2433 return 0; /* GVT forces single port & request submission */
2435 if (preempt_client_init(gt, &a))
2437 if (preempt_client_init(gt, &b))
2440 for_each_engine(engine, gt, id) {
2441 struct i915_request *rq_a, *rq_b;
2444 if (!intel_engine_has_preemption(engine))
2447 if (igt_flush_test(gt->i915))
2450 st_engine_heartbeat_disable(engine);
2451 engine->execlists.preempt_hang.count = 0;
2453 rq_a = spinner_create_request(&a.spin,
2457 err = PTR_ERR(rq_a);
2458 st_engine_heartbeat_enable(engine);
2462 i915_request_add(rq_a);
2463 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2464 pr_err("First client failed to start\n");
2465 st_engine_heartbeat_enable(engine);
2469 /* Keep postponing the timer to avoid premature slicing */
2470 mod_timer(&engine->execlists.timer, jiffies + HZ);
2471 for (depth = 0; depth < 8; depth++) {
2472 rq_b = spinner_create_request(&b.spin,
2476 err = PTR_ERR(rq_b);
2477 st_engine_heartbeat_enable(engine);
2480 i915_request_add(rq_b);
2482 GEM_BUG_ON(i915_request_completed(rq_a));
2483 engine->schedule(rq_a, &attr);
2484 igt_spinner_end(&a.spin);
2486 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2487 pr_err("Second client failed to start\n");
2488 st_engine_heartbeat_enable(engine);
2495 igt_spinner_end(&a.spin);
2497 if (engine->execlists.preempt_hang.count) {
2498 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2500 engine->execlists.preempt_hang.count,
2502 st_engine_heartbeat_enable(engine);
2507 st_engine_heartbeat_enable(engine);
2508 if (igt_flush_test(gt->i915))
2514 preempt_client_fini(&b);
2516 preempt_client_fini(&a);
2520 igt_spinner_end(&b.spin);
2521 igt_spinner_end(&a.spin);
2522 intel_gt_set_wedged(gt);
2527 static int live_chain_preempt(void *arg)
2529 struct intel_gt *gt = arg;
2530 struct intel_engine_cs *engine;
2531 struct preempt_client hi, lo;
2532 enum intel_engine_id id;
2536 * Build a chain AB...BA between two contexts (A, B) and request
2537 * preemption of the last request. It should then complete before
2538 * the previously submitted spinner in B.
2541 if (preempt_client_init(gt, &hi))
2544 if (preempt_client_init(gt, &lo))
2547 for_each_engine(engine, gt, id) {
2548 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2549 struct igt_live_test t;
2550 struct i915_request *rq;
2551 int ring_size, count, i;
2553 if (!intel_engine_has_preemption(engine))
2556 rq = spinner_create_request(&lo.spin,
2562 i915_request_get(rq);
2563 i915_request_add(rq);
2565 ring_size = rq->wa_tail - rq->head;
2567 ring_size += rq->ring->size;
2568 ring_size = rq->ring->size / ring_size;
2569 pr_debug("%s(%s): Using maximum of %d requests\n",
2570 __func__, engine->name, ring_size);
2572 igt_spinner_end(&lo.spin);
2573 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2574 pr_err("Timed out waiting to flush %s\n", engine->name);
2575 i915_request_put(rq);
2578 i915_request_put(rq);
2580 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2585 for_each_prime_number_from(count, 1, ring_size) {
2586 rq = spinner_create_request(&hi.spin,
2591 i915_request_add(rq);
2592 if (!igt_wait_for_spinner(&hi.spin, rq))
2595 rq = spinner_create_request(&lo.spin,
2600 i915_request_add(rq);
2602 for (i = 0; i < count; i++) {
2603 rq = igt_request_alloc(lo.ctx, engine);
2606 i915_request_add(rq);
2609 rq = igt_request_alloc(hi.ctx, engine);
2613 i915_request_get(rq);
2614 i915_request_add(rq);
2615 engine->schedule(rq, &attr);
2617 igt_spinner_end(&hi.spin);
2618 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2619 struct drm_printer p =
2620 drm_info_printer(gt->i915->drm.dev);
2622 pr_err("Failed to preempt over chain of %d\n",
2624 intel_engine_dump(engine, &p,
2625 "%s\n", engine->name);
2626 i915_request_put(rq);
2629 igt_spinner_end(&lo.spin);
2630 i915_request_put(rq);
2632 rq = igt_request_alloc(lo.ctx, engine);
2636 i915_request_get(rq);
2637 i915_request_add(rq);
2639 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2640 struct drm_printer p =
2641 drm_info_printer(gt->i915->drm.dev);
2643 pr_err("Failed to flush low priority chain of %d requests\n",
2645 intel_engine_dump(engine, &p,
2646 "%s\n", engine->name);
2648 i915_request_put(rq);
2651 i915_request_put(rq);
2654 if (igt_live_test_end(&t)) {
2662 preempt_client_fini(&lo);
2664 preempt_client_fini(&hi);
2668 igt_spinner_end(&hi.spin);
2669 igt_spinner_end(&lo.spin);
2670 intel_gt_set_wedged(gt);
2675 static int create_gang(struct intel_engine_cs *engine,
2676 struct i915_request **prev)
2678 struct drm_i915_gem_object *obj;
2679 struct intel_context *ce;
2680 struct i915_request *rq;
2681 struct i915_vma *vma;
2685 ce = intel_context_create(engine);
2689 obj = i915_gem_object_create_internal(engine->i915, 4096);
2695 vma = i915_vma_instance(obj, ce->vm, NULL);
2701 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2705 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
2711 /* Semaphore target: spin until zero */
2712 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2714 *cs++ = MI_SEMAPHORE_WAIT |
2716 MI_SEMAPHORE_SAD_EQ_SDD;
2718 *cs++ = lower_32_bits(vma->node.start);
2719 *cs++ = upper_32_bits(vma->node.start);
2722 u64 offset = (*prev)->batch->node.start;
2724 /* Terminate the spinner in the next lower priority batch. */
2725 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2726 *cs++ = lower_32_bits(offset);
2727 *cs++ = upper_32_bits(offset);
2731 *cs++ = MI_BATCH_BUFFER_END;
2732 i915_gem_object_flush_map(obj);
2733 i915_gem_object_unpin_map(obj);
2735 rq = intel_context_create_request(ce);
2741 rq->batch = i915_vma_get(vma);
2742 i915_request_get(rq);
2745 err = i915_request_await_object(rq, vma->obj, false);
2747 err = i915_vma_move_to_active(vma, rq, 0);
2749 err = rq->engine->emit_bb_start(rq,
2752 i915_vma_unlock(vma);
2753 i915_request_add(rq);
2757 i915_gem_object_put(obj);
2758 intel_context_put(ce);
2760 rq->mock.link.next = &(*prev)->mock.link;
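/*
 * Chain the gang together through the mock link, newest request first,
 * so the caller can walk it from highest to lowest priority.
 */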
2765 i915_vma_put(rq->batch);
2766 i915_request_put(rq);
2768 i915_gem_object_put(obj);
2770 intel_context_put(ce);
2774 static int __live_preempt_ring(struct intel_engine_cs *engine,
2775 struct igt_spinner *spin,
2776 int queue_sz, int ring_sz)
2778 struct intel_context *ce[2] = {};
2779 struct i915_request *rq;
2780 struct igt_live_test t;
2784 if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2787 for (n = 0; n < ARRAY_SIZE(ce); n++) {
2788 struct intel_context *tmp;
2790 tmp = intel_context_create(engine);
2796 tmp->ring = __intel_context_ring_size(ring_sz);
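/* Ask for a ring of the requested size; it is allocated when pinned. */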
2798 err = intel_context_pin(tmp);
2800 intel_context_put(tmp);
2804 memset32(tmp->ring->vaddr,
2805 0xdeadbeef, /* trigger a hang if executed */
2806 tmp->ring->vma->size / sizeof(u32));
2811 rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2817 i915_request_get(rq);
2818 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2819 i915_request_add(rq);
2821 if (!igt_wait_for_spinner(spin, rq)) {
2822 intel_gt_set_wedged(engine->gt);
2823 i915_request_put(rq);
2828 /* Fill the ring until we cause a wrap */
2830 while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2831 struct i915_request *tmp;
2833 tmp = intel_context_create_request(ce[0]);
2836 i915_request_put(rq);
2840 i915_request_add(tmp);
2841 intel_engine_flush_submission(engine);
2844 intel_engine_flush_submission(engine);
2845 pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2846 engine->name, queue_sz, n,
2851 i915_request_put(rq);
2853 /* Create a second request to preempt the first ring */
2854 rq = intel_context_create_request(ce[1]);
2860 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2861 i915_request_get(rq);
2862 i915_request_add(rq);
2864 err = wait_for_submit(engine, rq, HZ / 2);
2865 i915_request_put(rq);
2867 pr_err("%s: preemption request was not submitted\n",
2872 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2874 ce[0]->ring->tail, ce[0]->ring->emit,
2875 ce[1]->ring->tail, ce[1]->ring->emit);
2878 intel_engine_flush_submission(engine);
2879 igt_spinner_end(spin);
2880 for (n = 0; n < ARRAY_SIZE(ce); n++) {
2881 if (IS_ERR_OR_NULL(ce[n]))
2884 intel_context_unpin(ce[n]);
2885 intel_context_put(ce[n]);
2887 if (igt_live_test_end(&t))
2892 static int live_preempt_ring(void *arg)
2894 struct intel_gt *gt = arg;
2895 struct intel_engine_cs *engine;
2896 struct igt_spinner spin;
2897 enum intel_engine_id id;
2901 * Check that we roll back large chunks of a ring in order to do a
2902 * preemption event. Similar to live_unlite_ring, but looking at
2903 * ring size rather than the impact of intel_ring_direction().
2906 if (igt_spinner_init(&spin, gt))
2909 for_each_engine(engine, gt, id) {
2912 if (!intel_engine_has_preemption(engine))
2915 if (!intel_engine_can_store_dword(engine))
2918 st_engine_heartbeat_disable(engine);
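/* Fill 0, 1/4, 1/2 and 3/4 of a 4K ring before forcing the preemption. */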
2920 for (n = 0; n <= 3; n++) {
2921 err = __live_preempt_ring(engine, &spin,
2922 n * SZ_4K / 4, SZ_4K);
2927 st_engine_heartbeat_enable(engine);
2932 igt_spinner_fini(&spin);
2936 static int live_preempt_gang(void *arg)
2938 struct intel_gt *gt = arg;
2939 struct intel_engine_cs *engine;
2940 enum intel_engine_id id;
2943 * Build as long a chain of preempters as we can, with each
2944 * request higher priority than the last. Once we are ready, we release
2945 * the last batch which then percolates down the chain, each releasing
2946 * the next oldest in turn. The intent is to simply push as hard as we
2947 * can with the number of preemptions, trying to exceed narrow HW
2948 * limits. At a minimum, we insist that we can sort all the user
2949 * high priority levels into execution order.
2952 for_each_engine(engine, gt, id) {
2953 struct i915_request *rq = NULL;
2954 struct igt_live_test t;
2955 IGT_TIMEOUT(end_time);
2960 if (!intel_engine_has_preemption(engine))
2963 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2967 struct i915_sched_attr attr = { .priority = prio++ };
2969 err = create_gang(engine, &rq);
2973 /* Submit each spinner at increasing priority */
2974 engine->schedule(rq, &attr);
2975 } while (prio <= I915_PRIORITY_MAX &&
2976 !__igt_timeout(end_time, NULL));
2977 pr_debug("%s: Preempt chain of %d requests\n",
2978 engine->name, prio);
2981 * Such that the last spinner is the highest priority and
2982 * should execute first. When that spinner completes,
2983 * it will terminate the next lowest spinner until there
2984 * are no more spinners and the gang is complete.
2986 cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
2989 i915_gem_object_unpin_map(rq->batch->obj);
2992 intel_gt_set_wedged(gt);
2995 while (rq) { /* wait for each rq from highest to lowest prio */
2996 struct i915_request *n = list_next_entry(rq, mock.link);
2998 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2999 struct drm_printer p =
3000 drm_info_printer(engine->i915->drm.dev);
3002 pr_err("Failed to flush chain of %d requests, at %d\n",
3004 intel_engine_dump(engine, &p,
3005 "%s\n", engine->name);
3010 i915_vma_put(rq->batch);
3011 i915_request_put(rq);
3015 if (igt_live_test_end(&t))
3024 static struct i915_vma *
3025 create_gpr_user(struct intel_engine_cs *engine,
3026 struct i915_vma *result,
3027 unsigned int offset)
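/*
 * The user batch increments each GPR with MI_MATH, stores the result out
 * to the target buffer at the given offset, and after each step spins on
 * a semaphore in that buffer; replaying any part of the batch after a
 * preemption would repeat an increment and show up as a value other
 * than 1.
 */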
3029 struct drm_i915_gem_object *obj;
3030 struct i915_vma *vma;
3035 obj = i915_gem_object_create_internal(engine->i915, 4096);
3037 return ERR_CAST(obj);
3039 vma = i915_vma_instance(obj, result->vm, NULL);
3041 i915_gem_object_put(obj);
3045 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3048 return ERR_PTR(err);
3051 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
3054 return ERR_CAST(cs);
3057 /* All GPR are clear for new contexts. We use GPR(0) as a constant */
3058 *cs++ = MI_LOAD_REGISTER_IMM(1);
3059 *cs++ = CS_GPR(engine, 0);
3062 for (i = 1; i < NUM_GPR; i++) {
3068 * As we read and write the context-saved GPR[i], if
3069 * we restart this batch buffer from an earlier point, we
3070 * will repeat the increment and store a value > 1.
3073 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3074 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3075 *cs++ = MI_MATH_ADD;
3076 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3078 addr = result->node.start + offset + i * sizeof(*cs);
3079 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
3080 *cs++ = CS_GPR(engine, 2 * i);
3081 *cs++ = lower_32_bits(addr);
3082 *cs++ = upper_32_bits(addr);
3084 *cs++ = MI_SEMAPHORE_WAIT |
3086 MI_SEMAPHORE_SAD_GTE_SDD;
3088 *cs++ = lower_32_bits(result->node.start);
3089 *cs++ = upper_32_bits(result->node.start);
3092 *cs++ = MI_BATCH_BUFFER_END;
3093 i915_gem_object_flush_map(obj);
3094 i915_gem_object_unpin_map(obj);
3099 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3101 struct drm_i915_gem_object *obj;
3102 struct i915_vma *vma;
3105 obj = i915_gem_object_create_internal(gt->i915, sz);
3107 return ERR_CAST(obj);
3109 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
3111 i915_gem_object_put(obj);
3115 err = i915_ggtt_pin(vma, NULL, 0, 0);
3118 return ERR_PTR(err);
3124 static struct i915_request *
3125 create_gpr_client(struct intel_engine_cs *engine,
3126 struct i915_vma *global,
3127 unsigned int offset)
3129 struct i915_vma *batch, *vma;
3130 struct intel_context *ce;
3131 struct i915_request *rq;
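/*
 * Build one client: a fresh context on the engine, the shared result
 * buffer bound into its VM, and the GPR-increment batch from
 * create_gpr_user() targeting its own slice of that buffer.
 */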
3134 ce = intel_context_create(engine);
3136 return ERR_CAST(ce);
3138 vma = i915_vma_instance(global->obj, ce->vm, NULL);
3144 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3148 batch = create_gpr_user(engine, vma, offset);
3149 if (IS_ERR(batch)) {
3150 err = PTR_ERR(batch);
3154 rq = intel_context_create_request(ce);
3161 err = i915_request_await_object(rq, vma->obj, false);
3163 err = i915_vma_move_to_active(vma, rq, 0);
3164 i915_vma_unlock(vma);
3166 i915_vma_lock(batch);
3168 err = i915_request_await_object(rq, batch->obj, false);
3170 err = i915_vma_move_to_active(batch, rq, 0);
3172 err = rq->engine->emit_bb_start(rq,
3175 i915_vma_unlock(batch);
3176 i915_vma_unpin(batch);
3179 i915_request_get(rq);
3180 i915_request_add(rq);
3183 i915_vma_put(batch);
3185 i915_vma_unpin(vma);
3187 intel_context_put(ce);
3188 return err ? ERR_PTR(err) : rq;
3191 static int preempt_user(struct intel_engine_cs *engine,
3192 struct i915_vma *global,
3195 struct i915_sched_attr attr = {
3196 .priority = I915_PRIORITY_MAX
3198 struct i915_request *rq;
3202 rq = intel_engine_create_kernel_request(engine);
3206 cs = intel_ring_begin(rq, 4);
3208 i915_request_add(rq);
3212 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3213 *cs++ = i915_ggtt_offset(global);
3217 intel_ring_advance(rq, cs);
3219 i915_request_get(rq);
3220 i915_request_add(rq);
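/* Bump to maximum priority so the store preempts the running GPR clients. */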
3222 engine->schedule(rq, &attr);
3224 if (i915_request_wait(rq, 0, HZ / 2) < 0)
3226 i915_request_put(rq);
3231 static int live_preempt_user(void *arg)
3233 struct intel_gt *gt = arg;
3234 struct intel_engine_cs *engine;
3235 struct i915_vma *global;
3236 enum intel_engine_id id;
3241 * In our other tests, we look at preemption in carefully
3242 * controlled conditions in the ringbuffer. Since most of the
3243 * time is spent in user batches, most of our preemptions naturally
3244 * occur there. We want to verify that when we preempt inside a batch
3245 * we continue on from the current instruction and do not roll back
3246 * to the start, or another earlier arbitration point.
3248 * To verify this, we create a batch which is a mixture of
3249 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3250 * a few preempting contexts thrown into the mix, we look for any
3251 * repeated instructions (which show up as incorrect values).
3254 global = create_global(gt, 4096);
3256 return PTR_ERR(global);
3258 result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
3259 if (IS_ERR(result)) {
3260 i915_vma_unpin_and_release(&global, 0);
3261 return PTR_ERR(result);
3264 for_each_engine(engine, gt, id) {
3265 struct i915_request *client[3] = {};
3266 struct igt_live_test t;
3269 if (!intel_engine_has_preemption(engine))
3272 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3273 continue; /* we need per-context GPR */
3275 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3280 memset(result, 0, 4096);
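/*
 * Each client owns NUM_GPR dwords of the result page; start from a
 * clean slate so stale values cannot be mistaken for repeated
 * increments.
 */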
3282 for (i = 0; i < ARRAY_SIZE(client); i++) {
3283 struct i915_request *rq;
3285 rq = create_gpr_client(engine, global,
3286 NUM_GPR * i * sizeof(u32));
3295 /* Continuously preempt the set of 3 running contexts */
3296 for (i = 1; i <= NUM_GPR; i++) {
3297 err = preempt_user(engine, global, i);
3302 if (READ_ONCE(result[0]) != NUM_GPR) {
3303 pr_err("%s: Failed to release semaphore\n",
3309 for (i = 0; i < ARRAY_SIZE(client); i++) {
3312 if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3317 for (gpr = 1; gpr < NUM_GPR; gpr++) {
3318 if (result[NUM_GPR * i + gpr] != 1) {
3319 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3321 i, gpr, result[NUM_GPR * i + gpr]);
3329 for (i = 0; i < ARRAY_SIZE(client); i++) {
3333 i915_request_put(client[i]);
3336 /* Flush the semaphores on error */
3337 smp_store_mb(result[0], -1);
3338 if (igt_live_test_end(&t))
3344 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3348 static int live_preempt_timeout(void *arg)
3350 struct intel_gt *gt = arg;
3351 struct i915_gem_context *ctx_hi, *ctx_lo;
3352 struct igt_spinner spin_lo;
3353 struct intel_engine_cs *engine;
3354 enum intel_engine_id id;
3358 * Check that we force preemption to occur by cancelling the previous
3359 * context if it refuses to yield the GPU.
3361 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3364 if (!intel_has_reset_engine(gt))
3367 if (igt_spinner_init(&spin_lo, gt))
3370 ctx_hi = kernel_context(gt->i915);
3373 ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
3375 ctx_lo = kernel_context(gt->i915);
3378 ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
3380 for_each_engine(engine, gt, id) {
3381 unsigned long saved_timeout;
3382 struct i915_request *rq;
3384 if (!intel_engine_has_preemption(engine))
3387 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3388 MI_NOOP); /* preemption disabled */
3394 i915_request_add(rq);
3395 if (!igt_wait_for_spinner(&spin_lo, rq)) {
3396 intel_gt_set_wedged(gt);
3401 rq = igt_request_alloc(ctx_hi, engine);
3403 igt_spinner_end(&spin_lo);
3408 /* Flush the previous CS ack before changing timeouts */
3409 while (READ_ONCE(engine->execlists.pending[0]))
3412 saved_timeout = engine->props.preempt_timeout_ms;
3413 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3415 i915_request_get(rq);
3416 i915_request_add(rq);
3418 intel_engine_flush_submission(engine);
3419 engine->props.preempt_timeout_ms = saved_timeout;
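/*
 * The spinner offers no arbitration point, so the shortened preempt
 * timeout should expire, the engine be reset, and the high priority
 * request then complete promptly.
 */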
3421 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3422 intel_gt_set_wedged(gt);
3423 i915_request_put(rq);
3428 igt_spinner_end(&spin_lo);
3429 i915_request_put(rq);
3434 kernel_context_close(ctx_lo);
3436 kernel_context_close(ctx_hi);
3438 igt_spinner_fini(&spin_lo);
3442 static int random_range(struct rnd_state *rnd, int min, int max)
3444 return i915_prandom_u32_max_state(max - min, rnd) + min;
3447 static int random_priority(struct rnd_state *rnd)
3449 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3452 struct preempt_smoke {
3453 struct intel_gt *gt;
3454 struct i915_gem_context **contexts;
3455 struct intel_engine_cs *engine;
3456 struct drm_i915_gem_object *batch;
3457 unsigned int ncontext;
3458 struct rnd_state prng;
3459 unsigned long count;
3462 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3464 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3468 static int smoke_submit(struct preempt_smoke *smoke,
3469 struct i915_gem_context *ctx, int prio,
3470 struct drm_i915_gem_object *batch)
3472 struct i915_request *rq;
3473 struct i915_vma *vma = NULL;
3477 struct i915_address_space *vm;
3479 vm = i915_gem_context_get_vm_rcu(ctx);
3480 vma = i915_vma_instance(batch, vm, NULL);
3483 return PTR_ERR(vma);
3485 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3490 ctx->sched.priority = prio;
3492 rq = igt_request_alloc(ctx, smoke->engine);
3500 err = i915_request_await_object(rq, vma->obj, false);
3502 err = i915_vma_move_to_active(vma, rq, 0);
3504 err = rq->engine->emit_bb_start(rq,
3507 i915_vma_unlock(vma);
3510 i915_request_add(rq);
3514 i915_vma_unpin(vma);
3519 static int smoke_crescendo_thread(void *arg)
3521 struct preempt_smoke *smoke = arg;
3522 IGT_TIMEOUT(end_time);
3523 unsigned long count;
3527 struct i915_gem_context *ctx = smoke_context(smoke);
3530 err = smoke_submit(smoke,
3531 ctx, count % I915_PRIORITY_MAX,
3537 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3539 smoke->count = count;
3543 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3544 #define BATCH BIT(0)
3546 struct task_struct *tsk[I915_NUM_ENGINES] = {};
3547 struct preempt_smoke arg[I915_NUM_ENGINES];
3548 struct intel_engine_cs *engine;
3549 enum intel_engine_id id;
3550 unsigned long count;
3553 for_each_engine(engine, smoke->gt, id) {
3555 arg[id].engine = engine;
3556 if (!(flags & BATCH))
3557 arg[id].batch = NULL;
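/* One submission thread per engine, each cycling through increasing context priorities. */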
3560 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
3561 "igt/smoke:%d", id);
3562 if (IS_ERR(tsk[id])) {
3563 err = PTR_ERR(tsk[id]);
3566 get_task_struct(tsk[id]);
3569 yield(); /* start all threads before we kthread_stop() */
3572 for_each_engine(engine, smoke->gt, id) {
3575 if (IS_ERR_OR_NULL(tsk[id]))
3578 status = kthread_stop(tsk[id]);
3582 count += arg[id].count;
3584 put_task_struct(tsk[id]);
3587 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3588 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3592 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3594 enum intel_engine_id id;
3595 IGT_TIMEOUT(end_time);
3596 unsigned long count;
3600 for_each_engine(smoke->engine, smoke->gt, id) {
3601 struct i915_gem_context *ctx = smoke_context(smoke);
3604 err = smoke_submit(smoke,
3605 ctx, random_priority(&smoke->prng),
3606 flags & BATCH ? smoke->batch : NULL);
3612 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3614 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3615 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3619 static int live_preempt_smoke(void *arg)
3621 struct preempt_smoke smoke = {
3623 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3626 const unsigned int phase[] = { 0, BATCH };
3627 struct igt_live_test t;
3632 smoke.contexts = kmalloc_array(smoke.ncontext,
3633 sizeof(*smoke.contexts),
3635 if (!smoke.contexts)
3639 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3640 if (IS_ERR(smoke.batch)) {
3641 err = PTR_ERR(smoke.batch);
3645 cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3650 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3651 cs[n] = MI_ARB_CHECK;
3652 cs[n] = MI_BATCH_BUFFER_END;
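/*
 * A page full of MI_ARB_CHECK gives an arbitration point at every
 * instruction, so the smoke batches can be preempted almost anywhere.
 */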
3653 i915_gem_object_flush_map(smoke.batch);
3654 i915_gem_object_unpin_map(smoke.batch);
3656 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3661 for (n = 0; n < smoke.ncontext; n++) {
3662 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3663 if (!smoke.contexts[n])
3667 for (n = 0; n < ARRAY_SIZE(phase); n++) {
3668 err = smoke_crescendo(&smoke, phase[n]);
3672 err = smoke_random(&smoke, phase[n]);
3678 if (igt_live_test_end(&t))
3681 for (n = 0; n < smoke.ncontext; n++) {
3682 if (!smoke.contexts[n])
3684 kernel_context_close(smoke.contexts[n]);
3688 i915_gem_object_put(smoke.batch);
3690 kfree(smoke.contexts);
3695 static int nop_virtual_engine(struct intel_gt *gt,
3696 struct intel_engine_cs **siblings,
3697 unsigned int nsibling,
3700 #define CHAIN BIT(0)
3702 IGT_TIMEOUT(end_time);
3703 struct i915_request *request[16] = {};
3704 struct intel_context *ve[16];
3705 unsigned long n, prime, nc;
3706 struct igt_live_test t;
3707 ktime_t times[2] = {};
3710 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3712 for (n = 0; n < nctx; n++) {
3713 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3714 if (IS_ERR(ve[n])) {
3715 err = PTR_ERR(ve[n]);
3720 err = intel_context_pin(ve[n]);
3722 intel_context_put(ve[n]);
3728 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3732 for_each_prime_number_from(prime, 1, 8192) {
3733 times[1] = ktime_get_raw();
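/*
 * With CHAIN, queue the whole run of requests on one virtual context
 * before moving to the next; otherwise interleave the submissions
 * round-robin across all the contexts.
 */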
3735 if (flags & CHAIN) {
3736 for (nc = 0; nc < nctx; nc++) {
3737 for (n = 0; n < prime; n++) {
3738 struct i915_request *rq;
3740 rq = i915_request_create(ve[nc]);
3747 i915_request_put(request[nc]);
3748 request[nc] = i915_request_get(rq);
3749 i915_request_add(rq);
3753 for (n = 0; n < prime; n++) {
3754 for (nc = 0; nc < nctx; nc++) {
3755 struct i915_request *rq;
3757 rq = i915_request_create(ve[nc]);
3764 i915_request_put(request[nc]);
3765 request[nc] = i915_request_get(rq);
3766 i915_request_add(rq);
3771 for (nc = 0; nc < nctx; nc++) {
3772 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3773 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3774 __func__, ve[0]->engine->name,
3775 request[nc]->fence.context,
3776 request[nc]->fence.seqno);
3778 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3779 __func__, ve[0]->engine->name,
3780 request[nc]->fence.context,
3781 request[nc]->fence.seqno);
3783 intel_gt_set_wedged(gt);
3788 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3790 times[0] = times[1];
3792 for (nc = 0; nc < nctx; nc++) {
3793 i915_request_put(request[nc]);
3797 if (__igt_timeout(end_time, NULL))
3801 err = igt_live_test_end(&t);
3805 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3806 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3807 prime, div64_u64(ktime_to_ns(times[1]), prime));
3810 if (igt_flush_test(gt->i915))
3813 for (nc = 0; nc < nctx; nc++) {
3814 i915_request_put(request[nc]);
3815 intel_context_unpin(ve[nc]);
3816 intel_context_put(ve[nc]);
3822 __select_siblings(struct intel_gt *gt,
3824 struct intel_engine_cs **siblings,
3825 bool (*filter)(const struct intel_engine_cs *))
3830 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3831 if (!gt->engine_class[class][inst])
3834 if (filter && !filter(gt->engine_class[class][inst]))
3837 siblings[n++] = gt->engine_class[class][inst];
3844 select_siblings(struct intel_gt *gt,
3846 struct intel_engine_cs **siblings)
3848 return __select_siblings(gt, class, siblings, NULL);
3851 static int live_virtual_engine(void *arg)
3853 struct intel_gt *gt = arg;
3854 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3855 struct intel_engine_cs *engine;
3856 enum intel_engine_id id;
3860 if (intel_uc_uses_guc_submission(&gt->uc))
3863 for_each_engine(engine, gt, id) {
3864 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3866 pr_err("Failed to wrap engine %s: err=%d\n",
3872 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3875 nsibling = select_siblings(gt, class, siblings);
3879 for (n = 1; n <= nsibling + 1; n++) {
3880 err = nop_virtual_engine(gt, siblings, nsibling,
3886 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3894 static int mask_virtual_engine(struct intel_gt *gt,
3895 struct intel_engine_cs **siblings,
3896 unsigned int nsibling)
3898 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3899 struct intel_context *ve;
3900 struct igt_live_test t;
3905 * Check that by setting the execution mask on a request, we can
3906 * restrict it to our desired engine within the virtual engine.
3909 ve = intel_execlists_create_virtual(siblings, nsibling);
3915 err = intel_context_pin(ve);
3919 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3923 for (n = 0; n < nsibling; n++) {
3924 request[n] = i915_request_create(ve);
3925 if (IS_ERR(request[n])) {
3926 err = PTR_ERR(request[n]);
3931 /* Reverse order as it's more likely to be unnatural */
3932 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3934 i915_request_get(request[n]);
3935 i915_request_add(request[n]);
3938 for (n = 0; n < nsibling; n++) {
3939 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3940 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3941 __func__, ve->engine->name,
3942 request[n]->fence.context,
3943 request[n]->fence.seqno);
3945 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3946 __func__, ve->engine->name,
3947 request[n]->fence.context,
3948 request[n]->fence.seqno);
3950 intel_gt_set_wedged(gt);
3955 if (request[n]->engine != siblings[nsibling - n - 1]) {
3956 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3957 request[n]->engine->name,
3958 siblings[nsibling - n - 1]->name);
3964 err = igt_live_test_end(&t);
3966 if (igt_flush_test(gt->i915))
3969 for (n = 0; n < nsibling; n++)
3970 i915_request_put(request[n]);
3973 intel_context_unpin(ve);
3975 intel_context_put(ve);
3980 static int live_virtual_mask(void *arg)
3982 struct intel_gt *gt = arg;
3983 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3987 if (intel_uc_uses_guc_submission(&gt->uc))
3990 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3991 unsigned int nsibling;
3993 nsibling = select_siblings(gt, class, siblings);
3997 err = mask_virtual_engine(gt, siblings, nsibling);
4005 static int slicein_virtual_engine(struct intel_gt *gt,
4006 struct intel_engine_cs **siblings,
4007 unsigned int nsibling)
4009 const long timeout = slice_timeout(siblings[0]);
4010 struct intel_context *ce;
4011 struct i915_request *rq;
4012 struct igt_spinner spin;
4017 * Virtual requests must take part in timeslicing on the target engines.
4020 if (igt_spinner_init(&spin, gt))
4023 for (n = 0; n < nsibling; n++) {
4024 ce = intel_context_create(siblings[n]);
4030 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4031 intel_context_put(ce);
4037 i915_request_add(rq);
4040 ce = intel_execlists_create_virtual(siblings, nsibling);
4046 rq = intel_context_create_request(ce);
4047 intel_context_put(ce);
4053 i915_request_get(rq);
4054 i915_request_add(rq);
4055 if (i915_request_wait(rq, 0, timeout) < 0) {
4056 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4057 __func__, rq->engine->name);
4059 intel_gt_set_wedged(gt);
4062 i915_request_put(rq);
4065 igt_spinner_end(&spin);
4066 if (igt_flush_test(gt->i915))
4068 igt_spinner_fini(&spin);
4072 static int sliceout_virtual_engine(struct intel_gt *gt,
4073 struct intel_engine_cs **siblings,
4074 unsigned int nsibling)
4076 const long timeout = slice_timeout(siblings[0]);
4077 struct intel_context *ce;
4078 struct i915_request *rq;
4079 struct igt_spinner spin;
4084 * Virtual requests must allow others a fair timeslice.
4087 if (igt_spinner_init(&spin, gt))
4090 /* XXX We do not handle oversubscription and fairness with normal rq */
4091 for (n = 0; n < nsibling; n++) {
4092 ce = intel_execlists_create_virtual(siblings, nsibling);
4098 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4099 intel_context_put(ce);
4105 i915_request_add(rq);
4108 for (n = 0; !err && n < nsibling; n++) {
4109 ce = intel_context_create(siblings[n]);
4115 rq = intel_context_create_request(ce);
4116 intel_context_put(ce);
4122 i915_request_get(rq);
4123 i915_request_add(rq);
4124 if (i915_request_wait(rq, 0, timeout) < 0) {
4125 GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4126 __func__, siblings[n]->name);
4128 intel_gt_set_wedged(gt);
4131 i915_request_put(rq);
4135 igt_spinner_end(&spin);
4136 if (igt_flush_test(gt->i915))
4138 igt_spinner_fini(&spin);
4142 static int live_virtual_slice(void *arg)
4144 struct intel_gt *gt = arg;
4145 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4149 if (intel_uc_uses_guc_submission(&gt->uc))
4152 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4153 unsigned int nsibling;
4155 nsibling = __select_siblings(gt, class, siblings,
4156 intel_engine_has_timeslices);
4160 err = slicein_virtual_engine(gt, siblings, nsibling);
4164 err = sliceout_virtual_engine(gt, siblings, nsibling);
4172 static int preserved_virtual_engine(struct intel_gt *gt,
4173 struct intel_engine_cs **siblings,
4174 unsigned int nsibling)
4176 struct i915_request *last = NULL;
4177 struct intel_context *ve;
4178 struct i915_vma *scratch;
4179 struct igt_live_test t;
4185 __vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
4187 if (IS_ERR(scratch))
4188 return PTR_ERR(scratch);
4190 err = i915_vma_sync(scratch);
4194 ve = intel_execlists_create_virtual(siblings, nsibling);
4200 err = intel_context_pin(ve);
4204 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4208 for (n = 0; n < NUM_GPR_DW; n++) {
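/*
 * Round-robin the requests over the physical siblings: each request
 * stores CS_GPR(n) to scratch and reloads the next GPR, so the values
 * read back depend on the register state carried with the virtual
 * context from one request to the next.
 */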
4209 struct intel_engine_cs *engine = siblings[n % nsibling];
4210 struct i915_request *rq;
4212 rq = i915_request_create(ve);
4218 i915_request_put(last);
4219 last = i915_request_get(rq);
4221 cs = intel_ring_begin(rq, 8);
4223 i915_request_add(rq);
4228 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4229 *cs++ = CS_GPR(engine, n);
4230 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4233 *cs++ = MI_LOAD_REGISTER_IMM(1);
4234 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4238 intel_ring_advance(rq, cs);
4240 /* Restrict this request to run on a particular engine */
4241 rq->execution_mask = engine->mask;
4242 i915_request_add(rq);
4245 if (i915_request_wait(last, 0, HZ / 5) < 0) {
4250 cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
4256 for (n = 0; n < NUM_GPR_DW; n++) {
4258 pr_err("Incorrect value[%d] found for GPR[%d]\n",
4265 i915_gem_object_unpin_map(scratch->obj);
4268 if (igt_live_test_end(&t))
4270 i915_request_put(last);
4272 intel_context_unpin(ve);
4274 intel_context_put(ve);
4276 i915_vma_unpin_and_release(&scratch, 0);
4280 static int live_virtual_preserved(void *arg)
4282 struct intel_gt *gt = arg;
4283 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4287 * Check that the context image retains non-privileged (user) registers
4288 * from one engine to the next. For this we check that the CS_GPR
4292 if (intel_uc_uses_guc_submission(&gt->uc))
4295 /* As we use CS_GPR we cannot run before they existed on all engines. */
4296 if (INTEL_GEN(gt->i915) < 9)
4299 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4302 nsibling = select_siblings(gt, class, siblings);
4306 err = preserved_virtual_engine(gt, siblings, nsibling);
4314 static int bond_virtual_engine(struct intel_gt *gt,
4316 struct intel_engine_cs **siblings,
4317 unsigned int nsibling,
4319 #define BOND_SCHEDULE BIT(0)
4321 struct intel_engine_cs *master;
4322 struct i915_request *rq[16];
4323 enum intel_engine_id id;
4324 struct igt_spinner spin;
4329 * A set of bonded requests is intended to be run concurrently
4330 * across a number of engines. We use one request per-engine
4331 * and a magic fence to schedule each of the bonded requests
4332 * at the same time. A consequence of our current scheduler is that
4333 * we only move requests to the HW ready queue when the request
4334 * becomes ready, that is when all of its prerequisite fences have
4335 * been signaled. As one of those fences is the master submit fence,
4336 * there is a delay on all secondary fences as the HW may be
4337 * currently busy. Equally, as all the requests are independent,
4338 * they may have other fences that delay individual request
4339 * submission to HW. Ergo, we do not guarantee that all requests are
4340 * immediately submitted to HW at the same time, just that if the
4341 * rules are abided by, they are ready at the same time as the
4342 * first is submitted. Userspace can embed semaphores in its batch
4343 * to ensure parallel execution of its phases as it requires.
4344 * Though naturally it gets requested that perhaps the scheduler should
4345 * take care of parallel execution, even across preemption events on
4346 * different HW. (The proper answer is of course "lalalala".)
4348 * With the submit-fence, we have identified three possible phases
4349 * of synchronisation depending on the master fence: queued (not
4350 * ready), executing, and signaled. The first two are quite simple
4351 * and checked below. However, the signaled master fence handling is
4352 * contentious. Currently we do not distinguish between a signaled
4353 * fence and an expired fence, as once signaled it does not convey
4354 * any information about the previous execution. It may even be freed
4355 * and hence checking later it may not exist at all. Ergo we currently
4356 * do not apply the bonding constraint for an already signaled fence,
4357 * as our expectation is that it should not constrain the secondaries
4358 * and is outside of the scope of the bonded request API (i.e. all
4359 * userspace requests are meant to be running in parallel). As
4360 * it imposes no constraint, and is effectively a no-op, we do not
4361 * check below as normal execution flows are checked extensively above.
4363 * XXX Is the degenerate handling of signaled submit fences the
4364 * expected behaviour for userspace?
4367 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4369 if (igt_spinner_init(&spin, gt))
4373 rq[0] = ERR_PTR(-ENOMEM);
4374 for_each_engine(master, gt, id) {
4375 struct i915_sw_fence fence = {};
4376 struct intel_context *ce;
4378 if (master->class == class)
4381 ce = intel_context_create(master);
4387 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4389 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4390 intel_context_put(ce);
4391 if (IS_ERR(rq[0])) {
4392 err = PTR_ERR(rq[0]);
4395 i915_request_get(rq[0]);
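/*
 * With BOND_SCHEDULE, the master's submission is held back behind an
 * on-stack fence so the bonded children are queued while the master
 * is still not ready; otherwise the master is already spinning on the
 * GPU before the children are created.
 */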
4397 if (flags & BOND_SCHEDULE) {
4398 onstack_fence_init(&fence);
4399 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4404 i915_request_add(rq[0]);
4408 if (!(flags & BOND_SCHEDULE) &&
4409 !igt_wait_for_spinner(&spin, rq[0])) {
4414 for (n = 0; n < nsibling; n++) {
4415 struct intel_context *ve;
4417 ve = intel_execlists_create_virtual(siblings, nsibling);
4420 onstack_fence_fini(&fence);
4424 err = intel_virtual_engine_attach_bond(ve->engine,
4428 intel_context_put(ve);
4429 onstack_fence_fini(&fence);
4433 err = intel_context_pin(ve);
4434 intel_context_put(ve);
4436 onstack_fence_fini(&fence);
4440 rq[n + 1] = i915_request_create(ve);
4441 intel_context_unpin(ve);
4442 if (IS_ERR(rq[n + 1])) {
4443 err = PTR_ERR(rq[n + 1]);
4444 onstack_fence_fini(&fence);
4447 i915_request_get(rq[n + 1]);
4449 err = i915_request_await_execution(rq[n + 1],
4451 ve->engine->bond_execute);
4452 i915_request_add(rq[n + 1]);
4454 onstack_fence_fini(&fence);
4458 onstack_fence_fini(&fence);
4459 intel_engine_flush_submission(master);
4460 igt_spinner_end(&spin);
4462 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4463 pr_err("Master request did not execute (on %s)!\n",
4464 rq[0]->engine->name);
4469 for (n = 0; n < nsibling; n++) {
4470 if (i915_request_wait(rq[n + 1], 0,
4471 MAX_SCHEDULE_TIMEOUT) < 0) {
4476 if (rq[n + 1]->engine != siblings[n]) {
4477 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4479 rq[n + 1]->engine->name,
4480 rq[0]->engine->name);
4486 for (n = 0; !IS_ERR(rq[n]); n++)
4487 i915_request_put(rq[n]);
4488 rq[0] = ERR_PTR(-ENOMEM);
4492 for (n = 0; !IS_ERR(rq[n]); n++)
4493 i915_request_put(rq[n]);
4494 if (igt_flush_test(gt->i915))
4497 igt_spinner_fini(&spin);
4501 static int live_virtual_bond(void *arg)
4503 static const struct phase {
4508 { "schedule", BOND_SCHEDULE },
4511 struct intel_gt *gt = arg;
4512 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4516 if (intel_uc_uses_guc_submission(&gt->uc))
4519 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4520 const struct phase *p;
4523 nsibling = select_siblings(gt, class, siblings);
4527 for (p = phases; p->name; p++) {
4528 err = bond_virtual_engine(gt,
4529 class, siblings, nsibling,
4532 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4533 __func__, p->name, class, nsibling, err);
4542 static int reset_virtual_engine(struct intel_gt *gt,
4543 struct intel_engine_cs **siblings,
4544 unsigned int nsibling)
4546 struct intel_engine_cs *engine;
4547 struct intel_context *ve;
4548 struct igt_spinner spin;
4549 struct i915_request *rq;
4554 * In order to support offline error capture for fast preempt reset,
4555 * we need to decouple the guilty request and ensure that it and its
4556 * descendants are not executed while the capture is in progress.
4559 if (igt_spinner_init(&spin, gt))
4562 ve = intel_execlists_create_virtual(siblings, nsibling);
4568 for (n = 0; n < nsibling; n++)
4569 st_engine_heartbeat_disable(siblings[n]);
4571 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4576 i915_request_add(rq);
4578 if (!igt_wait_for_spinner(&spin, rq)) {
4579 intel_gt_set_wedged(gt);
4584 engine = rq->engine;
4585 GEM_BUG_ON(engine == ve->engine);
4587 /* Take ownership of the reset and tasklet */
4589 if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
4590 &gt->reset.flags)) {
4592 intel_gt_set_wedged(gt);
4596 tasklet_disable(&engine->execlists.tasklet);
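/*
 * Run the submission callback by hand while the tasklet is disabled,
 * so the spinner is promoted to the active (ELSP) slot before we fake
 * the failed preemption and reset below.
 */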
4598 engine->execlists.tasklet.callback(&engine->execlists.tasklet);
4599 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4601 /* Fake a preemption event; failed of course */
4602 spin_lock_irq(&engine->active.lock);
4603 __unwind_incomplete_requests(engine);
4604 spin_unlock_irq(&engine->active.lock);
4605 GEM_BUG_ON(rq->engine != engine);
4607 /* Reset the engine while keeping our active request on hold */
4608 execlists_hold(engine, rq);
4609 GEM_BUG_ON(!i915_request_on_hold(rq));
4611 __intel_engine_reset_bh(engine, NULL);
4612 GEM_BUG_ON(rq->fence.error != -EIO);
4614 /* Release our grasp on the engine, letting CS flow again */
4615 tasklet_enable(&engine->execlists.tasklet);
4616 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
4619 /* Check that we do not resubmit the held request */
4620 i915_request_get(rq);
4621 if (!i915_request_wait(rq, 0, HZ / 5)) {
4622 pr_err("%s: on hold request completed!\n",
4624 intel_gt_set_wedged(gt);
4628 GEM_BUG_ON(!i915_request_on_hold(rq));
4630 /* But is resubmitted on release */
4631 execlists_unhold(engine, rq);
4632 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4633 pr_err("%s: held request did not complete!\n",
4635 intel_gt_set_wedged(gt);
4640 i915_request_put(rq);
4642 for (n = 0; n < nsibling; n++)
4643 st_engine_heartbeat_enable(siblings[n]);
4645 intel_context_put(ve);
4647 igt_spinner_fini(&spin);
4651 static int live_virtual_reset(void *arg)
4653 struct intel_gt *gt = arg;
4654 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4658 * Check that we handle a reset event within a virtual engine.
4659 * Only the physical engine is reset, but we have to check the flow
4660 * of the virtual requests around the reset, and make sure it is not
4664 if (intel_uc_uses_guc_submission(&gt->uc))
4667 if (!intel_has_reset_engine(gt))
4670 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4673 nsibling = select_siblings(gt, class, siblings);
4677 err = reset_virtual_engine(gt, siblings, nsibling);
4685 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4687 static const struct i915_subtest tests[] = {
4688 SUBTEST(live_sanitycheck),
4689 SUBTEST(live_unlite_switch),
4690 SUBTEST(live_unlite_preempt),
4691 SUBTEST(live_unlite_ring),
4692 SUBTEST(live_pin_rewind),
4693 SUBTEST(live_hold_reset),
4694 SUBTEST(live_error_interrupt),
4695 SUBTEST(live_timeslice_preempt),
4696 SUBTEST(live_timeslice_rewind),
4697 SUBTEST(live_timeslice_queue),
4698 SUBTEST(live_timeslice_nopreempt),
4699 SUBTEST(live_busywait_preempt),
4700 SUBTEST(live_preempt),
4701 SUBTEST(live_late_preempt),
4702 SUBTEST(live_nopreempt),
4703 SUBTEST(live_preempt_cancel),
4704 SUBTEST(live_suppress_self_preempt),
4705 SUBTEST(live_chain_preempt),
4706 SUBTEST(live_preempt_ring),
4707 SUBTEST(live_preempt_gang),
4708 SUBTEST(live_preempt_timeout),
4709 SUBTEST(live_preempt_user),
4710 SUBTEST(live_preempt_smoke),
4711 SUBTEST(live_virtual_engine),
4712 SUBTEST(live_virtual_mask),
4713 SUBTEST(live_virtual_preserved),
4714 SUBTEST(live_virtual_slice),
4715 SUBTEST(live_virtual_bond),
4716 SUBTEST(live_virtual_reset),
4719 if (!HAS_EXECLISTS(i915))
4722 if (intel_gt_is_wedged(&i915->gt))
4725 return intel_gt_live_subtests(tests, &i915->gt);