2 * SPDX-License-Identifier: MIT
4 * Copyright © 2018 Intel Corporation
7 #include <linux/prime_numbers.h>
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
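/*
 * CS_GPR() maps command streamer general purpose register <n> of an engine
 * to its MMIO offset; each GPR is 64 bits wide, hence two dwords per
 * register below.
 */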
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
25 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
27 static struct i915_vma *create_scratch(struct intel_gt *gt)
29 struct drm_i915_gem_object *obj;
33 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
37 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
39 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
41 i915_gem_object_put(obj);
45 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
47 i915_gem_object_put(obj);
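/*
 * The selftests park the heartbeat so that no background kernel_context
 * requests are emitted while we carefully control the contents of the ELSP
 * ourselves.
 */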
54 static void engine_heartbeat_disable(struct intel_engine_cs *engine)
56 engine->props.heartbeat_interval_ms = 0;
58 intel_engine_pm_get(engine);
59 intel_engine_park_heartbeat(engine);
62 static void engine_heartbeat_enable(struct intel_engine_cs *engine)
64 intel_engine_pm_put(engine);
66 engine->props.heartbeat_interval_ms =
67 engine->defaults.heartbeat_interval_ms;
70 static bool is_active(struct i915_request *rq)
72 if (i915_request_is_active(rq))
75 if (i915_request_on_hold(rq))
78 if (i915_request_started(rq))
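/*
 * Poll, up to the given timeout, until the HW has acknowledged the
 * submission: nothing left in execlists.pending[] and the request reported
 * as active (or already completed).
 */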
84 static int wait_for_submit(struct intel_engine_cs *engine,
85 struct i915_request *rq,
86 unsigned long timeout)
90 bool done = time_after(jiffies, timeout);
92 if (i915_request_completed(rq)) /* that was quick! */
95 /* Wait until the HW has acknowledged the submission (or err) */
96 intel_engine_flush_submission(engine);
97 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
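/*
 * Wait for an engine reset to be applied to the hanging request: keep
 * flushing submission until the request either completes or is marked with
 * fence.error (expected to be -EIO).
 */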
107 static int wait_for_reset(struct intel_engine_cs *engine,
108 struct i915_request *rq,
109 unsigned long timeout)
115 intel_engine_flush_submission(engine);
117 if (READ_ONCE(engine->execlists.pending[0]))
120 if (i915_request_completed(rq))
123 if (READ_ONCE(rq->fence.error))
125 } while (time_before(jiffies, timeout));
127 flush_scheduled_work();
129 if (rq->fence.error != -EIO) {
130 pr_err("%s: hanging request %llx:%lld not reset\n",
137 /* Give the request a jiffie to complete after flushing the worker */
138 if (i915_request_wait(rq, 0,
139 max(0l, (long)(timeout - jiffies)) + 1) < 0) {
140 pr_err("%s: hanging request %llx:%lld did not complete\n",
150 static int live_sanitycheck(void *arg)
152 struct intel_gt *gt = arg;
153 struct intel_engine_cs *engine;
154 enum intel_engine_id id;
155 struct igt_spinner spin;
158 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
161 if (igt_spinner_init(&spin, gt))
164 for_each_engine(engine, gt, id) {
165 struct intel_context *ce;
166 struct i915_request *rq;
168 ce = intel_context_create(engine);
174 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
180 i915_request_add(rq);
181 if (!igt_wait_for_spinner(&spin, rq)) {
182 GEM_TRACE("spinner failed to start\n");
184 intel_gt_set_wedged(gt);
189 igt_spinner_end(&spin);
190 if (igt_flush_test(gt->i915)) {
196 intel_context_put(ce);
201 igt_spinner_fini(&spin);
205 static int live_unlite_restore(struct intel_gt *gt, int prio)
207 struct intel_engine_cs *engine;
208 enum intel_engine_id id;
209 struct igt_spinner spin;
213 * Check that we can correctly context switch between 2 instances
214 * on the same engine from the same parent context.
217 if (igt_spinner_init(&spin, gt))
221 for_each_engine(engine, gt, id) {
222 struct intel_context *ce[2] = {};
223 struct i915_request *rq[2];
224 struct igt_live_test t;
227 if (prio && !intel_engine_has_preemption(engine))
230 if (!intel_engine_can_store_dword(engine))
233 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
237 engine_heartbeat_disable(engine);
239 for (n = 0; n < ARRAY_SIZE(ce); n++) {
240 struct intel_context *tmp;
242 tmp = intel_context_create(engine);
248 err = intel_context_pin(tmp);
250 intel_context_put(tmp);
255 * Setup the pair of contexts such that if we
256 * lite-restore using the RING_TAIL from ce[1] it
257 * will execute garbage from ce[0]->ring.
259 memset(tmp->ring->vaddr,
260 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
261 tmp->ring->vma->size);
265 GEM_BUG_ON(!ce[1]->ring->size);
266 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
267 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
269 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
271 err = PTR_ERR(rq[0]);
275 i915_request_get(rq[0]);
276 i915_request_add(rq[0]);
277 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
279 if (!igt_wait_for_spinner(&spin, rq[0])) {
280 i915_request_put(rq[0]);
284 rq[1] = i915_request_create(ce[1]);
286 err = PTR_ERR(rq[1]);
287 i915_request_put(rq[0]);
293 * Ensure we do the switch to ce[1] on completion.
295 * rq[0] is already submitted, so this should reduce
296 * to a no-op (a wait on a request on the same engine
297 * uses the submit fence, not the completion fence),
298 * but it will install a dependency on rq[1] for rq[0]
299 * that will prevent the pair being reordered by
302 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
305 i915_request_get(rq[1]);
306 i915_request_add(rq[1]);
307 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
308 i915_request_put(rq[0]);
311 struct i915_sched_attr attr = {
315 /* Alternatively preempt the spinner with ce[1] */
316 engine->schedule(rq[1], &attr);
319 /* And switch back to ce[0] for good measure */
320 rq[0] = i915_request_create(ce[0]);
322 err = PTR_ERR(rq[0]);
323 i915_request_put(rq[1]);
327 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
328 i915_request_get(rq[0]);
329 i915_request_add(rq[0]);
330 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
331 i915_request_put(rq[1]);
332 i915_request_put(rq[0]);
335 tasklet_kill(&engine->execlists.tasklet); /* flush submission */
336 igt_spinner_end(&spin);
337 for (n = 0; n < ARRAY_SIZE(ce); n++) {
338 if (IS_ERR_OR_NULL(ce[n]))
341 intel_context_unpin(ce[n]);
342 intel_context_put(ce[n]);
345 engine_heartbeat_enable(engine);
346 if (igt_live_test_end(&t))
352 igt_spinner_fini(&spin);
356 static int live_unlite_switch(void *arg)
358 return live_unlite_restore(arg, 0);
361 static int live_unlite_preempt(void *arg)
363 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
366 static int live_pin_rewind(void *arg)
368 struct intel_gt *gt = arg;
369 struct intel_engine_cs *engine;
370 enum intel_engine_id id;
374 * We have to be careful not to trust intel_ring too much, for example
375 * ring->head is updated upon retire which is out of sync with pinning
376 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
377 * or else we risk writing an older, stale value.
379 * To simulate this, let's apply a bit of deliberate sabotage.
382 for_each_engine(engine, gt, id) {
383 struct intel_context *ce;
384 struct i915_request *rq;
385 struct intel_ring *ring;
386 struct igt_live_test t;
388 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
393 ce = intel_context_create(engine);
399 err = intel_context_pin(ce);
401 intel_context_put(ce);
405 /* Keep the context awake while we play games */
406 err = i915_active_acquire(&ce->active);
408 intel_context_unpin(ce);
409 intel_context_put(ce);
414 /* Poison the ring, and offset the next request from HEAD */
415 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
416 ring->emit = ring->size / 2;
417 ring->tail = ring->emit;
418 GEM_BUG_ON(ring->head);
420 intel_context_unpin(ce);
422 /* Submit a simple nop request */
423 GEM_BUG_ON(intel_context_is_pinned(ce));
424 rq = intel_context_create_request(ce);
425 i915_active_release(&ce->active); /* e.g. async retire */
426 intel_context_put(ce);
431 GEM_BUG_ON(!rq->head);
432 i915_request_add(rq);
434 /* Expect not to hang! */
435 if (igt_live_test_end(&t)) {
444 static int live_hold_reset(void *arg)
446 struct intel_gt *gt = arg;
447 struct intel_engine_cs *engine;
448 enum intel_engine_id id;
449 struct igt_spinner spin;
453 * In order to support offline error capture for fast preempt reset,
454 * we need to decouple the guilty request and ensure that it and its
455 * descendants are not executed while the capture is in progress.
458 if (!intel_has_reset_engine(gt))
461 if (igt_spinner_init(&spin, gt))
464 for_each_engine(engine, gt, id) {
465 struct intel_context *ce;
466 struct i915_request *rq;
468 ce = intel_context_create(engine);
474 engine_heartbeat_disable(engine);
476 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
481 i915_request_add(rq);
483 if (!igt_wait_for_spinner(&spin, rq)) {
484 intel_gt_set_wedged(gt);
489 /* We have our request executing, now remove it and reset */
491 if (test_and_set_bit(I915_RESET_ENGINE + id,
493 intel_gt_set_wedged(gt);
497 tasklet_disable(&engine->execlists.tasklet);
499 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
500 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
502 i915_request_get(rq);
503 execlists_hold(engine, rq);
504 GEM_BUG_ON(!i915_request_on_hold(rq));
506 intel_engine_reset(engine, NULL);
507 GEM_BUG_ON(rq->fence.error != -EIO);
509 tasklet_enable(&engine->execlists.tasklet);
510 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
513 /* Check that we do not resubmit the held request */
514 if (!i915_request_wait(rq, 0, HZ / 5)) {
515 pr_err("%s: on hold request completed!\n",
517 i915_request_put(rq);
521 GEM_BUG_ON(!i915_request_on_hold(rq));
523 /* But is resubmitted on release */
524 execlists_unhold(engine, rq);
525 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
526 pr_err("%s: held request did not complete!\n",
528 intel_gt_set_wedged(gt);
531 i915_request_put(rq);
534 engine_heartbeat_enable(engine);
535 intel_context_put(ce);
540 igt_spinner_fini(&spin);
544 static const char *error_repr(int err)
546 return err ? "bad" : "good";
549 static int live_error_interrupt(void *arg)
551 static const struct error_phase {
552 enum { GOOD = 0, BAD = -EIO } error[2];
557 { { GOOD, GOOD } }, /* sentinel */
559 struct intel_gt *gt = arg;
560 struct intel_engine_cs *engine;
561 enum intel_engine_id id;
564 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
565 * of invalid commands in user batches that will cause a GPU hang.
566 * This is a faster mechanism than using hangcheck/heartbeats, but
567 * only detects problems the HW knows about -- it will not warn when
570 * To verify our detection and reset, we throw some invalid commands
571 * at the HW and wait for the interrupt.
574 if (!intel_has_reset_engine(gt))
577 for_each_engine(engine, gt, id) {
578 const struct error_phase *p;
581 engine_heartbeat_disable(engine);
583 for (p = phases; p->error[0] != GOOD; p++) {
584 struct i915_request *client[ARRAY_SIZE(phases->error)];
588 memset(client, 0, sizeof(*client));
589 for (i = 0; i < ARRAY_SIZE(client); i++) {
590 struct intel_context *ce;
591 struct i915_request *rq;
593 ce = intel_context_create(engine);
599 rq = intel_context_create_request(ce);
600 intel_context_put(ce);
606 if (rq->engine->emit_init_breadcrumb) {
607 err = rq->engine->emit_init_breadcrumb(rq);
609 i915_request_add(rq);
614 cs = intel_ring_begin(rq, 2);
616 i915_request_add(rq);
629 client[i] = i915_request_get(rq);
630 i915_request_add(rq);
633 err = wait_for_submit(engine, client[0], HZ / 2);
635 pr_err("%s: first request did not start within time!\n",
641 for (i = 0; i < ARRAY_SIZE(client); i++) {
642 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
643 pr_debug("%s: %s request incomplete!\n",
645 error_repr(p->error[i]));
647 if (!i915_request_started(client[i])) {
648 pr_err("%s: %s request not started!\n",
650 error_repr(p->error[i]));
655 /* Kick the tasklet to process the error */
656 intel_engine_flush_submission(engine);
657 if (client[i]->fence.error != p->error[i]) {
658 pr_err("%s: %s request (%s) with wrong error code: %d\n",
660 error_repr(p->error[i]),
661 i915_request_completed(client[i]) ? "completed" : "running",
662 client[i]->fence.error);
669 for (i = 0; i < ARRAY_SIZE(client); i++)
671 i915_request_put(client[i]);
673 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
674 engine->name, p - phases,
675 p->error[0], p->error[1]);
680 engine_heartbeat_enable(engine);
682 intel_gt_set_wedged(gt);
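/*
 * Emit a busywait on semaphore slot <idx>, followed by a store that releases
 * slot <idx - 1>; chaining these across engines builds a queue that can only
 * drain by timeslicing through every waiting request.
 */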
691 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
695 cs = intel_ring_begin(rq, 10);
699 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
701 *cs++ = MI_SEMAPHORE_WAIT |
702 MI_SEMAPHORE_GLOBAL_GTT |
704 MI_SEMAPHORE_SAD_NEQ_SDD;
706 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
710 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
711 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
721 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
723 intel_ring_advance(rq, cs);
727 static struct i915_request *
728 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
730 struct intel_context *ce;
731 struct i915_request *rq;
734 ce = intel_context_create(engine);
738 rq = intel_context_create_request(ce);
743 if (rq->engine->emit_init_breadcrumb)
744 err = rq->engine->emit_init_breadcrumb(rq);
746 err = emit_semaphore_chain(rq, vma, idx);
748 i915_request_get(rq);
749 i915_request_add(rq);
754 intel_context_put(ce);
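/*
 * Write the final semaphore value from the kernel context and schedule it at
 * the requested priority so that it runs ahead of the queued chain.
 */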
759 release_queue(struct intel_engine_cs *engine,
760 struct i915_vma *vma,
763 struct i915_sched_attr attr = {
766 struct i915_request *rq;
769 rq = intel_engine_create_kernel_request(engine);
773 cs = intel_ring_begin(rq, 4);
775 i915_request_add(rq);
779 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
780 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
784 intel_ring_advance(rq, cs);
786 i915_request_get(rq);
787 i915_request_add(rq);
790 engine->schedule(rq, &attr);
791 local_bh_enable(); /* kick tasklet */
793 i915_request_put(rq);
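/*
 * Queue a semaphore chain on every engine, then release it from 'outer' at
 * barrier priority; the head request can only complete if the scheduler
 * timeslices through all of the queued waiters.
 */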
799 slice_semaphore_queue(struct intel_engine_cs *outer,
800 struct i915_vma *vma,
803 struct intel_engine_cs *engine;
804 struct i915_request *head;
805 enum intel_engine_id id;
808 head = semaphore_queue(outer, vma, n++);
810 return PTR_ERR(head);
812 for_each_engine(engine, outer->gt, id) {
813 for (i = 0; i < count; i++) {
814 struct i915_request *rq;
816 rq = semaphore_queue(engine, vma, n++);
822 i915_request_put(rq);
826 err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
830 if (i915_request_wait(head, 0,
831 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
832 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
835 intel_gt_set_wedged(outer->gt);
840 i915_request_put(head);
844 static int live_timeslice_preempt(void *arg)
846 struct intel_gt *gt = arg;
847 struct drm_i915_gem_object *obj;
848 struct i915_vma *vma;
854 * If a request takes too long, we would like to give other users
855 * a fair go on the GPU. In particular, users may create batches
856 * that wait upon external input, where that input may even be
857 * supplied by another GPU job. To avoid blocking forever, we
858 * need to preempt the current task and replace it with another
861 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
864 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
868 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
874 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
876 err = PTR_ERR(vaddr);
880 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
884 err = i915_vma_sync(vma);
888 for_each_prime_number_from(count, 1, 16) {
889 struct intel_engine_cs *engine;
890 enum intel_engine_id id;
892 for_each_engine(engine, gt, id) {
893 if (!intel_engine_has_preemption(engine))
896 memset(vaddr, 0, PAGE_SIZE);
898 engine_heartbeat_disable(engine);
899 err = slice_semaphore_queue(engine, vma, count);
900 engine_heartbeat_enable(engine);
904 if (igt_flush_test(gt->i915)) {
914 i915_gem_object_unpin_map(obj);
916 i915_gem_object_put(obj);
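/*
 * create_rewinder() emits a wait on a semaphore slot in the status page and
 * then records RING_TIMESTAMP into slot[idx], so that the test can compare
 * the order in which the requests actually executed.
 */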
920 static struct i915_request *
921 create_rewinder(struct intel_context *ce,
922 struct i915_request *wait,
926 i915_ggtt_offset(ce->engine->status_page.vma) +
927 offset_in_page(slot);
928 struct i915_request *rq;
932 rq = intel_context_create_request(ce);
937 err = i915_request_await_dma_fence(rq, &wait->fence);
942 cs = intel_ring_begin(rq, 14);
948 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
951 *cs++ = MI_SEMAPHORE_WAIT |
952 MI_SEMAPHORE_GLOBAL_GTT |
954 MI_SEMAPHORE_SAD_GTE_SDD;
959 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
960 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
961 *cs++ = offset + idx * sizeof(u32);
964 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
969 intel_ring_advance(rq, cs);
971 rq->sched.attr.priority = I915_PRIORITY_MASK;
974 i915_request_get(rq);
975 i915_request_add(rq);
977 i915_request_put(rq);
984 static int live_timeslice_rewind(void *arg)
986 struct intel_gt *gt = arg;
987 struct intel_engine_cs *engine;
988 enum intel_engine_id id;
991 * The usual presumption on timeslice expiration is that we replace
992 * the active context with another. However, given a chain of
993 * dependencies we may end up replacing the context with itself, but
994 * with only a few of its requests resubmitted, forcing us to rewind the
995 * RING_TAIL of the original request.
997 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1000 for_each_engine(engine, gt, id) {
1001 enum { A1, A2, B1 };
1002 enum { X = 1, Z, Y };
1003 struct i915_request *rq[3] = {};
1004 struct intel_context *ce;
1005 unsigned long timeslice;
1009 if (!intel_engine_has_timeslices(engine))
1013 * A:rq1 -- semaphore wait, timestamp X
1014 * A:rq2 -- write timestamp Y
1016 * B:rq1 [await A:rq1] -- write timestamp Z
1018 * Force timeslice, release semaphore.
1020 * Expect execution/evaluation order XZY
1023 engine_heartbeat_disable(engine);
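/* Shrink the timeslice to 1ms so that expiry kicks in almost immediately. */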
1024 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1026 slot = memset32(engine->status_page.addr + 1000, 0, 4);
1028 ce = intel_context_create(engine);
1034 rq[0] = create_rewinder(ce, NULL, slot, X);
1035 if (IS_ERR(rq[0])) {
1036 intel_context_put(ce);
1040 rq[1] = create_rewinder(ce, NULL, slot, Y);
1041 intel_context_put(ce);
1045 err = wait_for_submit(engine, rq[1], HZ / 2);
1047 pr_err("%s: failed to submit first context\n",
1052 ce = intel_context_create(engine);
1058 rq[2] = create_rewinder(ce, rq[0], slot, Z);
1059 intel_context_put(ce);
1063 err = wait_for_submit(engine, rq[2], HZ / 2);
1065 pr_err("%s: failed to submit second context\n",
1070 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1071 if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
1072 /* Wait for the timeslice to kick in */
1073 del_timer(&engine->execlists.timer);
1074 tasklet_hi_schedule(&engine->execlists.tasklet);
1075 intel_engine_flush_submission(engine);
1077 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1078 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1079 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1080 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1082 /* Release the hounds! */
1084 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1086 for (i = 1; i <= 3; i++) {
1087 unsigned long timeout = jiffies + HZ / 2;
1089 while (!READ_ONCE(slot[i]) &&
1090 time_before(jiffies, timeout))
1093 if (!time_before(jiffies, timeout)) {
1094 pr_err("%s: rq[%d] timed out\n",
1095 engine->name, i - 1);
1100 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1104 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1105 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1113 memset32(&slot[0], -1, 4);
1116 engine->props.timeslice_duration_ms = timeslice;
1117 engine_heartbeat_enable(engine);
1118 for (i = 0; i < 3; i++)
1119 i915_request_put(rq[i]);
1120 if (igt_flush_test(gt->i915))
1129 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1131 struct i915_request *rq;
1133 rq = intel_engine_create_kernel_request(engine);
1137 i915_request_get(rq);
1138 i915_request_add(rq);
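/* Allow up to two timeslice expirations (in jiffies), plus a jiffie of slack. */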
1143 static long timeslice_threshold(const struct intel_engine_cs *engine)
1145 return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
1148 static int live_timeslice_queue(void *arg)
1150 struct intel_gt *gt = arg;
1151 struct drm_i915_gem_object *obj;
1152 struct intel_engine_cs *engine;
1153 enum intel_engine_id id;
1154 struct i915_vma *vma;
1159 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
1160 * timeslicing between them disabled, we *do* enable timeslicing
1161 * if the queue demands it. (Normally, we do not submit if
1162 * ELSP[1] is already occupied, so must rely on timeslicing to
1163 * eject ELSP[0] in favour of the queue.)
1165 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1168 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1170 return PTR_ERR(obj);
1172 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1178 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1179 if (IS_ERR(vaddr)) {
1180 err = PTR_ERR(vaddr);
1184 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1188 err = i915_vma_sync(vma);
1192 for_each_engine(engine, gt, id) {
1193 struct i915_sched_attr attr = {
1194 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1196 struct i915_request *rq, *nop;
1198 if (!intel_engine_has_preemption(engine))
1201 engine_heartbeat_disable(engine);
1202 memset(vaddr, 0, PAGE_SIZE);
1204 /* ELSP[0]: semaphore wait */
1205 rq = semaphore_queue(engine, vma, 0);
1210 engine->schedule(rq, &attr);
1211 err = wait_for_submit(engine, rq, HZ / 2);
1213 pr_err("%s: Timed out trying to submit semaphores\n",
1218 /* ELSP[1]: nop request */
1219 nop = nop_request(engine);
1224 err = wait_for_submit(engine, nop, HZ / 2);
1225 i915_request_put(nop);
1227 pr_err("%s: Timed out trying to submit nop\n",
1232 GEM_BUG_ON(i915_request_completed(rq));
1233 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1235 /* Queue: semaphore signal, matching priority as semaphore */
1236 err = release_queue(engine, vma, 1, effective_prio(rq));
1240 /* Wait until we ack the release_queue and start timeslicing */
1243 intel_engine_flush_submission(engine);
1244 } while (READ_ONCE(engine->execlists.pending[0]));
1246 if (!READ_ONCE(engine->execlists.timer.expires) &&
1247 execlists_active(&engine->execlists) == rq &&
1248 !i915_request_completed(rq)) {
1249 struct drm_printer p =
1250 drm_info_printer(gt->i915->drm.dev);
1252 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
1254 intel_engine_dump(engine, &p,
1255 "%s\n", engine->name);
1258 memset(vaddr, 0xff, PAGE_SIZE);
1262 /* Timeslice every jiffy, so within 2 we should signal */
1263 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
1264 struct drm_printer p =
1265 drm_info_printer(gt->i915->drm.dev);
1267 pr_err("%s: Failed to timeslice into queue\n",
1269 intel_engine_dump(engine, &p,
1270 "%s\n", engine->name);
1272 memset(vaddr, 0xff, PAGE_SIZE);
1276 i915_request_put(rq);
1278 engine_heartbeat_enable(engine);
1284 i915_vma_unpin(vma);
1286 i915_gem_object_unpin_map(obj);
1288 i915_gem_object_put(obj);
1292 static int live_timeslice_nopreempt(void *arg)
1294 struct intel_gt *gt = arg;
1295 struct intel_engine_cs *engine;
1296 enum intel_engine_id id;
1297 struct igt_spinner spin;
1301 * We should not timeslice into a request that is marked with
1302 * I915_REQUEST_NOPREEMPT.
1304 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1307 if (igt_spinner_init(&spin, gt))
1310 for_each_engine(engine, gt, id) {
1311 struct intel_context *ce;
1312 struct i915_request *rq;
1313 unsigned long timeslice;
1315 if (!intel_engine_has_preemption(engine))
1318 ce = intel_context_create(engine);
1324 engine_heartbeat_disable(engine);
1325 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1327 /* Create an unpreemptible spinner */
1329 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1330 intel_context_put(ce);
1336 i915_request_get(rq);
1337 i915_request_add(rq);
1339 if (!igt_wait_for_spinner(&spin, rq)) {
1340 i915_request_put(rq);
1345 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1346 i915_request_put(rq);
1348 /* Followed by a maximum priority barrier (heartbeat) */
1350 ce = intel_context_create(engine);
1356 rq = intel_context_create_request(ce);
1357 intel_context_put(ce);
1363 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1364 i915_request_get(rq);
1365 i915_request_add(rq);
1368 * Wait until the barrier is in ELSP, and we know timeslicing
1369 * will have been activated.
1371 if (wait_for_submit(engine, rq, HZ / 2)) {
1372 i915_request_put(rq);
1378 * Since the ELSP[0] request is unpreemptible, it should not
1379 * allow the maximum priority barrier through. Wait long
1380 * enough to see if it is timesliced in by mistake.
1382 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) {
1383 pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1387 i915_request_put(rq);
1390 igt_spinner_end(&spin);
1392 xchg(&engine->props.timeslice_duration_ms, timeslice);
1393 engine_heartbeat_enable(engine);
1397 if (igt_flush_test(gt->i915)) {
1403 igt_spinner_fini(&spin);
1407 static int live_busywait_preempt(void *arg)
1409 struct intel_gt *gt = arg;
1410 struct i915_gem_context *ctx_hi, *ctx_lo;
1411 struct intel_engine_cs *engine;
1412 struct drm_i915_gem_object *obj;
1413 struct i915_vma *vma;
1414 enum intel_engine_id id;
1419 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1420 * preempt the busywaits used to synchronise between rings.
1423 ctx_hi = kernel_context(gt->i915);
1426 ctx_hi->sched.priority =
1427 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1429 ctx_lo = kernel_context(gt->i915);
1432 ctx_lo->sched.priority =
1433 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1435 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1441 map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1447 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1453 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1457 err = i915_vma_sync(vma);
1461 for_each_engine(engine, gt, id) {
1462 struct i915_request *lo, *hi;
1463 struct igt_live_test t;
1466 if (!intel_engine_has_preemption(engine))
1469 if (!intel_engine_can_store_dword(engine))
1472 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1478 * We create two requests. The low priority request
1479 * busywaits on a semaphore (inside the ringbuffer where
1480 * it should be preemptible) and the high priority request
1481 * uses a MI_STORE_DWORD_IMM to update the semaphore value,
1482 * allowing the first request to complete. If preemption
1483 * fails, we hang instead.
1486 lo = igt_request_alloc(ctx_lo, engine);
1492 cs = intel_ring_begin(lo, 8);
1495 i915_request_add(lo);
1499 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1500 *cs++ = i915_ggtt_offset(vma);
1504 /* XXX Do we need a flush + invalidate here? */
1506 *cs++ = MI_SEMAPHORE_WAIT |
1507 MI_SEMAPHORE_GLOBAL_GTT |
1509 MI_SEMAPHORE_SAD_EQ_SDD;
1511 *cs++ = i915_ggtt_offset(vma);
1514 intel_ring_advance(lo, cs);
1516 i915_request_get(lo);
1517 i915_request_add(lo);
1519 if (wait_for(READ_ONCE(*map), 10)) {
1520 i915_request_put(lo);
1525 /* Low priority request should be busywaiting now */
1526 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1527 i915_request_put(lo);
1528 pr_err("%s: Busywaiting request did not busywait!\n",
1534 hi = igt_request_alloc(ctx_hi, engine);
1537 i915_request_put(lo);
1541 cs = intel_ring_begin(hi, 4);
1544 i915_request_add(hi);
1545 i915_request_put(lo);
1549 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1550 *cs++ = i915_ggtt_offset(vma);
1554 intel_ring_advance(hi, cs);
1555 i915_request_add(hi);
1557 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1558 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1560 pr_err("%s: Failed to preempt semaphore busywait!\n",
1563 intel_engine_dump(engine, &p, "%s\n", engine->name);
1566 i915_request_put(lo);
1567 intel_gt_set_wedged(gt);
1571 GEM_BUG_ON(READ_ONCE(*map));
1572 i915_request_put(lo);
1574 if (igt_live_test_end(&t)) {
1582 i915_vma_unpin(vma);
1584 i915_gem_object_unpin_map(obj);
1586 i915_gem_object_put(obj);
1588 kernel_context_close(ctx_lo);
1590 kernel_context_close(ctx_hi);
1594 static struct i915_request *
1595 spinner_create_request(struct igt_spinner *spin,
1596 struct i915_gem_context *ctx,
1597 struct intel_engine_cs *engine,
1600 struct intel_context *ce;
1601 struct i915_request *rq;
1603 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1605 return ERR_CAST(ce);
1607 rq = igt_spinner_create_request(spin, ce, arb);
1608 intel_context_put(ce);
1612 static int live_preempt(void *arg)
1614 struct intel_gt *gt = arg;
1615 struct i915_gem_context *ctx_hi, *ctx_lo;
1616 struct igt_spinner spin_hi, spin_lo;
1617 struct intel_engine_cs *engine;
1618 enum intel_engine_id id;
1621 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1624 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1625 pr_err("Logical preemption supported, but not exposed\n");
1627 if (igt_spinner_init(&spin_hi, gt))
1630 if (igt_spinner_init(&spin_lo, gt))
1633 ctx_hi = kernel_context(gt->i915);
1636 ctx_hi->sched.priority =
1637 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1639 ctx_lo = kernel_context(gt->i915);
1642 ctx_lo->sched.priority =
1643 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1645 for_each_engine(engine, gt, id) {
1646 struct igt_live_test t;
1647 struct i915_request *rq;
1649 if (!intel_engine_has_preemption(engine))
1652 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1657 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1664 i915_request_add(rq);
1665 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1666 GEM_TRACE("lo spinner failed to start\n");
1668 intel_gt_set_wedged(gt);
1673 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1676 igt_spinner_end(&spin_lo);
1681 i915_request_add(rq);
1682 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1683 GEM_TRACE("hi spinner failed to start\n");
1685 intel_gt_set_wedged(gt);
1690 igt_spinner_end(&spin_hi);
1691 igt_spinner_end(&spin_lo);
1693 if (igt_live_test_end(&t)) {
1701 kernel_context_close(ctx_lo);
1703 kernel_context_close(ctx_hi);
1705 igt_spinner_fini(&spin_lo);
1707 igt_spinner_fini(&spin_hi);
1711 static int live_late_preempt(void *arg)
1713 struct intel_gt *gt = arg;
1714 struct i915_gem_context *ctx_hi, *ctx_lo;
1715 struct igt_spinner spin_hi, spin_lo;
1716 struct intel_engine_cs *engine;
1717 struct i915_sched_attr attr = {};
1718 enum intel_engine_id id;
1721 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1724 if (igt_spinner_init(&spin_hi, gt))
1727 if (igt_spinner_init(&spin_lo, gt))
1730 ctx_hi = kernel_context(gt->i915);
1734 ctx_lo = kernel_context(gt->i915);
1738 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1739 ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1741 for_each_engine(engine, gt, id) {
1742 struct igt_live_test t;
1743 struct i915_request *rq;
1745 if (!intel_engine_has_preemption(engine))
1748 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1753 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1760 i915_request_add(rq);
1761 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1762 pr_err("First context failed to start\n");
1766 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1769 igt_spinner_end(&spin_lo);
1774 i915_request_add(rq);
1775 if (igt_wait_for_spinner(&spin_hi, rq)) {
1776 pr_err("Second context overtook first?\n");
1780 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1781 engine->schedule(rq, &attr);
1783 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1784 pr_err("High priority context failed to preempt the low priority context\n");
1789 igt_spinner_end(&spin_hi);
1790 igt_spinner_end(&spin_lo);
1792 if (igt_live_test_end(&t)) {
1800 kernel_context_close(ctx_lo);
1802 kernel_context_close(ctx_hi);
1804 igt_spinner_fini(&spin_lo);
1806 igt_spinner_fini(&spin_hi);
1810 igt_spinner_end(&spin_hi);
1811 igt_spinner_end(&spin_lo);
1812 intel_gt_set_wedged(gt);
1817 struct preempt_client {
1818 struct igt_spinner spin;
1819 struct i915_gem_context *ctx;
1822 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1824 c->ctx = kernel_context(gt->i915);
1828 if (igt_spinner_init(&c->spin, gt))
1834 kernel_context_close(c->ctx);
1838 static void preempt_client_fini(struct preempt_client *c)
1840 igt_spinner_fini(&c->spin);
1841 kernel_context_close(c->ctx);
1844 static int live_nopreempt(void *arg)
1846 struct intel_gt *gt = arg;
1847 struct intel_engine_cs *engine;
1848 struct preempt_client a, b;
1849 enum intel_engine_id id;
1853 * Verify that we can disable preemption for an individual request
1854 * that may be being observed and does not want to be interrupted.
1857 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1860 if (preempt_client_init(gt, &a))
1862 if (preempt_client_init(gt, &b))
1864 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1866 for_each_engine(engine, gt, id) {
1867 struct i915_request *rq_a, *rq_b;
1869 if (!intel_engine_has_preemption(engine))
1872 engine->execlists.preempt_hang.count = 0;
1874 rq_a = spinner_create_request(&a.spin,
1878 err = PTR_ERR(rq_a);
1882 /* Low priority client, but unpreemptable! */
1883 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1885 i915_request_add(rq_a);
1886 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1887 pr_err("First client failed to start\n");
1891 rq_b = spinner_create_request(&b.spin,
1895 err = PTR_ERR(rq_b);
1899 i915_request_add(rq_b);
1901 /* B is much more important than A! (But A is unpreemptable.) */
1902 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1904 /* Wait long enough for preemption and timeslicing */
1905 if (igt_wait_for_spinner(&b.spin, rq_b)) {
1906 pr_err("Second client started too early!\n");
1910 igt_spinner_end(&a.spin);
1912 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1913 pr_err("Second client failed to start\n");
1917 igt_spinner_end(&b.spin);
1919 if (engine->execlists.preempt_hang.count) {
1920 pr_err("Preemption recorded x%d; should have been suppressed!\n",
1921 engine->execlists.preempt_hang.count);
1926 if (igt_flush_test(gt->i915))
1932 preempt_client_fini(&b);
1934 preempt_client_fini(&a);
1938 igt_spinner_end(&b.spin);
1939 igt_spinner_end(&a.spin);
1940 intel_gt_set_wedged(gt);
1945 struct live_preempt_cancel {
1946 struct intel_engine_cs *engine;
1947 struct preempt_client a, b;
1950 static int __cancel_active0(struct live_preempt_cancel *arg)
1952 struct i915_request *rq;
1953 struct igt_live_test t;
1956 /* Preempt cancel of ELSP0 */
1957 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1958 if (igt_live_test_begin(&t, arg->engine->i915,
1959 __func__, arg->engine->name))
1962 rq = spinner_create_request(&arg->a.spin,
1963 arg->a.ctx, arg->engine,
1968 clear_bit(CONTEXT_BANNED, &rq->context->flags);
1969 i915_request_get(rq);
1970 i915_request_add(rq);
1971 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1976 intel_context_set_banned(rq->context);
1977 err = intel_engine_pulse(arg->engine);
1981 err = wait_for_reset(arg->engine, rq, HZ / 2);
1983 pr_err("Cancelled inflight0 request did not reset\n");
1988 i915_request_put(rq);
1989 if (igt_live_test_end(&t))
1994 static int __cancel_active1(struct live_preempt_cancel *arg)
1996 struct i915_request *rq[2] = {};
1997 struct igt_live_test t;
2000 /* Preempt cancel of ELSP1 */
2001 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2002 if (igt_live_test_begin(&t, arg->engine->i915,
2003 __func__, arg->engine->name))
2006 rq[0] = spinner_create_request(&arg->a.spin,
2007 arg->a.ctx, arg->engine,
2008 MI_NOOP); /* no preemption */
2010 return PTR_ERR(rq[0]);
2012 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2013 i915_request_get(rq[0]);
2014 i915_request_add(rq[0]);
2015 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2020 rq[1] = spinner_create_request(&arg->b.spin,
2021 arg->b.ctx, arg->engine,
2023 if (IS_ERR(rq[1])) {
2024 err = PTR_ERR(rq[1]);
2028 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2029 i915_request_get(rq[1]);
2030 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2031 i915_request_add(rq[1]);
2035 intel_context_set_banned(rq[1]->context);
2036 err = intel_engine_pulse(arg->engine);
2040 igt_spinner_end(&arg->a.spin);
2041 err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2045 if (rq[0]->fence.error != 0) {
2046 pr_err("Normal inflight0 request did not complete\n");
2051 if (rq[1]->fence.error != -EIO) {
2052 pr_err("Cancelled inflight1 request did not report -EIO\n");
2058 i915_request_put(rq[1]);
2059 i915_request_put(rq[0]);
2060 if (igt_live_test_end(&t))
2065 static int __cancel_queued(struct live_preempt_cancel *arg)
2067 struct i915_request *rq[3] = {};
2068 struct igt_live_test t;
2071 /* Full ELSP and one in the wings */
2072 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2073 if (igt_live_test_begin(&t, arg->engine->i915,
2074 __func__, arg->engine->name))
2077 rq[0] = spinner_create_request(&arg->a.spin,
2078 arg->a.ctx, arg->engine,
2081 return PTR_ERR(rq[0]);
2083 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2084 i915_request_get(rq[0]);
2085 i915_request_add(rq[0]);
2086 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2091 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2092 if (IS_ERR(rq[1])) {
2093 err = PTR_ERR(rq[1]);
2097 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2098 i915_request_get(rq[1]);
2099 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2100 i915_request_add(rq[1]);
2104 rq[2] = spinner_create_request(&arg->b.spin,
2105 arg->a.ctx, arg->engine,
2107 if (IS_ERR(rq[2])) {
2108 err = PTR_ERR(rq[2]);
2112 i915_request_get(rq[2]);
2113 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2114 i915_request_add(rq[2]);
2118 intel_context_set_banned(rq[2]->context);
2119 err = intel_engine_pulse(arg->engine);
2123 err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2127 if (rq[0]->fence.error != -EIO) {
2128 pr_err("Cancelled inflight0 request did not report -EIO\n");
2133 if (rq[1]->fence.error != 0) {
2134 pr_err("Normal inflight1 request did not complete\n");
2139 if (rq[2]->fence.error != -EIO) {
2140 pr_err("Cancelled queued request did not report -EIO\n");
2146 i915_request_put(rq[2]);
2147 i915_request_put(rq[1]);
2148 i915_request_put(rq[0]);
2149 if (igt_live_test_end(&t))
2154 static int __cancel_hostile(struct live_preempt_cancel *arg)
2156 struct i915_request *rq;
2159 /* Preempt cancel non-preemptible spinner in ELSP0 */
2160 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2163 if (!intel_has_reset_engine(arg->engine->gt))
2166 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2167 rq = spinner_create_request(&arg->a.spin,
2168 arg->a.ctx, arg->engine,
2169 MI_NOOP); /* preemption disabled */
2173 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2174 i915_request_get(rq);
2175 i915_request_add(rq);
2176 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2181 intel_context_set_banned(rq->context);
2182 err = intel_engine_pulse(arg->engine); /* force reset */
2186 err = wait_for_reset(arg->engine, rq, HZ / 2);
2188 pr_err("Cancelled inflight0 request did not reset\n");
2193 i915_request_put(rq);
2194 if (igt_flush_test(arg->engine->i915))
2199 static int live_preempt_cancel(void *arg)
2201 struct intel_gt *gt = arg;
2202 struct live_preempt_cancel data;
2203 enum intel_engine_id id;
2207 * To cancel an inflight context, we need to first remove it from the
2208 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2211 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2214 if (preempt_client_init(gt, &data.a))
2216 if (preempt_client_init(gt, &data.b))
2219 for_each_engine(data.engine, gt, id) {
2220 if (!intel_engine_has_preemption(data.engine))
2223 err = __cancel_active0(&data);
2227 err = __cancel_active1(&data);
2231 err = __cancel_queued(&data);
2235 err = __cancel_hostile(&data);
2242 preempt_client_fini(&data.b);
2244 preempt_client_fini(&data.a);
2249 igt_spinner_end(&data.b.spin);
2250 igt_spinner_end(&data.a.spin);
2251 intel_gt_set_wedged(gt);
2255 static int live_suppress_self_preempt(void *arg)
2257 struct intel_gt *gt = arg;
2258 struct intel_engine_cs *engine;
2259 struct i915_sched_attr attr = {
2260 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2262 struct preempt_client a, b;
2263 enum intel_engine_id id;
2267 * Verify that if a preemption request does not cause a change in
2268 * the current execution order, the preempt-to-idle injection is
2269 * skipped and that we do not accidentally apply it after the CS
2273 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2276 if (intel_uc_uses_guc_submission(&gt->uc))
2277 return 0; /* presume black box */
2279 if (intel_vgpu_active(gt->i915))
2280 return 0; /* GVT forces single port & request submission */
2282 if (preempt_client_init(gt, &a))
2284 if (preempt_client_init(gt, &b))
2287 for_each_engine(engine, gt, id) {
2288 struct i915_request *rq_a, *rq_b;
2291 if (!intel_engine_has_preemption(engine))
2294 if (igt_flush_test(gt->i915))
2297 intel_engine_pm_get(engine);
2298 engine->execlists.preempt_hang.count = 0;
2300 rq_a = spinner_create_request(&a.spin,
2304 err = PTR_ERR(rq_a);
2305 intel_engine_pm_put(engine);
2309 i915_request_add(rq_a);
2310 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2311 pr_err("First client failed to start\n");
2312 intel_engine_pm_put(engine);
2316 /* Keep postponing the timer to avoid premature slicing */
2317 mod_timer(&engine->execlists.timer, jiffies + HZ);
2318 for (depth = 0; depth < 8; depth++) {
2319 rq_b = spinner_create_request(&b.spin,
2323 err = PTR_ERR(rq_b);
2324 intel_engine_pm_put(engine);
2327 i915_request_add(rq_b);
2329 GEM_BUG_ON(i915_request_completed(rq_a));
2330 engine->schedule(rq_a, &attr);
2331 igt_spinner_end(&a.spin);
2333 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2334 pr_err("Second client failed to start\n");
2335 intel_engine_pm_put(engine);
2342 igt_spinner_end(&a.spin);
2344 if (engine->execlists.preempt_hang.count) {
2345 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2347 engine->execlists.preempt_hang.count,
2349 intel_engine_pm_put(engine);
2354 intel_engine_pm_put(engine);
2355 if (igt_flush_test(gt->i915))
2361 preempt_client_fini(&b);
2363 preempt_client_fini(&a);
2367 igt_spinner_end(&b.spin);
2368 igt_spinner_end(&a.spin);
2369 intel_gt_set_wedged(gt);
2374 static int __i915_sw_fence_call
2375 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
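/*
 * Manufacture a bare i915_request that is never submitted to HW and can
 * never complete; it is installed as a timeline's last request so that the
 * real requests queued afterwards look like waiters and do not receive the
 * new-client priority boost.
 */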
2380 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
2382 struct i915_request *rq;
2384 rq = kzalloc(sizeof(*rq), GFP_KERNEL);
2388 rq->engine = engine;
2390 spin_lock_init(&rq->lock);
2391 INIT_LIST_HEAD(&rq->fence.cb_list);
2392 rq->fence.lock = &rq->lock;
2393 rq->fence.ops = &i915_fence_ops;
2395 i915_sched_node_init(&rq->sched);
2397 /* mark this request as permanently incomplete */
2398 rq->fence.seqno = 1;
2399 BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
2400 rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
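/*
 * hwsp_seqno now points at the always-zero upper half of fence.seqno, so
 * i915_request_completed() can never report this request as finished.
 */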
2401 GEM_BUG_ON(i915_request_completed(rq));
2403 i915_sw_fence_init(&rq->submit, dummy_notify);
2404 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
2406 spin_lock_init(&rq->lock);
2407 rq->fence.lock = &rq->lock;
2408 INIT_LIST_HEAD(&rq->fence.cb_list);
2413 static void dummy_request_free(struct i915_request *dummy)
2415 /* We have to fake the CS interrupt to kick the next request */
2416 i915_sw_fence_commit(&dummy->submit);
2418 i915_request_mark_complete(dummy);
2419 dma_fence_signal(&dummy->fence);
2421 i915_sched_node_fini(&dummy->sched);
2422 i915_sw_fence_fini(&dummy->submit);
2424 dma_fence_free(&dummy->fence);
2427 static int live_suppress_wait_preempt(void *arg)
2429 struct intel_gt *gt = arg;
2430 struct preempt_client client[4];
2431 struct i915_request *rq[ARRAY_SIZE(client)] = {};
2432 struct intel_engine_cs *engine;
2433 enum intel_engine_id id;
2438 * Waiters are given a little priority nudge, but not enough
2439 * to actually cause any preemption. Double check that we do
2440 * not needlessly generate preempt-to-idle cycles.
2443 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2446 if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
2448 if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
2450 if (preempt_client_init(gt, &client[2])) /* head of queue */
2452 if (preempt_client_init(gt, &client[3])) /* bystander */
2455 for_each_engine(engine, gt, id) {
2458 if (!intel_engine_has_preemption(engine))
2461 if (!engine->emit_init_breadcrumb)
2464 for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
2465 struct i915_request *dummy;
2467 engine->execlists.preempt_hang.count = 0;
2469 dummy = dummy_request(engine);
2473 for (i = 0; i < ARRAY_SIZE(client); i++) {
2474 struct i915_request *this;
2476 this = spinner_create_request(&client[i].spin,
2477 client[i].ctx, engine,
2480 err = PTR_ERR(this);
2484 /* Disable NEWCLIENT promotion */
2485 __i915_active_fence_set(&i915_request_timeline(this)->last_request,
2488 rq[i] = i915_request_get(this);
2489 i915_request_add(this);
2492 dummy_request_free(dummy);
2494 GEM_BUG_ON(i915_request_completed(rq[0]));
2495 if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
2496 pr_err("%s: First client failed to start\n",
2500 GEM_BUG_ON(!i915_request_started(rq[0]));
2502 if (i915_request_wait(rq[depth],
2505 pr_err("%s: Waiter depth:%d completed!\n",
2506 engine->name, depth);
2510 for (i = 0; i < ARRAY_SIZE(client); i++) {
2511 igt_spinner_end(&client[i].spin);
2512 i915_request_put(rq[i]);
2516 if (igt_flush_test(gt->i915))
2519 if (engine->execlists.preempt_hang.count) {
2520 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
2522 engine->execlists.preempt_hang.count,
2532 preempt_client_fini(&client[3]);
2534 preempt_client_fini(&client[2]);
2536 preempt_client_fini(&client[1]);
2538 preempt_client_fini(&client[0]);
2542 for (i = 0; i < ARRAY_SIZE(client); i++) {
2543 igt_spinner_end(&client[i].spin);
2544 i915_request_put(rq[i]);
2546 intel_gt_set_wedged(gt);
2551 static int live_chain_preempt(void *arg)
2553 struct intel_gt *gt = arg;
2554 struct intel_engine_cs *engine;
2555 struct preempt_client hi, lo;
2556 enum intel_engine_id id;
2560 * Build a chain AB...BA between two contexts (A, B) and request
2561 * preemption of the last request. It should then complete before
2562 * the previously submitted spinner in B.
2565 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2568 if (preempt_client_init(gt, &hi))
2571 if (preempt_client_init(gt, &lo))
2574 for_each_engine(engine, gt, id) {
2575 struct i915_sched_attr attr = {
2576 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2578 struct igt_live_test t;
2579 struct i915_request *rq;
2580 int ring_size, count, i;
2582 if (!intel_engine_has_preemption(engine))
2585 rq = spinner_create_request(&lo.spin,
2591 i915_request_get(rq);
2592 i915_request_add(rq);
2594 ring_size = rq->wa_tail - rq->head;
2596 ring_size += rq->ring->size;
2597 ring_size = rq->ring->size / ring_size;
2598 pr_debug("%s(%s): Using maximum of %d requests\n",
2599 __func__, engine->name, ring_size);
2601 igt_spinner_end(&lo.spin);
2602 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2603 pr_err("Timed out waiting to flush %s\n", engine->name);
2604 i915_request_put(rq);
2607 i915_request_put(rq);
2609 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2614 for_each_prime_number_from(count, 1, ring_size) {
2615 rq = spinner_create_request(&hi.spin,
2620 i915_request_add(rq);
2621 if (!igt_wait_for_spinner(&hi.spin, rq))
2624 rq = spinner_create_request(&lo.spin,
2629 i915_request_add(rq);
2631 for (i = 0; i < count; i++) {
2632 rq = igt_request_alloc(lo.ctx, engine);
2635 i915_request_add(rq);
2638 rq = igt_request_alloc(hi.ctx, engine);
2642 i915_request_get(rq);
2643 i915_request_add(rq);
2644 engine->schedule(rq, &attr);
2646 igt_spinner_end(&hi.spin);
2647 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2648 struct drm_printer p =
2649 drm_info_printer(gt->i915->drm.dev);
2651 pr_err("Failed to preempt over chain of %d\n",
2653 intel_engine_dump(engine, &p,
2654 "%s\n", engine->name);
2655 i915_request_put(rq);
2658 igt_spinner_end(&lo.spin);
2659 i915_request_put(rq);
2661 rq = igt_request_alloc(lo.ctx, engine);
2665 i915_request_get(rq);
2666 i915_request_add(rq);
2668 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2669 struct drm_printer p =
2670 drm_info_printer(gt->i915->drm.dev);
2672 pr_err("Failed to flush low priority chain of %d requests\n",
2674 intel_engine_dump(engine, &p,
2675 "%s\n", engine->name);
2677 i915_request_put(rq);
2680 i915_request_put(rq);
2683 if (igt_live_test_end(&t)) {
2691 preempt_client_fini(&lo);
2693 preempt_client_fini(&hi);
2697 igt_spinner_end(&hi.spin);
2698 igt_spinner_end(&lo.spin);
2699 intel_gt_set_wedged(gt);
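/*
 * Each batch created by create_gang() spins on its own semaphore; once it is
 * finally executed it releases the semaphore of the previous (lower
 * priority) batch, so the whole gang unwinds from the highest priority
 * downwards.
 */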
2704 static int create_gang(struct intel_engine_cs *engine,
2705 struct i915_request **prev)
2707 struct drm_i915_gem_object *obj;
2708 struct intel_context *ce;
2709 struct i915_request *rq;
2710 struct i915_vma *vma;
2714 ce = intel_context_create(engine);
2718 obj = i915_gem_object_create_internal(engine->i915, 4096);
2724 vma = i915_vma_instance(obj, ce->vm, NULL);
2730 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2734 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2738 /* Semaphore target: spin until zero */
2739 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2741 *cs++ = MI_SEMAPHORE_WAIT |
2743 MI_SEMAPHORE_SAD_EQ_SDD;
2745 *cs++ = lower_32_bits(vma->node.start);
2746 *cs++ = upper_32_bits(vma->node.start);
2749 u64 offset = (*prev)->batch->node.start;
2751 /* Terminate the spinner in the next lower priority batch. */
2752 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2753 *cs++ = lower_32_bits(offset);
2754 *cs++ = upper_32_bits(offset);
2758 *cs++ = MI_BATCH_BUFFER_END;
2759 i915_gem_object_flush_map(obj);
2760 i915_gem_object_unpin_map(obj);
2762 rq = intel_context_create_request(ce);
2766 rq->batch = i915_vma_get(vma);
2767 i915_request_get(rq);
2770 err = i915_request_await_object(rq, vma->obj, false);
2772 err = i915_vma_move_to_active(vma, rq, 0);
2774 err = rq->engine->emit_bb_start(rq,
2777 i915_vma_unlock(vma);
2778 i915_request_add(rq);
2782 i915_gem_object_put(obj);
2783 intel_context_put(ce);
2785 rq->client_link.next = &(*prev)->client_link;
2790 i915_vma_put(rq->batch);
2791 i915_request_put(rq);
2793 i915_gem_object_put(obj);
2795 intel_context_put(ce);
2799 static int live_preempt_gang(void *arg)
2801 struct intel_gt *gt = arg;
2802 struct intel_engine_cs *engine;
2803 enum intel_engine_id id;
2805 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2809 * Build as long a chain of preempters as we can, with each
2810 * request higher priority than the last. Once we are ready, we release
2811 * the last batch which then percolates down the chain, each releasing
2812 * the next oldest in turn. The intent is to simply push as hard as we
2813 * can with the number of preemptions, trying to exceed narrow HW
2814 * limits. At a minimum, we insist that we can sort all the user
2815 * high priority levels into execution order.
2818 for_each_engine(engine, gt, id) {
2819 struct i915_request *rq = NULL;
2820 struct igt_live_test t;
2821 IGT_TIMEOUT(end_time);
2826 if (!intel_engine_has_preemption(engine))
2829 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2833 struct i915_sched_attr attr = {
2834 .priority = I915_USER_PRIORITY(prio++),
2837 err = create_gang(engine, &rq);
2841 /* Submit each spinner at increasing priority */
2842 engine->schedule(rq, &attr);
2844 if (prio <= I915_PRIORITY_MAX)
2847 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2850 if (__igt_timeout(end_time, NULL))
2853 pr_debug("%s: Preempt chain of %d requests\n",
2854 engine->name, prio);
2857 * Such that the last spinner is the highest priority and
2858 * should execute first. When that spinner completes,
2859 * it will terminate the next lowest spinner until there
2860 * are no more spinners and the gang is complete.
2862 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2865 i915_gem_object_unpin_map(rq->batch->obj);
2868 intel_gt_set_wedged(gt);
2871 while (rq) { /* wait for each rq from highest to lowest prio */
2872 struct i915_request *n =
2873 list_next_entry(rq, client_link);
2875 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2876 struct drm_printer p =
2877 drm_info_printer(engine->i915->drm.dev);
2879 pr_err("Failed to flush chain of %d requests, at %d\n",
2880 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2881 intel_engine_dump(engine, &p,
2882 "%s\n", engine->name);
2887 i915_vma_put(rq->batch);
2888 i915_request_put(rq);
2892 if (igt_live_test_end(&t))
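/*
 * create_gpr_user() builds a user batch of MI_MATH increments and SRM stores
 * for each GPR, followed by a semaphore wait; if the batch is ever replayed
 * from an earlier arbitration point, the repeated increment shows up in the
 * result buffer as a value greater than 1.
 */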
2901 static struct i915_vma *
2902 create_gpr_user(struct intel_engine_cs *engine,
2903 struct i915_vma *result,
2904 unsigned int offset)
2906 struct drm_i915_gem_object *obj;
2907 struct i915_vma *vma;
2912 obj = i915_gem_object_create_internal(engine->i915, 4096);
2914 return ERR_CAST(obj);
2916 vma = i915_vma_instance(obj, result->vm, NULL);
2918 i915_gem_object_put(obj);
2922 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2925 return ERR_PTR(err);
2928 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2931 return ERR_CAST(cs);
2934 /* All GPR are clear for new contexts. We use GPR(0) as a constant */
2935 *cs++ = MI_LOAD_REGISTER_IMM(1);
2936 *cs++ = CS_GPR(engine, 0);
2939 for (i = 1; i < NUM_GPR; i++) {
2945 * As we read and write into the context saved GPR[i], if
2946 * we restart this batch buffer from an earlier point, we
2947 * will repeat the increment and store a value > 1.
2950 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
2951 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
2952 *cs++ = MI_MATH_ADD;
2953 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
2955 addr = result->node.start + offset + i * sizeof(*cs);
2956 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
2957 *cs++ = CS_GPR(engine, 2 * i);
2958 *cs++ = lower_32_bits(addr);
2959 *cs++ = upper_32_bits(addr);
2961 *cs++ = MI_SEMAPHORE_WAIT |
2963 MI_SEMAPHORE_SAD_GTE_SDD;
2965 *cs++ = lower_32_bits(result->node.start);
2966 *cs++ = upper_32_bits(result->node.start);
2969 *cs++ = MI_BATCH_BUFFER_END;
2970 i915_gem_object_flush_map(obj);
2971 i915_gem_object_unpin_map(obj);
2976 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
2978 struct drm_i915_gem_object *obj;
2979 struct i915_vma *vma;
2982 obj = i915_gem_object_create_internal(gt->i915, sz);
2984 return ERR_CAST(obj);
2986 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
2988 i915_gem_object_put(obj);
2992 err = i915_ggtt_pin(vma, 0, 0);
2995 return ERR_PTR(err);
3001 static struct i915_request *
3002 create_gpr_client(struct intel_engine_cs *engine,
3003 struct i915_vma *global,
3004 unsigned int offset)
3006 struct i915_vma *batch, *vma;
3007 struct intel_context *ce;
3008 struct i915_request *rq;
3011 ce = intel_context_create(engine);
3013 return ERR_CAST(ce);
3015 vma = i915_vma_instance(global->obj, ce->vm, NULL);
3021 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3025 batch = create_gpr_user(engine, vma, offset);
3026 if (IS_ERR(batch)) {
3027 err = PTR_ERR(batch);
3031 rq = intel_context_create_request(ce);
3038 err = i915_request_await_object(rq, vma->obj, false);
3040 err = i915_vma_move_to_active(vma, rq, 0);
3041 i915_vma_unlock(vma);
3043 i915_vma_lock(batch);
3045 err = i915_request_await_object(rq, batch->obj, false);
3047 err = i915_vma_move_to_active(batch, rq, 0);
3049 err = rq->engine->emit_bb_start(rq,
3052 i915_vma_unlock(batch);
3053 i915_vma_unpin(batch);
3056 i915_request_get(rq);
3057 i915_request_add(rq);
3060 i915_vma_put(batch);
3062 i915_vma_unpin(vma);
3064 intel_context_put(ce);
3065 return err ? ERR_PTR(err) : rq;
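/*
 * Submit a maximum priority kernel request that bumps the global semaphore,
 * forcing a preemption of whichever GPR client batch is currently running.
 */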
3068 static int preempt_user(struct intel_engine_cs *engine,
3069 struct i915_vma *global,
3072 struct i915_sched_attr attr = {
3073 .priority = I915_PRIORITY_MAX
3075 struct i915_request *rq;
3079 rq = intel_engine_create_kernel_request(engine);
3083 cs = intel_ring_begin(rq, 4);
3085 i915_request_add(rq);
3089 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3090 *cs++ = i915_ggtt_offset(global);
3094 intel_ring_advance(rq, cs);
3096 i915_request_get(rq);
3097 i915_request_add(rq);
3099 engine->schedule(rq, &attr);
3101 if (i915_request_wait(rq, 0, HZ / 2) < 0)
3103 i915_request_put(rq);
3108 static int live_preempt_user(void *arg)
3110 struct intel_gt *gt = arg;
3111 struct intel_engine_cs *engine;
3112 struct i915_vma *global;
3113 enum intel_engine_id id;
3117 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3121 * In our other tests, we look at preemption in carefully
3122 * controlled conditions in the ringbuffer. Since most of the
3123 * time is spent in user batches, most of our preemptions naturally
3124 * occur there. We want to verify that when we preempt inside a batch
3125 * we continue on from the current instruction and do not roll back
3126 * to the start, or another earlier arbitration point.
3128 * To verify this, we create a batch which is a mixture of
3129 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3130 * a few preempting contexts thrown into the mix, we look for any
3131 * repeated instructions (which show up as incorrect values).
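*
* Each client stores its GPR results into its own NUM_GPR-dword slice of
* the shared page, while dword 0 of that page is the semaphore which each
* preempt_user() request bumps to release the next wait. On success every
* checked slot reads back exactly 1; anything larger means an increment
* was replayed after a preemption.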
3134 global = create_global(gt, 4096);
3136 return PTR_ERR(global);
3138 result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
3139 if (IS_ERR(result)) {
3140 i915_vma_unpin_and_release(&global, 0);
3141 return PTR_ERR(result);
3144 for_each_engine(engine, gt, id) {
3145 struct i915_request *client[3] = {};
3146 struct igt_live_test t;
3149 if (!intel_engine_has_preemption(engine))
3152 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3153 continue; /* we need per-context GPR */
3155 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3160 memset(result, 0, 4096);
3162 for (i = 0; i < ARRAY_SIZE(client); i++) {
3163 struct i915_request *rq;
3165 rq = create_gpr_client(engine, global,
3166 NUM_GPR * i * sizeof(u32));
3173 /* Continuously preempt the set of 3 running contexts */
3174 for (i = 1; i <= NUM_GPR; i++) {
3175 err = preempt_user(engine, global, i);
3180 if (READ_ONCE(result[0]) != NUM_GPR) {
3181 pr_err("%s: Failed to release semaphore\n",
3187 for (i = 0; i < ARRAY_SIZE(client); i++) {
3190 if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3195 for (gpr = 1; gpr < NUM_GPR; gpr++) {
3196 if (result[NUM_GPR * i + gpr] != 1) {
3197 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3199 i, gpr, result[NUM_GPR * i + gpr]);
3207 for (i = 0; i < ARRAY_SIZE(client); i++) {
3211 i915_request_put(client[i]);
3214 /* Flush the semaphores on error */
3215 smp_store_mb(result[0], -1);
3216 if (igt_live_test_end(&t))
3222 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3226 static int live_preempt_timeout(void *arg)
3228 struct intel_gt *gt = arg;
3229 struct i915_gem_context *ctx_hi, *ctx_lo;
3230 struct igt_spinner spin_lo;
3231 struct intel_engine_cs *engine;
3232 enum intel_engine_id id;
3236 * Check that we force preemption to occur by cancelling the previous
3237 * context if it refuses to yield the GPU.
3239 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3242 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3245 if (!intel_has_reset_engine(gt))
3248 if (igt_spinner_init(&spin_lo, gt))
3251 ctx_hi = kernel_context(gt->i915);
3254 ctx_hi->sched.priority =
3255 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
3257 ctx_lo = kernel_context(gt->i915);
3260 ctx_lo->sched.priority =
3261 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
3263 for_each_engine(engine, gt, id) {
3264 unsigned long saved_timeout;
3265 struct i915_request *rq;
3267 if (!intel_engine_has_preemption(engine))
3270 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3271 MI_NOOP); /* preemption disabled */
3277 i915_request_add(rq);
3278 if (!igt_wait_for_spinner(&spin_lo, rq)) {
3279 intel_gt_set_wedged(gt);
3284 rq = igt_request_alloc(ctx_hi, engine);
3286 igt_spinner_end(&spin_lo);
3291 /* Flush the previous CS ack before changing timeouts */
3292 while (READ_ONCE(engine->execlists.pending[0]))
3295 saved_timeout = engine->props.preempt_timeout_ms;
3296 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */
3298 i915_request_get(rq);
3299 i915_request_add(rq);
3301 intel_engine_flush_submission(engine);
3302 engine->props.preempt_timeout_ms = saved_timeout;
3304 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3305 intel_gt_set_wedged(gt);
3306 i915_request_put(rq);
3311 igt_spinner_end(&spin_lo);
3312 i915_request_put(rq);
3317 kernel_context_close(ctx_lo);
3319 kernel_context_close(ctx_hi);
3321 igt_spinner_fini(&spin_lo);
3325 static int random_range(struct rnd_state *rnd, int min, int max)
3327 return i915_prandom_u32_max_state(max - min, rnd) + min;
3330 static int random_priority(struct rnd_state *rnd)
3332 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3335 struct preempt_smoke {
3336 struct intel_gt *gt;
3337 struct i915_gem_context **contexts;
3338 struct intel_engine_cs *engine;
3339 struct drm_i915_gem_object *batch;
3340 unsigned int ncontext;
3341 struct rnd_state prng;
3342 unsigned long count;
3345 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3347 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3351 static int smoke_submit(struct preempt_smoke *smoke,
3352 struct i915_gem_context *ctx, int prio,
3353 struct drm_i915_gem_object *batch)
3355 struct i915_request *rq;
3356 struct i915_vma *vma = NULL;
3360 struct i915_address_space *vm;
3362 vm = i915_gem_context_get_vm_rcu(ctx);
3363 vma = i915_vma_instance(batch, vm, NULL);
3366 return PTR_ERR(vma);
3368 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3373 ctx->sched.priority = prio;
3375 rq = igt_request_alloc(ctx, smoke->engine);
3383 err = i915_request_await_object(rq, vma->obj, false);
3385 err = i915_vma_move_to_active(vma, rq, 0);
3387 err = rq->engine->emit_bb_start(rq,
3390 i915_vma_unlock(vma);
3393 i915_request_add(rq);
3397 i915_vma_unpin(vma);
3402 static int smoke_crescendo_thread(void *arg)
3404 struct preempt_smoke *smoke = arg;
3405 IGT_TIMEOUT(end_time);
3406 unsigned long count;
3410 struct i915_gem_context *ctx = smoke_context(smoke);
3413 err = smoke_submit(smoke,
3414 ctx, count % I915_PRIORITY_MAX,
3420 } while (!__igt_timeout(end_time, NULL));
3422 smoke->count = count;
3426 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3427 #define BATCH BIT(0)
3429 struct task_struct *tsk[I915_NUM_ENGINES] = {};
3430 struct preempt_smoke arg[I915_NUM_ENGINES];
3431 struct intel_engine_cs *engine;
3432 enum intel_engine_id id;
3433 unsigned long count;
3436 for_each_engine(engine, smoke->gt, id) {
3438 arg[id].engine = engine;
3439 if (!(flags & BATCH))
3440 arg[id].batch = NULL;
3443 tsk[id] = kthread_run(smoke_crescendo_thread, &arg,
3444 "igt/smoke:%d", id);
3445 if (IS_ERR(tsk[id])) {
3446 err = PTR_ERR(tsk[id]);
3449 get_task_struct(tsk[id]);
3452 yield(); /* start all threads before we kthread_stop() */
3455 for_each_engine(engine, smoke->gt, id) {
3458 if (IS_ERR_OR_NULL(tsk[id]))
3461 status = kthread_stop(tsk[id]);
3465 count += arg[id].count;
3467 put_task_struct(tsk[id]);
3470 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3472 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3476 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3478 enum intel_engine_id id;
3479 IGT_TIMEOUT(end_time);
3480 unsigned long count;
3484 for_each_engine(smoke->engine, smoke->gt, id) {
3485 struct i915_gem_context *ctx = smoke_context(smoke);
3488 err = smoke_submit(smoke,
3489 ctx, random_priority(&smoke->prng),
3490 flags & BATCH ? smoke->batch : NULL);
3496 } while (!__igt_timeout(end_time, NULL));
3498 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3500 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3504 static int live_preempt_smoke(void *arg)
3506 struct preempt_smoke smoke = {
3508 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3511 const unsigned int phase[] = { 0, BATCH };
3512 struct igt_live_test t;
3517 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
3520 smoke.contexts = kmalloc_array(smoke.ncontext,
3521 sizeof(*smoke.contexts),
3523 if (!smoke.contexts)
3527 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3528 if (IS_ERR(smoke.batch)) {
3529 err = PTR_ERR(smoke.batch);
3533 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
3538 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3539 cs[n] = MI_ARB_CHECK;
3540 cs[n] = MI_BATCH_BUFFER_END;
3541 i915_gem_object_flush_map(smoke.batch);
3542 i915_gem_object_unpin_map(smoke.batch);
3544 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3549 for (n = 0; n < smoke.ncontext; n++) {
3550 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3551 if (!smoke.contexts[n])
3555 for (n = 0; n < ARRAY_SIZE(phase); n++) {
3556 err = smoke_crescendo(&smoke, phase[n]);
3560 err = smoke_random(&smoke, phase[n]);
3566 if (igt_live_test_end(&t))
3569 for (n = 0; n < smoke.ncontext; n++) {
3570 if (!smoke.contexts[n])
3572 kernel_context_close(smoke.contexts[n]);
3576 i915_gem_object_put(smoke.batch);
3578 kfree(smoke.contexts);
3583 static int nop_virtual_engine(struct intel_gt *gt,
3584 struct intel_engine_cs **siblings,
3585 unsigned int nsibling,
3588 #define CHAIN BIT(0)
3590 IGT_TIMEOUT(end_time);
3591 struct i915_request *request[16] = {};
3592 struct intel_context *ve[16];
3593 unsigned long n, prime, nc;
3594 struct igt_live_test t;
3595 ktime_t times[2] = {};
3598 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3600 for (n = 0; n < nctx; n++) {
3601 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3602 if (IS_ERR(ve[n])) {
3603 err = PTR_ERR(ve[n]);
3608 err = intel_context_pin(ve[n]);
3610 intel_context_put(ve[n]);
3616 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3620 for_each_prime_number_from(prime, 1, 8192) {
3621 times[1] = ktime_get_raw();
3623 if (flags & CHAIN) {
3624 for (nc = 0; nc < nctx; nc++) {
3625 for (n = 0; n < prime; n++) {
3626 struct i915_request *rq;
3628 rq = i915_request_create(ve[nc]);
3635 i915_request_put(request[nc]);
3636 request[nc] = i915_request_get(rq);
3637 i915_request_add(rq);
3641 for (n = 0; n < prime; n++) {
3642 for (nc = 0; nc < nctx; nc++) {
3643 struct i915_request *rq;
3645 rq = i915_request_create(ve[nc]);
3652 i915_request_put(request[nc]);
3653 request[nc] = i915_request_get(rq);
3654 i915_request_add(rq);
3659 for (nc = 0; nc < nctx; nc++) {
3660 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3661 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3662 __func__, ve[0]->engine->name,
3663 request[nc]->fence.context,
3664 request[nc]->fence.seqno);
3666 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3667 __func__, ve[0]->engine->name,
3668 request[nc]->fence.context,
3669 request[nc]->fence.seqno);
3671 intel_gt_set_wedged(gt);
3676 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3678 times[0] = times[1];
3680 for (nc = 0; nc < nctx; nc++) {
3681 i915_request_put(request[nc]);
3685 if (__igt_timeout(end_time, NULL))
3689 err = igt_live_test_end(&t);
3693 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3694 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3695 prime, div64_u64(ktime_to_ns(times[1]), prime));
3698 if (igt_flush_test(gt->i915))
3701 for (nc = 0; nc < nctx; nc++) {
3702 i915_request_put(request[nc]);
3703 intel_context_unpin(ve[nc]);
3704 intel_context_put(ve[nc]);
3709 static int live_virtual_engine(void *arg)
3711 struct intel_gt *gt = arg;
3712 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3713 struct intel_engine_cs *engine;
3714 enum intel_engine_id id;
3715 unsigned int class, inst;
3718 if (intel_uc_uses_guc_submission(>->uc))
3721 for_each_engine(engine, gt, id) {
3722 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3724 pr_err("Failed to wrap engine %s: err=%d\n",
3730 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3734 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3735 if (!gt->engine_class[class][inst])
3738 siblings[nsibling++] = gt->engine_class[class][inst];
3743 for (n = 1; n <= nsibling + 1; n++) {
3744 err = nop_virtual_engine(gt, siblings, nsibling,
3750 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3758 static int mask_virtual_engine(struct intel_gt *gt,
3759 struct intel_engine_cs **siblings,
3760 unsigned int nsibling)
3762 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3763 struct intel_context *ve;
3764 struct igt_live_test t;
3769 * Check that by setting the execution mask on a request, we can
3770 * restrict it to our desired engine within the virtual engine.
3773 ve = intel_execlists_create_virtual(siblings, nsibling);
3779 err = intel_context_pin(ve);
3783 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3787 for (n = 0; n < nsibling; n++) {
3788 request[n] = i915_request_create(ve);
3789 if (IS_ERR(request[n])) {
3790 err = PTR_ERR(request[n]);
3795 /* Reverse order as it's more likely to be unnatural */
3796 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3798 i915_request_get(request[n]);
3799 i915_request_add(request[n]);
3802 for (n = 0; n < nsibling; n++) {
3803 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3804 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3805 __func__, ve->engine->name,
3806 request[n]->fence.context,
3807 request[n]->fence.seqno);
3809 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3810 __func__, ve->engine->name,
3811 request[n]->fence.context,
3812 request[n]->fence.seqno);
3814 intel_gt_set_wedged(gt);
3819 if (request[n]->engine != siblings[nsibling - n - 1]) {
3820 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3821 request[n]->engine->name,
3822 siblings[nsibling - n - 1]->name);
3828 err = igt_live_test_end(&t);
3830 if (igt_flush_test(gt->i915))
3833 for (n = 0; n < nsibling; n++)
3834 i915_request_put(request[n]);
3837 intel_context_unpin(ve);
3839 intel_context_put(ve);
3844 static int live_virtual_mask(void *arg)
3846 struct intel_gt *gt = arg;
3847 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3848 unsigned int class, inst;
3851 if (intel_uc_uses_guc_submission(>->uc))
3854 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3855 unsigned int nsibling;
3858 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3859 if (!gt->engine_class[class][inst])
3862 siblings[nsibling++] = gt->engine_class[class][inst];
3867 err = mask_virtual_engine(gt, siblings, nsibling);
3875 static int preserved_virtual_engine(struct intel_gt *gt,
3876 struct intel_engine_cs **siblings,
3877 unsigned int nsibling)
3879 struct i915_request *last = NULL;
3880 struct intel_context *ve;
3881 struct i915_vma *scratch;
3882 struct igt_live_test t;
3887 scratch = create_scratch(siblings[0]->gt);
3888 if (IS_ERR(scratch))
3889 return PTR_ERR(scratch);
3891 err = i915_vma_sync(scratch);
3895 ve = intel_execlists_create_virtual(siblings, nsibling);
3901 err = intel_context_pin(ve);
3905 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3909 for (n = 0; n < NUM_GPR_DW; n++) {
3910 struct intel_engine_cs *engine = siblings[n % nsibling];
3911 struct i915_request *rq;
3913 rq = i915_request_create(ve);
3919 i915_request_put(last);
3920 last = i915_request_get(rq);
3922 cs = intel_ring_begin(rq, 8);
3924 i915_request_add(rq);
3929 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3930 *cs++ = CS_GPR(engine, n);
3931 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3934 *cs++ = MI_LOAD_REGISTER_IMM(1);
3935 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
3939 intel_ring_advance(rq, cs);
3941 /* Restrict this request to run on a particular engine */
3942 rq->execution_mask = engine->mask;
3943 i915_request_add(rq);
3946 if (i915_request_wait(last, 0, HZ / 5) < 0) {
3951 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3957 for (n = 0; n < NUM_GPR_DW; n++) {
3959 pr_err("Incorrect value[%d] found for GPR[%d]\n",
3966 i915_gem_object_unpin_map(scratch->obj);
3969 if (igt_live_test_end(&t))
3971 i915_request_put(last);
3973 intel_context_unpin(ve);
3975 intel_context_put(ve);
3977 i915_vma_unpin_and_release(&scratch, 0);
3981 static int live_virtual_preserved(void *arg)
3983 struct intel_gt *gt = arg;
3984 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3985 unsigned int class, inst;
3988 * Check that the context image retains non-privileged (user) registers
3989 * from one engine to the next. For this we check that the CS_GPR
3993 if (intel_uc_uses_guc_submission(>->uc))
3996 /* As we use CS_GPR we cannot run before they existed on all engines. */
3997 if (INTEL_GEN(gt->i915) < 9)
4000 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4004 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
4005 if (!gt->engine_class[class][inst])
4008 siblings[nsibling++] = gt->engine_class[class][inst];
4013 err = preserved_virtual_engine(gt, siblings, nsibling);
4021 static int bond_virtual_engine(struct intel_gt *gt,
4023 struct intel_engine_cs **siblings,
4024 unsigned int nsibling,
4026 #define BOND_SCHEDULE BIT(0)
4028 struct intel_engine_cs *master;
4029 struct i915_request *rq[16];
4030 enum intel_engine_id id;
4031 struct igt_spinner spin;
4036 * A set of bonded requests is intended to be run concurrently
4037 * across a number of engines. We use one request per engine
4038 * and a magic fence to schedule each of the bonded requests
4039 * at the same time. A consequence of our current scheduler is that
4040 * we only move requests to the HW ready queue when the request
4041 * becomes ready, that is when all of its prerequisite fences have
4042 * been signaled. As one of those fences is the master submit fence,
4043 * there is a delay on all secondary fences as the HW may be
4044 * currently busy. Equally, as all the requests are independent,
4045 * they may have other fences that delay individual request
4046 * submission to HW. Ergo, we do not guarantee that all requests are
4047 * immediately submitted to HW at the same time, just that if the
4048 * rules are abided by, they are ready at the same time as the
4049 * first is submitted. Userspace can embed semaphores in its batch
4050 * to ensure parallel execution of its phases as it requires.
4051 * Though naturally it gets requested that perhaps the scheduler should
4052 * take care of parallel execution, even across preemption events on
4053 * different HW. (The proper answer is of course "lalalala".)
4055 * With the submit-fence, we have identified three possible phases
4056 * of synchronisation depending on the master fence: queued (not
4057 * ready), executing, and signaled. The first two are quite simple
4058 * and checked below. However, the signaled master fence handling is
4059 * contentious. Currently we do not distinguish between a signaled
4060 * fence and an expired fence, as once signaled it does not convey
4061 * any information about the previous execution. It may even be freed
4062 * and hence checking later it may not exist at all. Ergo we currently
4063 * do not apply the bonding constraint for an already signaled fence,
4064 * as our expectation is that it should not constrain the secondaries
4065 * and is outside of the scope of the bonded request API (i.e. all
4066 * userspace requests are meant to be running in parallel). As
4067 * it imposes no constraint, and is effectively a no-op, we do not
4068 * check below as normal execution flows are checked extensively above.
4070 * XXX Is the degenerate handling of signaled submit fences the
4071 * expected behaviour for userspace?
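*
* In outline: rq[0] is a spinner submitted to a master engine (optionally
* gated behind an onstack submit fence for the "schedule" phase), and each
* rq[n + 1] is submitted to a virtual engine with
* i915_request_await_execution() on rq[0]'s fence, so the bond steers it
* onto siblings[n] once the master starts executing.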
4074 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4076 if (igt_spinner_init(&spin, gt))
4080 rq[0] = ERR_PTR(-ENOMEM);
4081 for_each_engine(master, gt, id) {
4082 struct i915_sw_fence fence = {};
4083 struct intel_context *ce;
4085 if (master->class == class)
4088 ce = intel_context_create(master);
4094 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4096 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4097 intel_context_put(ce);
4098 if (IS_ERR(rq[0])) {
4099 err = PTR_ERR(rq[0]);
4102 i915_request_get(rq[0]);
4104 if (flags & BOND_SCHEDULE) {
4105 onstack_fence_init(&fence);
4106 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4111 i915_request_add(rq[0]);
4115 if (!(flags & BOND_SCHEDULE) &&
4116 !igt_wait_for_spinner(&spin, rq[0])) {
4121 for (n = 0; n < nsibling; n++) {
4122 struct intel_context *ve;
4124 ve = intel_execlists_create_virtual(siblings, nsibling);
4127 onstack_fence_fini(&fence);
4131 err = intel_virtual_engine_attach_bond(ve->engine,
4135 intel_context_put(ve);
4136 onstack_fence_fini(&fence);
4140 err = intel_context_pin(ve);
4141 intel_context_put(ve);
4143 onstack_fence_fini(&fence);
4147 rq[n + 1] = i915_request_create(ve);
4148 intel_context_unpin(ve);
4149 if (IS_ERR(rq[n + 1])) {
4150 err = PTR_ERR(rq[n + 1]);
4151 onstack_fence_fini(&fence);
4154 i915_request_get(rq[n + 1]);
4156 err = i915_request_await_execution(rq[n + 1],
4158 ve->engine->bond_execute);
4159 i915_request_add(rq[n + 1]);
4161 onstack_fence_fini(&fence);
4165 onstack_fence_fini(&fence);
4166 intel_engine_flush_submission(master);
4167 igt_spinner_end(&spin);
4169 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4170 pr_err("Master request did not execute (on %s)!\n",
4171 rq[0]->engine->name);
4176 for (n = 0; n < nsibling; n++) {
4177 if (i915_request_wait(rq[n + 1], 0,
4178 MAX_SCHEDULE_TIMEOUT) < 0) {
4183 if (rq[n + 1]->engine != siblings[n]) {
4184 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4186 rq[n + 1]->engine->name,
4187 rq[0]->engine->name);
4193 for (n = 0; !IS_ERR(rq[n]); n++)
4194 i915_request_put(rq[n]);
4195 rq[0] = ERR_PTR(-ENOMEM);
4199 for (n = 0; !IS_ERR(rq[n]); n++)
4200 i915_request_put(rq[n]);
4201 if (igt_flush_test(gt->i915))
4204 igt_spinner_fini(&spin);
4208 static int live_virtual_bond(void *arg)
4210 static const struct phase {
4215 { "schedule", BOND_SCHEDULE },
4218 struct intel_gt *gt = arg;
4219 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4220 unsigned int class, inst;
4223 if (intel_uc_uses_guc_submission(>->uc))
4226 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4227 const struct phase *p;
4231 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
4232 if (!gt->engine_class[class][inst])
4235 GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
4236 siblings[nsibling++] = gt->engine_class[class][inst];
4241 for (p = phases; p->name; p++) {
4242 err = bond_virtual_engine(gt,
4243 class, siblings, nsibling,
4246 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4247 __func__, p->name, class, nsibling, err);
4256 static int reset_virtual_engine(struct intel_gt *gt,
4257 struct intel_engine_cs **siblings,
4258 unsigned int nsibling)
4260 struct intel_engine_cs *engine;
4261 struct intel_context *ve;
4262 struct igt_spinner spin;
4263 struct i915_request *rq;
4268 * In order to support offline error capture for fast preempt reset,
4269 * we need to decouple the guilty request and ensure that it and its
4270 * descendants are not executed while the capture is in progress.
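*
* The sequence below mimics that flow: hold the spinning request with
* execlists_hold(), reset the physical engine underneath the virtual
* engine, check the held request is not resubmitted by itself, and only
* then release it with execlists_unhold() and wait for it to complete.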
4273 if (igt_spinner_init(&spin, gt))
4276 ve = intel_execlists_create_virtual(siblings, nsibling);
4282 for (n = 0; n < nsibling; n++)
4283 engine_heartbeat_disable(siblings[n]);
4285 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4290 i915_request_add(rq);
4292 if (!igt_wait_for_spinner(&spin, rq)) {
4293 intel_gt_set_wedged(gt);
4298 engine = rq->engine;
4299 GEM_BUG_ON(engine == ve->engine);
4301 /* Take ownership of the reset and tasklet */
4302 if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
4303 >->reset.flags)) {
4304 intel_gt_set_wedged(gt);
4308 tasklet_disable(&engine->execlists.tasklet);
4310 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
4311 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4313 /* Fake a preemption event; failed of course */
4314 spin_lock_irq(&engine->active.lock);
4315 __unwind_incomplete_requests(engine);
4316 spin_unlock_irq(&engine->active.lock);
4317 GEM_BUG_ON(rq->engine != ve->engine);
4319 /* Reset the engine while keeping our active request on hold */
4320 execlists_hold(engine, rq);
4321 GEM_BUG_ON(!i915_request_on_hold(rq));
4323 intel_engine_reset(engine, NULL);
4324 GEM_BUG_ON(rq->fence.error != -EIO);
4326 /* Release our grasp on the engine, letting CS flow again */
4327 tasklet_enable(&engine->execlists.tasklet);
4328 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags);
4330 /* Check that we do not resubmit the held request */
4331 i915_request_get(rq);
4332 if (!i915_request_wait(rq, 0, HZ / 5)) {
4333 pr_err("%s: on hold request completed!\n",
4335 intel_gt_set_wedged(gt);
4339 GEM_BUG_ON(!i915_request_on_hold(rq));
4341 /* But is resubmitted on release */
4342 execlists_unhold(engine, rq);
4343 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4344 pr_err("%s: held request did not complete!\n",
4346 intel_gt_set_wedged(gt);
4351 i915_request_put(rq);
4353 for (n = 0; n < nsibling; n++)
4354 engine_heartbeat_enable(siblings[n]);
4356 intel_context_put(ve);
4358 igt_spinner_fini(&spin);
4362 static int live_virtual_reset(void *arg)
4364 struct intel_gt *gt = arg;
4365 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4366 unsigned int class, inst;
4369 * Check that we handle a reset event within a virtual engine.
4370 * Only the physical engine is reset, but we have to check the flow
4371 * of the virtual requests around the reset, and make sure it is not
4375 if (intel_uc_uses_guc_submission(>->uc))
4378 if (!intel_has_reset_engine(gt))
4381 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4385 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
4386 if (!gt->engine_class[class][inst])
4389 siblings[nsibling++] = gt->engine_class[class][inst];
4394 err = reset_virtual_engine(gt, siblings, nsibling);
4402 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4404 static const struct i915_subtest tests[] = {
4405 SUBTEST(live_sanitycheck),
4406 SUBTEST(live_unlite_switch),
4407 SUBTEST(live_unlite_preempt),
4408 SUBTEST(live_pin_rewind),
4409 SUBTEST(live_hold_reset),
4410 SUBTEST(live_error_interrupt),
4411 SUBTEST(live_timeslice_preempt),
4412 SUBTEST(live_timeslice_rewind),
4413 SUBTEST(live_timeslice_queue),
4414 SUBTEST(live_timeslice_nopreempt),
4415 SUBTEST(live_busywait_preempt),
4416 SUBTEST(live_preempt),
4417 SUBTEST(live_late_preempt),
4418 SUBTEST(live_nopreempt),
4419 SUBTEST(live_preempt_cancel),
4420 SUBTEST(live_suppress_self_preempt),
4421 SUBTEST(live_suppress_wait_preempt),
4422 SUBTEST(live_chain_preempt),
4423 SUBTEST(live_preempt_gang),
4424 SUBTEST(live_preempt_timeout),
4425 SUBTEST(live_preempt_user),
4426 SUBTEST(live_preempt_smoke),
4427 SUBTEST(live_virtual_engine),
4428 SUBTEST(live_virtual_mask),
4429 SUBTEST(live_virtual_preserved),
4430 SUBTEST(live_virtual_bond),
4431 SUBTEST(live_virtual_reset),
4434 if (!HAS_EXECLISTS(i915))
4437 if (intel_gt_is_wedged(&i915->gt))
4440 return intel_gt_live_subtests(tests, &i915->gt);
4443 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
4446 i915_ggtt_offset(ce->engine->status_page.vma) +
4447 offset_in_page(slot);
4448 struct i915_request *rq;
4451 rq = intel_context_create_request(ce);
4455 cs = intel_ring_begin(rq, 4);
4457 i915_request_add(rq);
4461 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
4466 intel_ring_advance(rq, cs);
4468 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4469 i915_request_add(rq);
4473 static int context_flush(struct intel_context *ce, long timeout)
4475 struct i915_request *rq;
4476 struct dma_fence *fence;
4479 rq = intel_engine_create_kernel_request(ce->engine);
4483 fence = i915_active_fence_get(&ce->timeline->last_request);
4485 i915_request_await_dma_fence(rq, fence);
4486 dma_fence_put(fence);
4489 rq = i915_request_get(rq);
4490 i915_request_add(rq);
4491 if (i915_request_wait(rq, 0, timeout) < 0)
4493 i915_request_put(rq);
4495 rmb(); /* We know the request is written, make sure all state is too! */
4499 static int live_lrc_layout(void *arg)
4501 struct intel_gt *gt = arg;
4502 struct intel_engine_cs *engine;
4503 enum intel_engine_id id;
4508 * Check the register offsets we use to create the initial reg state
4509 * match the layout saved by HW.
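*
* We do that by building a fresh reg state with execlists_init_reg_state()
* and walking it in lock-step with the LRI list captured in the engine's
* default context image, flagging any command or register-offset mismatch.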
4512 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
4517 for_each_engine(engine, gt, id) {
4521 if (!engine->default_state)
4524 hw = shmem_pin_map(engine->default_state);
4529 hw += LRC_STATE_OFFSET / sizeof(*hw);
4531 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
4532 engine->kernel_context,
4534 engine->kernel_context->ring,
4547 pr_debug("%s: skipped instruction %x at dword %d\n",
4548 engine->name, lri, dw);
4553 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4554 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4555 engine->name, dw, lri);
4560 if (lrc[dw] != lri) {
4561 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4562 engine->name, dw, lri, lrc[dw]);
4572 if (hw[dw] != lrc[dw]) {
4573 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4574 engine->name, dw, hw[dw], lrc[dw]);
4580 * Skip over the actual register value as we
4581 * expect that to differ.
4586 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4589 pr_info("%s: HW register image:\n", engine->name);
4590 igt_hexdump(hw, PAGE_SIZE);
4592 pr_info("%s: SW register image:\n", engine->name);
4593 igt_hexdump(lrc, PAGE_SIZE);
4596 shmem_unpin_map(engine->default_state, hw);
4605 static int find_offset(const u32 *lri, u32 offset)
4609 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
4610 if (lri[i] == offset)
4616 static int live_lrc_fixed(void *arg)
4618 struct intel_gt *gt = arg;
4619 struct intel_engine_cs *engine;
4620 enum intel_engine_id id;
4624 * Check the assumed register offsets match the actual locations in
4625 * the context image.
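*
* For each entry in the table below we search the default context image
* for the register's mmio offset and check that the dword at which it is
* found matches the offset the driver assumes for that register.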
4628 for_each_engine(engine, gt, id) {
4635 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
4640 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
4645 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
4650 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
4655 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
4656 lrc_ring_mi_mode(engine),
4660 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
4665 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
4666 lrc_ring_wa_bb_per_ctx(engine),
4667 "RING_BB_PER_CTX_PTR"
4670 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
4671 lrc_ring_indirect_ptr(engine),
4672 "RING_INDIRECT_CTX_PTR"
4675 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
4676 lrc_ring_indirect_offset(engine),
4677 "RING_INDIRECT_CTX_OFFSET"
4680 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
4682 "RING_CTX_TIMESTAMP"
4685 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
4686 lrc_ring_gpr0(engine),
4690 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
4691 lrc_ring_cmd_buf_cctl(engine),
4698 if (!engine->default_state)
4701 hw = shmem_pin_map(engine->default_state);
4706 hw += LRC_STATE_OFFSET / sizeof(*hw);
4708 for (t = tbl; t->name; t++) {
4709 int dw = find_offset(hw, t->reg);
4711 if (dw != t->offset) {
4712 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4722 shmem_unpin_map(engine->default_state, hw);
4728 static int __live_lrc_state(struct intel_engine_cs *engine,
4729 struct i915_vma *scratch)
4731 struct intel_context *ce;
4732 struct i915_request *rq;
4738 u32 expected[MAX_IDX];
4743 ce = intel_context_create(engine);
4747 err = intel_context_pin(ce);
4751 rq = i915_request_create(ce);
4757 cs = intel_ring_begin(rq, 4 * MAX_IDX);
4760 i915_request_add(rq);
4764 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4765 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
4766 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
4769 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
4771 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4772 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
4773 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
4776 i915_vma_lock(scratch);
4777 err = i915_request_await_object(rq, scratch->obj, true);
4779 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4780 i915_vma_unlock(scratch);
4782 i915_request_get(rq);
4783 i915_request_add(rq);
4787 intel_engine_flush_submission(engine);
4788 expected[RING_TAIL_IDX] = ce->ring->tail;
4790 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4795 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4801 for (n = 0; n < MAX_IDX; n++) {
4802 if (cs[n] != expected[n]) {
4803 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
4804 engine->name, n, cs[n], expected[n]);
4810 i915_gem_object_unpin_map(scratch->obj);
4813 i915_request_put(rq);
4815 intel_context_unpin(ce);
4817 intel_context_put(ce);
4821 static int live_lrc_state(void *arg)
4823 struct intel_gt *gt = arg;
4824 struct intel_engine_cs *engine;
4825 struct i915_vma *scratch;
4826 enum intel_engine_id id;
4830 * Check the live register state matches what we expect for this
4834 scratch = create_scratch(gt);
4835 if (IS_ERR(scratch))
4836 return PTR_ERR(scratch);
4838 for_each_engine(engine, gt, id) {
4839 err = __live_lrc_state(engine, scratch);
4844 if (igt_flush_test(gt->i915))
4847 i915_vma_unpin_and_release(&scratch, 0);
4851 static int gpr_make_dirty(struct intel_context *ce)
4853 struct i915_request *rq;
4857 rq = intel_context_create_request(ce);
4861 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
4863 i915_request_add(rq);
4867 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
4868 for (n = 0; n < NUM_GPR_DW; n++) {
4869 *cs++ = CS_GPR(ce->engine, n);
4870 *cs++ = STACK_MAGIC;
4874 intel_ring_advance(rq, cs);
4876 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4877 i915_request_add(rq);
4882 static struct i915_request *
4883 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
4886 i915_ggtt_offset(ce->engine->status_page.vma) +
4887 offset_in_page(slot);
4888 struct i915_request *rq;
4893 rq = intel_context_create_request(ce);
4897 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
4899 i915_request_add(rq);
4900 return ERR_CAST(cs);
4903 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4906 *cs++ = MI_SEMAPHORE_WAIT |
4907 MI_SEMAPHORE_GLOBAL_GTT |
4909 MI_SEMAPHORE_SAD_NEQ_SDD;
4914 for (n = 0; n < NUM_GPR_DW; n++) {
4915 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4916 *cs++ = CS_GPR(ce->engine, n);
4917 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4921 i915_vma_lock(scratch);
4922 err = i915_request_await_object(rq, scratch->obj, true);
4924 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4925 i915_vma_unlock(scratch);
4927 i915_request_get(rq);
4928 i915_request_add(rq);
4930 i915_request_put(rq);
4937 static int __live_lrc_gpr(struct intel_engine_cs *engine,
4938 struct i915_vma *scratch,
4941 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
4942 struct intel_context *ce;
4943 struct i915_request *rq;
4948 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
4949 return 0; /* GPR only on rcs0 for gen8 */
4951 err = gpr_make_dirty(engine->kernel_context);
4955 ce = intel_context_create(engine);
4959 rq = __gpr_read(ce, scratch, slot);
4965 err = wait_for_submit(engine, rq, HZ / 2);
4970 err = gpr_make_dirty(engine->kernel_context);
4974 err = emit_semaphore_signal(engine->kernel_context, slot);
4982 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4987 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4993 for (n = 0; n < NUM_GPR_DW; n++) {
4995 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
4997 n / 2, n & 1 ? "udw" : "ldw",
5004 i915_gem_object_unpin_map(scratch->obj);
5007 memset32(&slot[0], -1, 4);
5009 i915_request_put(rq);
5011 intel_context_put(ce);
5015 static int live_lrc_gpr(void *arg)
5017 struct intel_gt *gt = arg;
5018 struct intel_engine_cs *engine;
5019 struct i915_vma *scratch;
5020 enum intel_engine_id id;
5024 * Check that GPR registers are cleared in new contexts as we need
5025 * to avoid leaking any information from previous contexts.
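*
* We do that by dirtying the GPRs via the kernel context and then reading
* them back with MI_STORE_REGISTER_MEM from a freshly created context,
* both with and without a high priority request intervening while the new
* context waits on a semaphore; every dword must come back as zero.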
5028 scratch = create_scratch(gt);
5029 if (IS_ERR(scratch))
5030 return PTR_ERR(scratch);
5032 for_each_engine(engine, gt, id) {
5033 engine_heartbeat_disable(engine);
5035 err = __live_lrc_gpr(engine, scratch, false);
5039 err = __live_lrc_gpr(engine, scratch, true);
5044 engine_heartbeat_enable(engine);
5045 if (igt_flush_test(gt->i915))
5051 i915_vma_unpin_and_release(&scratch, 0);
5055 static struct i915_request *
5056 create_timestamp(struct intel_context *ce, void *slot, int idx)
5059 i915_ggtt_offset(ce->engine->status_page.vma) +
5060 offset_in_page(slot);
5061 struct i915_request *rq;
5065 rq = intel_context_create_request(ce);
5069 cs = intel_ring_begin(rq, 10);
5075 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5078 *cs++ = MI_SEMAPHORE_WAIT |
5079 MI_SEMAPHORE_GLOBAL_GTT |
5081 MI_SEMAPHORE_SAD_NEQ_SDD;
5086 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5087 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
5088 *cs++ = offset + idx * sizeof(u32);
5091 intel_ring_advance(rq, cs);
5093 rq->sched.attr.priority = I915_PRIORITY_MASK;
5096 i915_request_get(rq);
5097 i915_request_add(rq);
5099 i915_request_put(rq);
5100 return ERR_PTR(err);
5106 struct lrc_timestamp {
5107 struct intel_engine_cs *engine;
5108 struct intel_context *ce[2];
5112 static bool timestamp_advanced(u32 start, u32 end)
5114 return (s32)(end - start) > 0;
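/*
 * CTX_TIMESTAMP is a 32b counter that may wrap, so "advanced" is judged
 * by the signed difference rather than a plain unsigned comparison.
 */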
5117 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
5119 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
5120 struct i915_request *rq;
5124 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
5125 rq = create_timestamp(arg->ce[0], slot, 1);
5129 err = wait_for_submit(rq->engine, rq, HZ / 2);
5134 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
5135 err = emit_semaphore_signal(arg->ce[1], slot);
5143 /* And wait for switch to kernel (to save our context to memory) */
5144 err = context_flush(arg->ce[0], HZ / 2);
5148 if (!timestamp_advanced(arg->poison, slot[1])) {
5149 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
5150 arg->engine->name, preempt ? "preempt" : "simple",
5151 arg->poison, slot[1]);
5155 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
5156 if (!timestamp_advanced(slot[1], timestamp)) {
5157 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
5158 arg->engine->name, preempt ? "preempt" : "simple",
5159 slot[1], timestamp);
5164 memset32(slot, -1, 4);
5165 i915_request_put(rq);
5169 static int live_lrc_timestamp(void *arg)
5171 struct lrc_timestamp data = {};
5172 struct intel_gt *gt = arg;
5173 enum intel_engine_id id;
5174 const u32 poison[] = {
5182 * We want to verify that the timestamp is saved and restored across
5183 * context switches and is monotonic.
5185 * So we do this with a little bit of LRC poisoning to check various
5186 * boundary conditions, and see what happens if we preempt the context
5187 * with a second request (carrying more poison into the timestamp).
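*
* Each pass poisons CTX_TIMESTAMP in the context image, emits a request
* that copies RING_CTX_TIMESTAMP into the status page, and then checks
* that the value observed on restore has advanced beyond the poison and
* that the value saved back into the image has advanced beyond that again.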
5190 for_each_engine(data.engine, gt, id) {
5193 engine_heartbeat_disable(data.engine);
5195 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5196 struct intel_context *tmp;
5198 tmp = intel_context_create(data.engine);
5204 err = intel_context_pin(tmp);
5206 intel_context_put(tmp);
5213 for (i = 0; i < ARRAY_SIZE(poison); i++) {
5214 data.poison = poison[i];
5216 err = __lrc_timestamp(&data, false);
5220 err = __lrc_timestamp(&data, true);
5226 engine_heartbeat_enable(data.engine);
5227 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5231 intel_context_unpin(data.ce[i]);
5232 intel_context_put(data.ce[i]);
5235 if (igt_flush_test(gt->i915))
5244 static struct i915_vma *
5245 create_user_vma(struct i915_address_space *vm, unsigned long size)
5247 struct drm_i915_gem_object *obj;
5248 struct i915_vma *vma;
5251 obj = i915_gem_object_create_internal(vm->i915, size);
5253 return ERR_CAST(obj);
5255 vma = i915_vma_instance(obj, vm, NULL);
5257 i915_gem_object_put(obj);
5261 err = i915_vma_pin(vma, 0, 0, PIN_USER);
5263 i915_gem_object_put(obj);
5264 return ERR_PTR(err);
5270 static struct i915_vma *
5271 store_context(struct intel_context *ce, struct i915_vma *scratch)
5273 struct i915_vma *batch;
5274 u32 dw, x, *cs, *hw;
5277 batch = create_user_vma(ce->vm, SZ_64K);
5281 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5283 i915_vma_put(batch);
5284 return ERR_CAST(cs);
5287 defaults = shmem_pin_map(ce->engine->default_state);
5289 i915_gem_object_unpin_map(batch->obj);
5290 i915_vma_put(batch);
5291 return ERR_PTR(-ENOMEM);
5297 hw += LRC_STATE_OFFSET / sizeof(*hw);
5299 u32 len = hw[dw] & 0x7f;
5306 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5312 len = (len + 1) / 2;
5314 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
5316 *cs++ = lower_32_bits(scratch->node.start + x);
5317 *cs++ = upper_32_bits(scratch->node.start + x);
5322 } while (dw < PAGE_SIZE / sizeof(u32) &&
5323 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5325 *cs++ = MI_BATCH_BUFFER_END;
5327 shmem_unpin_map(ce->engine->default_state, defaults);
5329 i915_gem_object_flush_map(batch->obj);
5330 i915_gem_object_unpin_map(batch->obj);
5335 static int move_to_active(struct i915_request *rq,
5336 struct i915_vma *vma,
5342 err = i915_request_await_object(rq, vma->obj, flags);
5344 err = i915_vma_move_to_active(vma, rq, flags);
5345 i915_vma_unlock(vma);
5350 static struct i915_request *
5351 record_registers(struct intel_context *ce,
5352 struct i915_vma *before,
5353 struct i915_vma *after,
5356 struct i915_vma *b_before, *b_after;
5357 struct i915_request *rq;
5361 b_before = store_context(ce, before);
5362 if (IS_ERR(b_before))
5363 return ERR_CAST(b_before);
5365 b_after = store_context(ce, after);
5366 if (IS_ERR(b_after)) {
5367 rq = ERR_CAST(b_after);
5371 rq = intel_context_create_request(ce);
5375 err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
5379 err = move_to_active(rq, b_before, 0);
5383 err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
5387 err = move_to_active(rq, b_after, 0);
5391 cs = intel_ring_begin(rq, 14);
5397 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5398 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5399 *cs++ = lower_32_bits(b_before->node.start);
5400 *cs++ = upper_32_bits(b_before->node.start);
5402 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5403 *cs++ = MI_SEMAPHORE_WAIT |
5404 MI_SEMAPHORE_GLOBAL_GTT |
5406 MI_SEMAPHORE_SAD_NEQ_SDD;
5408 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5409 offset_in_page(sema);
5413 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5414 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5415 *cs++ = lower_32_bits(b_after->node.start);
5416 *cs++ = upper_32_bits(b_after->node.start);
5418 intel_ring_advance(rq, cs);
5420 WRITE_ONCE(*sema, 0);
5421 i915_request_get(rq);
5422 i915_request_add(rq);
5424 i915_vma_put(b_after);
5426 i915_vma_put(b_before);
5430 i915_request_add(rq);
5435 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
5437 struct i915_vma *batch;
5441 batch = create_user_vma(ce->vm, SZ_64K);
5445 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5447 i915_vma_put(batch);
5448 return ERR_CAST(cs);
5451 defaults = shmem_pin_map(ce->engine->default_state);
5453 i915_gem_object_unpin_map(batch->obj);
5454 i915_vma_put(batch);
5455 return ERR_PTR(-ENOMEM);
5460 hw += LRC_STATE_OFFSET / sizeof(*hw);
5462 u32 len = hw[dw] & 0x7f;
5469 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5475 len = (len + 1) / 2;
5476 *cs++ = MI_LOAD_REGISTER_IMM(len);
5482 } while (dw < PAGE_SIZE / sizeof(u32) &&
5483 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5485 *cs++ = MI_BATCH_BUFFER_END;
5487 shmem_unpin_map(ce->engine->default_state, defaults);
5489 i915_gem_object_flush_map(batch->obj);
5490 i915_gem_object_unpin_map(batch->obj);
5495 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
5497 struct i915_request *rq;
5498 struct i915_vma *batch;
5502 batch = load_context(ce, poison);
5504 return PTR_ERR(batch);
5506 rq = intel_context_create_request(ce);
5512 err = move_to_active(rq, batch, 0);
5516 cs = intel_ring_begin(rq, 8);
5522 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5523 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5524 *cs++ = lower_32_bits(batch->node.start);
5525 *cs++ = upper_32_bits(batch->node.start);
5527 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
5528 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5529 offset_in_page(sema);
5533 intel_ring_advance(rq, cs);
5535 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5537 i915_request_add(rq);
5539 i915_vma_put(batch);
5543 static bool is_moving(u32 a, u32 b)
5548 static int compare_isolation(struct intel_engine_cs *engine,
5549 struct i915_vma *ref[2],
5550 struct i915_vma *result[2],
5551 struct intel_context *ce,
5554 u32 x, dw, *hw, *lrc;
5559 A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
5561 return PTR_ERR(A[0]);
5563 A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
5565 err = PTR_ERR(A[1]);
5569 B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
5571 err = PTR_ERR(B[0]);
5575 B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
5577 err = PTR_ERR(B[1]);
5581 lrc = i915_gem_object_pin_map(ce->state->obj,
5582 i915_coherent_map_type(engine->i915));
5587 lrc += LRC_STATE_OFFSET / sizeof(*hw);
5589 defaults = shmem_pin_map(ce->engine->default_state);
5598 hw += LRC_STATE_OFFSET / sizeof(*hw);
5600 u32 len = hw[dw] & 0x7f;
5607 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5613 len = (len + 1) / 2;
5615 if (!is_moving(A[0][x], A[1][x]) &&
5616 (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
5617 switch (hw[dw] & 4095) {
5618 case 0x30: /* RING_HEAD */
5619 case 0x34: /* RING_TAIL */
5623 pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
5626 A[0][x], B[0][x], B[1][x],
5627 poison, lrc[dw + 1]);
5634 } while (dw < PAGE_SIZE / sizeof(u32) &&
5635 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5637 shmem_unpin_map(ce->engine->default_state, defaults);
5639 i915_gem_object_unpin_map(ce->state->obj);
5641 i915_gem_object_unpin_map(result[1]->obj);
5643 i915_gem_object_unpin_map(result[0]->obj);
5645 i915_gem_object_unpin_map(ref[1]->obj);
5647 i915_gem_object_unpin_map(ref[0]->obj);
5651 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
5653 u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
5654 struct i915_vma *ref[2], *result[2];
5655 struct intel_context *A, *B;
5656 struct i915_request *rq;
5659 A = intel_context_create(engine);
5663 B = intel_context_create(engine);
5669 ref[0] = create_user_vma(A->vm, SZ_64K);
5670 if (IS_ERR(ref[0])) {
5671 err = PTR_ERR(ref[0]);
5675 ref[1] = create_user_vma(A->vm, SZ_64K);
5676 if (IS_ERR(ref[1])) {
5677 err = PTR_ERR(ref[1]);
5681 rq = record_registers(A, ref[0], ref[1], sema);
5687 WRITE_ONCE(*sema, 1);
5690 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5691 i915_request_put(rq);
5695 i915_request_put(rq);
5697 result[0] = create_user_vma(A->vm, SZ_64K);
5698 if (IS_ERR(result[0])) {
5699 err = PTR_ERR(result[0]);
5703 result[1] = create_user_vma(A->vm, SZ_64K);
5704 if (IS_ERR(result[1])) {
5705 err = PTR_ERR(result[1]);
5709 rq = record_registers(A, result[0], result[1], sema);
5715 err = poison_registers(B, poison, sema);
5717 WRITE_ONCE(*sema, -1);
5718 i915_request_put(rq);
5722 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5723 i915_request_put(rq);
5727 i915_request_put(rq);
5729 err = compare_isolation(engine, ref, result, A, poison);
5732 i915_vma_put(result[1]);
5734 i915_vma_put(result[0]);
5736 i915_vma_put(ref[1]);
5738 i915_vma_put(ref[0]);
5740 intel_context_put(B);
5742 intel_context_put(A);
5746 static bool skip_isolation(const struct intel_engine_cs *engine)
5748 if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
5751 if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
5757 static int live_lrc_isolation(void *arg)
5759 struct intel_gt *gt = arg;
5760 struct intel_engine_cs *engine;
5761 enum intel_engine_id id;
5762 const u32 poison[] = {
5772 * Our goal is to try and verify that per-context state cannot be
5773 * tampered with by another non-privileged client.
5775 * We take the list of context registers from the LRI in the default
5776 * context image and attempt to modify that list from a remote context.
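*
* Concretely, context A records the LRI-listed registers before and after
* a semaphore wait (the reference pass), then repeats the exercise while
* context B reloads those registers with poison during the wait (the
* result pass); any register that was stable in the reference pass but
* changed in the result pass has leaked between contexts.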
5779 for_each_engine(engine, gt, id) {
5782 /* Just don't even ask */
5783 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
5784 skip_isolation(engine))
5787 intel_engine_pm_get(engine);
5788 for (i = 0; i < ARRAY_SIZE(poison); i++) {
5791 result = __lrc_isolation(engine, poison[i]);
5795 result = __lrc_isolation(engine, ~poison[i]);
5799 intel_engine_pm_put(engine);
5800 if (igt_flush_test(gt->i915)) {
5809 static int indirect_ctx_submit_req(struct intel_context *ce)
5811 struct i915_request *rq;
5814 rq = intel_context_create_request(ce);
5818 i915_request_get(rq);
5819 i915_request_add(rq);
5821 if (i915_request_wait(rq, 0, HZ / 5) < 0)
5824 i915_request_put(rq);
5829 #define CTX_BB_CANARY_OFFSET (3 * 1024)
5830 #define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
5833 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
5835 *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
5836 MI_SRM_LRM_GLOBAL_GTT |
5838 *cs++ = i915_mmio_reg_offset(RING_START(0));
5839 *cs++ = i915_ggtt_offset(ce->state) +
5840 context_wa_bb_offset(ce) +
5841 CTX_BB_CANARY_OFFSET;
5848 indirect_ctx_bb_setup(struct intel_context *ce)
5850 u32 *cs = context_indirect_bb(ce);
5852 cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
5854 setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
5857 static bool check_ring_start(struct intel_context *ce)
5859 const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
5860 LRC_STATE_OFFSET + context_wa_bb_offset(ce);
5862 if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
5865 pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
5866 ctx_bb[CTX_BB_CANARY_INDEX],
5867 ce->lrc_reg_state[CTX_RING_START]);
5872 static int indirect_ctx_bb_check(struct intel_context *ce)
5876 err = indirect_ctx_submit_req(ce);
5880 if (!check_ring_start(ce))
5886 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
5888 struct intel_context *a, *b;
5891 a = intel_context_create(engine);
5894 err = intel_context_pin(a);
5898 b = intel_context_create(engine);
5903 err = intel_context_pin(b);
5907 /* We use the already reserved extra page in context state */
5908 if (!a->wa_bb_page) {
5909 GEM_BUG_ON(b->wa_bb_page);
5910 GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
5915 * In order to test that our per-context bb is truly per-context,
5916 * and executes at the intended spot in the context restore process,
5917 * make the batch store the ring start value to memory.
5918 * As ring start is restored prior to starting the indirect ctx bb and
5919 * as it will be different for each context, it fits to this purpose.
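*
* The canary dword in the reserved wa_bb page starts life as 0xdeadf00d
* and must equal CTX_RING_START in the saved register state after a
* request has run, proving the per-context batch executed for that
* context.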
5921 indirect_ctx_bb_setup(a);
5922 indirect_ctx_bb_setup(b);
5924 err = indirect_ctx_bb_check(a);
5928 err = indirect_ctx_bb_check(b);
5931 intel_context_unpin(b);
5933 intel_context_put(b);
5935 intel_context_unpin(a);
5937 intel_context_put(a);
5942 static int live_lrc_indirect_ctx_bb(void *arg)
5944 struct intel_gt *gt = arg;
5945 struct intel_engine_cs *engine;
5946 enum intel_engine_id id;
5949 for_each_engine(engine, gt, id) {
5950 intel_engine_pm_get(engine);
5951 err = __live_lrc_indirect_ctx_bb(engine);
5952 intel_engine_pm_put(engine);
5954 if (igt_flush_test(gt->i915))
5964 static void garbage_reset(struct intel_engine_cs *engine,
5965 struct i915_request *rq)
5967 const unsigned int bit = I915_RESET_ENGINE + engine->id;
5968 unsigned long *lock = &engine->gt->reset.flags;
5970 if (test_and_set_bit(bit, lock))
5973 tasklet_disable(&engine->execlists.tasklet);
5975 if (!rq->fence.error)
5976 intel_engine_reset(engine, NULL);
5978 tasklet_enable(&engine->execlists.tasklet);
5979 clear_and_wake_up_bit(bit, lock);
5982 static struct i915_request *garbage(struct intel_context *ce,
5983 struct rnd_state *prng)
5985 struct i915_request *rq;
5988 err = intel_context_pin(ce);
5990 return ERR_PTR(err);
5992 prandom_bytes_state(prng,
5994 ce->engine->context_size -
5997 rq = intel_context_create_request(ce);
6003 i915_request_get(rq);
6004 i915_request_add(rq);
6008 intel_context_unpin(ce);
6009 return ERR_PTR(err);
6012 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
6014 struct intel_context *ce;
6015 struct i915_request *hang;
6018 ce = intel_context_create(engine);
6022 hang = garbage(ce, prng);
6024 err = PTR_ERR(hang);
6028 if (wait_for_submit(engine, hang, HZ / 2)) {
6029 i915_request_put(hang);
6034 intel_context_set_banned(ce);
6035 garbage_reset(engine, hang);
6037 intel_engine_flush_submission(engine);
6038 if (!hang->fence.error) {
6039 i915_request_put(hang);
6040 pr_err("%s: corrupted context was not reset\n",
6046 if (i915_request_wait(hang, 0, HZ / 2) < 0) {
6047 pr_err("%s: corrupted context did not recover\n",
6049 i915_request_put(hang);
6053 i915_request_put(hang);
6056 intel_context_put(ce);
6060 static int live_lrc_garbage(void *arg)
6062 struct intel_gt *gt = arg;
6063 struct intel_engine_cs *engine;
6064 enum intel_engine_id id;
6067 * Verify that we can recover if one context state is completely
6071 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
6074 for_each_engine(engine, gt, id) {
6075 I915_RND_STATE(prng);
6078 if (!intel_has_reset_engine(engine->gt))
6081 intel_engine_pm_get(engine);
6082 for (i = 0; i < 3; i++) {
6083 err = __lrc_garbage(engine, &prng);
6087 intel_engine_pm_put(engine);
6089 if (igt_flush_test(gt->i915))
6098 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
6100 struct intel_context *ce;
6101 struct i915_request *rq;
6102 IGT_TIMEOUT(end_time);
6105 ce = intel_context_create(engine);
6109 ce->runtime.num_underflow = 0;
6110 ce->runtime.max_underflow = 0;
6113 unsigned int loop = 1024;
6116 rq = intel_context_create_request(ce);
6123 i915_request_get(rq);
6125 i915_request_add(rq);
6128 if (__igt_timeout(end_time, NULL))
6131 i915_request_put(rq);
6134 err = i915_request_wait(rq, 0, HZ / 5);
6136 pr_err("%s: request not completed!\n", engine->name);
6140 igt_flush_test(engine->i915);
6142 pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
6144 intel_context_get_total_runtime_ns(ce),
6145 intel_context_get_avg_runtime_ns(ce));
6148 if (ce->runtime.num_underflow) {
6149 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
6151 ce->runtime.num_underflow,
6152 ce->runtime.max_underflow);
6158 i915_request_put(rq);
6160 intel_context_put(ce);
6164 static int live_pphwsp_runtime(void *arg)
6166 struct intel_gt *gt = arg;
6167 struct intel_engine_cs *engine;
6168 enum intel_engine_id id;
6172 * Check that cumulative context runtime as stored in the pphwsp[16]
6176 for_each_engine(engine, gt, id) {
6177 err = __live_pphwsp_runtime(engine);
6182 if (igt_flush_test(gt->i915))
6188 int intel_lrc_live_selftests(struct drm_i915_private *i915)
6190 static const struct i915_subtest tests[] = {
6191 SUBTEST(live_lrc_layout),
6192 SUBTEST(live_lrc_fixed),
6193 SUBTEST(live_lrc_state),
6194 SUBTEST(live_lrc_gpr),
6195 SUBTEST(live_lrc_isolation),
6196 SUBTEST(live_lrc_timestamp),
6197 SUBTEST(live_lrc_garbage),
6198 SUBTEST(live_pphwsp_runtime),
6199 SUBTEST(live_lrc_indirect_ctx_bb),
6202 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
6205 return intel_gt_live_subtests(tests, &i915->gt);