2 * SPDX-License-Identifier: MIT
4 * Copyright © 2018 Intel Corporation
7 #include <linux/prime_numbers.h>
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
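/*
 * Note: CS_GPR(engine, n) addresses the n'th dword of the engine's
 * command-streamer general-purpose registers (16 registers of 2 dwords
 * each, starting at mmio_base + 0x600), hence NUM_GPR_DW = 16 * 2.
 */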
26 static struct i915_vma *create_scratch(struct intel_gt *gt)
28 struct drm_i915_gem_object *obj;
32 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
36 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
38 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
40 i915_gem_object_put(obj);
44 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
46 i915_gem_object_put(obj);
53 static void engine_heartbeat_disable(struct intel_engine_cs *engine,
56 *saved = engine->props.heartbeat_interval_ms;
57 engine->props.heartbeat_interval_ms = 0;
59 intel_engine_pm_get(engine);
60 intel_engine_park_heartbeat(engine);
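/*
 * The heartbeat is parked while a test owns the engine so that background
 * heartbeat pulses cannot preempt the carefully staged spinner requests or
 * flag the deliberately blocked engine as hung.
 */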
63 static void engine_heartbeat_enable(struct intel_engine_cs *engine,
66 intel_engine_pm_put(engine);
68 engine->props.heartbeat_interval_ms = saved;
71 static int live_sanitycheck(void *arg)
73 struct intel_gt *gt = arg;
74 struct intel_engine_cs *engine;
75 enum intel_engine_id id;
76 struct igt_spinner spin;
79 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
82 if (igt_spinner_init(&spin, gt))
85 for_each_engine(engine, gt, id) {
86 struct intel_context *ce;
87 struct i915_request *rq;
89 ce = intel_context_create(engine);
95 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
101 i915_request_add(rq);
102 if (!igt_wait_for_spinner(&spin, rq)) {
103 GEM_TRACE("spinner failed to start\n");
105 intel_gt_set_wedged(gt);
110 igt_spinner_end(&spin);
111 if (igt_flush_test(gt->i915)) {
117 intel_context_put(ce);
122 igt_spinner_fini(&spin);
126 static int live_unlite_restore(struct intel_gt *gt, int prio)
128 struct intel_engine_cs *engine;
129 enum intel_engine_id id;
130 struct igt_spinner spin;
134 * Check that we can correctly context switch between 2 instances
135 * on the same engine from the same parent context.
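/*
 * (A "lite restore" is where the HW continues the same context and merely
 * samples the new RING_TAIL rather than performing a full context switch;
 * the rings are poisoned below so that an erroneous lite restore into
 * ce[0] executes the poison and hangs.)
 */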
138 if (igt_spinner_init(&spin, gt))
142 for_each_engine(engine, gt, id) {
143 struct intel_context *ce[2] = {};
144 struct i915_request *rq[2];
145 struct igt_live_test t;
149 if (prio && !intel_engine_has_preemption(engine))
152 if (!intel_engine_can_store_dword(engine))
155 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
159 engine_heartbeat_disable(engine, &saved);
161 for (n = 0; n < ARRAY_SIZE(ce); n++) {
162 struct intel_context *tmp;
164 tmp = intel_context_create(engine);
170 err = intel_context_pin(tmp);
172 intel_context_put(tmp);
177 * Set up the pair of contexts such that if we
178 * lite-restore using the RING_TAIL from ce[1] it
179 * will execute garbage from ce[0]->ring.
181 memset(tmp->ring->vaddr,
182 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
183 tmp->ring->vma->size);
187 GEM_BUG_ON(!ce[1]->ring->size);
188 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
189 __execlists_update_reg_state(ce[1], engine);
191 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
193 err = PTR_ERR(rq[0]);
197 i915_request_get(rq[0]);
198 i915_request_add(rq[0]);
199 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
201 if (!igt_wait_for_spinner(&spin, rq[0])) {
202 i915_request_put(rq[0]);
206 rq[1] = i915_request_create(ce[1]);
208 err = PTR_ERR(rq[1]);
209 i915_request_put(rq[0]);
215 * Ensure we do the switch to ce[1] on completion.
217 * rq[0] is already submitted, so this should reduce
218 * to a no-op (a wait on a request on the same engine
219 * uses the submit fence, not the completion fence),
220 * but it will install a dependency on rq[1] for rq[0]
221 * that will prevent the pair being reordered by timeslicing.
224 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
227 i915_request_get(rq[1]);
228 i915_request_add(rq[1]);
229 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
230 i915_request_put(rq[0]);
233 struct i915_sched_attr attr = {
237 /* Alternatively preempt the spinner with ce[1] */
238 engine->schedule(rq[1], &attr);
241 /* And switch back to ce[0] for good measure */
242 rq[0] = i915_request_create(ce[0]);
244 err = PTR_ERR(rq[0]);
245 i915_request_put(rq[1]);
249 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
250 i915_request_get(rq[0]);
251 i915_request_add(rq[0]);
252 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
253 i915_request_put(rq[1]);
254 i915_request_put(rq[0]);
257 tasklet_kill(&engine->execlists.tasklet); /* flush submission */
258 igt_spinner_end(&spin);
259 for (n = 0; n < ARRAY_SIZE(ce); n++) {
260 if (IS_ERR_OR_NULL(ce[n]))
263 intel_context_unpin(ce[n]);
264 intel_context_put(ce[n]);
267 engine_heartbeat_enable(engine, saved);
268 if (igt_live_test_end(&t))
274 igt_spinner_fini(&spin);
278 static int live_unlite_switch(void *arg)
280 return live_unlite_restore(arg, 0);
283 static int live_unlite_preempt(void *arg)
285 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
289 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
293 cs = intel_ring_begin(rq, 10);
297 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
299 *cs++ = MI_SEMAPHORE_WAIT |
300 MI_SEMAPHORE_GLOBAL_GTT |
302 MI_SEMAPHORE_SAD_NEQ_SDD;
304 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
308 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
309 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
319 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
321 intel_ring_advance(rq, cs);
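/*
 * Each request emitted above busy-waits (with arbitration enabled) until
 * dword[idx] of the scratch page becomes non-zero and then, for idx > 0,
 * stores to dword[idx - 1] to release the previous request in the chain.
 * release_queue() pokes the highest slot, so completion cascades back down
 * to the head, which requires the engine to timeslice past requests that
 * are still spinning on their semaphores.
 */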
325 static struct i915_request *
326 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
328 struct intel_context *ce;
329 struct i915_request *rq;
332 ce = intel_context_create(engine);
336 rq = intel_context_create_request(ce);
341 if (rq->engine->emit_init_breadcrumb)
342 err = rq->engine->emit_init_breadcrumb(rq);
344 err = emit_semaphore_chain(rq, vma, idx);
346 i915_request_get(rq);
347 i915_request_add(rq);
352 intel_context_put(ce);
357 release_queue(struct intel_engine_cs *engine,
358 struct i915_vma *vma,
361 struct i915_sched_attr attr = {
364 struct i915_request *rq;
367 rq = intel_engine_create_kernel_request(engine);
371 cs = intel_ring_begin(rq, 4);
373 i915_request_add(rq);
377 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
378 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
382 intel_ring_advance(rq, cs);
384 i915_request_get(rq);
385 i915_request_add(rq);
388 engine->schedule(rq, &attr);
389 local_bh_enable(); /* kick tasklet */
391 i915_request_put(rq);
397 slice_semaphore_queue(struct intel_engine_cs *outer,
398 struct i915_vma *vma,
401 struct intel_engine_cs *engine;
402 struct i915_request *head;
403 enum intel_engine_id id;
406 head = semaphore_queue(outer, vma, n++);
408 return PTR_ERR(head);
410 for_each_engine(engine, outer->gt, id) {
411 for (i = 0; i < count; i++) {
412 struct i915_request *rq;
414 rq = semaphore_queue(engine, vma, n++);
420 i915_request_put(rq);
424 err = release_queue(outer, vma, n, INT_MAX);
428 if (i915_request_wait(head, 0,
429 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
430 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
433 intel_gt_set_wedged(outer->gt);
438 i915_request_put(head);
442 static int live_timeslice_preempt(void *arg)
444 struct intel_gt *gt = arg;
445 struct drm_i915_gem_object *obj;
446 struct i915_vma *vma;
452 * If a request takes too long, we would like to give other users
453 * a fair go on the GPU. In particular, users may create batches
454 * that wait upon external input, where that input may even be
455 * supplied by another GPU job. To avoid blocking forever, we
456 * need to preempt the current task and replace it with another ready task.
459 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
462 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
466 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
472 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
474 err = PTR_ERR(vaddr);
478 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
482 for_each_prime_number_from(count, 1, 16) {
483 struct intel_engine_cs *engine;
484 enum intel_engine_id id;
486 for_each_engine(engine, gt, id) {
489 if (!intel_engine_has_preemption(engine))
492 memset(vaddr, 0, PAGE_SIZE);
494 engine_heartbeat_disable(engine, &saved);
495 err = slice_semaphore_queue(engine, vma, count);
496 engine_heartbeat_enable(engine, saved);
500 if (igt_flush_test(gt->i915)) {
510 i915_gem_object_unpin_map(obj);
512 i915_gem_object_put(obj);
516 static struct i915_request *nop_request(struct intel_engine_cs *engine)
518 struct i915_request *rq;
520 rq = intel_engine_create_kernel_request(engine);
524 i915_request_get(rq);
525 i915_request_add(rq);
530 static int wait_for_submit(struct intel_engine_cs *engine,
531 struct i915_request *rq,
532 unsigned long timeout)
537 intel_engine_flush_submission(engine);
538 if (i915_request_is_active(rq))
540 } while (time_before(jiffies, timeout));
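/*
 * Budget for roughly two expired timeslices (plus a jiffie of slack) before
 * we give up waiting for the semaphore to be evicted in favour of the queue.
 */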
545 static long timeslice_threshold(const struct intel_engine_cs *engine)
547 return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
550 static int live_timeslice_queue(void *arg)
552 struct intel_gt *gt = arg;
553 struct drm_i915_gem_object *obj;
554 struct intel_engine_cs *engine;
555 enum intel_engine_id id;
556 struct i915_vma *vma;
561 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
562 * timeslicing between them disabled, we *do* enable timeslicing
563 * if the queue demands it. (Normally, we do not submit if
564 * ELSP[1] is already occupied, so must rely on timeslicing to
565 * eject ELSP[0] in favour of the queue.)
567 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
570 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
574 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
580 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
582 err = PTR_ERR(vaddr);
586 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
590 for_each_engine(engine, gt, id) {
591 struct i915_sched_attr attr = {
592 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
594 struct i915_request *rq, *nop;
597 if (!intel_engine_has_preemption(engine))
600 engine_heartbeat_disable(engine, &saved);
601 memset(vaddr, 0, PAGE_SIZE);
603 /* ELSP[0]: semaphore wait */
604 rq = semaphore_queue(engine, vma, 0);
609 engine->schedule(rq, &attr);
610 err = wait_for_submit(engine, rq, HZ / 2);
612 pr_err("%s: Timed out trying to submit semaphores\n",
617 /* ELSP[1]: nop request */
618 nop = nop_request(engine);
623 err = wait_for_submit(engine, nop, HZ / 2);
624 i915_request_put(nop);
626 pr_err("%s: Timed out trying to submit nop\n",
631 GEM_BUG_ON(i915_request_completed(rq));
632 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
634 /* Queue: semaphore signal, at the same priority as the semaphore */
635 err = release_queue(engine, vma, 1, effective_prio(rq));
639 intel_engine_flush_submission(engine);
640 if (!READ_ONCE(engine->execlists.timer.expires) &&
641 !i915_request_completed(rq)) {
642 struct drm_printer p =
643 drm_info_printer(gt->i915->drm.dev);
645 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
647 intel_engine_dump(engine, &p,
648 "%s\n", engine->name);
651 memset(vaddr, 0xff, PAGE_SIZE);
655 /* Timeslice every jiffy, so within 2 we should signal */
656 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
657 struct drm_printer p =
658 drm_info_printer(gt->i915->drm.dev);
660 pr_err("%s: Failed to timeslice into queue\n",
662 intel_engine_dump(engine, &p,
663 "%s\n", engine->name);
665 memset(vaddr, 0xff, PAGE_SIZE);
669 i915_request_put(rq);
671 engine_heartbeat_enable(engine, saved);
678 i915_gem_object_unpin_map(obj);
680 i915_gem_object_put(obj);
684 static int live_busywait_preempt(void *arg)
686 struct intel_gt *gt = arg;
687 struct i915_gem_context *ctx_hi, *ctx_lo;
688 struct intel_engine_cs *engine;
689 struct drm_i915_gem_object *obj;
690 struct i915_vma *vma;
691 enum intel_engine_id id;
696 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
697 * preempt the busywaits used to synchronise between rings.
700 ctx_hi = kernel_context(gt->i915);
703 ctx_hi->sched.priority =
704 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
706 ctx_lo = kernel_context(gt->i915);
709 ctx_lo->sched.priority =
710 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
712 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
718 map = i915_gem_object_pin_map(obj, I915_MAP_WC);
724 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
730 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
734 for_each_engine(engine, gt, id) {
735 struct i915_request *lo, *hi;
736 struct igt_live_test t;
739 if (!intel_engine_has_preemption(engine))
742 if (!intel_engine_can_store_dword(engine))
745 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
751 * We create two requests. The low priority request
752 * busywaits on a semaphore (inside the ringbuffer where
753 * it should be preemptible) and the high priority request
754 * uses a MI_STORE_DWORD_IMM to update the semaphore value
755 * allowing the first request to complete. If preemption
756 * fails, we hang instead.
759 lo = igt_request_alloc(ctx_lo, engine);
765 cs = intel_ring_begin(lo, 8);
768 i915_request_add(lo);
772 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
773 *cs++ = i915_ggtt_offset(vma);
777 /* XXX Do we need a flush + invalidate here? */
779 *cs++ = MI_SEMAPHORE_WAIT |
780 MI_SEMAPHORE_GLOBAL_GTT |
782 MI_SEMAPHORE_SAD_EQ_SDD;
784 *cs++ = i915_ggtt_offset(vma);
787 intel_ring_advance(lo, cs);
789 i915_request_get(lo);
790 i915_request_add(lo);
792 if (wait_for(READ_ONCE(*map), 10)) {
793 i915_request_put(lo);
798 /* Low priority request should be busywaiting now */
799 if (i915_request_wait(lo, 0, 1) != -ETIME) {
800 i915_request_put(lo);
801 pr_err("%s: Busywaiting request did not busywait!\n",
807 hi = igt_request_alloc(ctx_hi, engine);
810 i915_request_put(lo);
814 cs = intel_ring_begin(hi, 4);
817 i915_request_add(hi);
818 i915_request_put(lo);
822 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
823 *cs++ = i915_ggtt_offset(vma);
827 intel_ring_advance(hi, cs);
828 i915_request_add(hi);
830 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
831 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
833 pr_err("%s: Failed to preempt semaphore busywait!\n",
836 intel_engine_dump(engine, &p, "%s\n", engine->name);
839 i915_request_put(lo);
840 intel_gt_set_wedged(gt);
844 GEM_BUG_ON(READ_ONCE(*map));
845 i915_request_put(lo);
847 if (igt_live_test_end(&t)) {
857 i915_gem_object_unpin_map(obj);
859 i915_gem_object_put(obj);
861 kernel_context_close(ctx_lo);
863 kernel_context_close(ctx_hi);
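/*
 * Convenience wrapper: resolve the legacy engine instance of a GEM context
 * into an intel_context and build an igt_spinner request on it.
 */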
867 static struct i915_request *
868 spinner_create_request(struct igt_spinner *spin,
869 struct i915_gem_context *ctx,
870 struct intel_engine_cs *engine,
873 struct intel_context *ce;
874 struct i915_request *rq;
876 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
880 rq = igt_spinner_create_request(spin, ce, arb);
881 intel_context_put(ce);
885 static int live_preempt(void *arg)
887 struct intel_gt *gt = arg;
888 struct i915_gem_context *ctx_hi, *ctx_lo;
889 struct igt_spinner spin_hi, spin_lo;
890 struct intel_engine_cs *engine;
891 enum intel_engine_id id;
894 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
897 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
898 pr_err("Logical preemption supported, but not exposed\n");
900 if (igt_spinner_init(&spin_hi, gt))
903 if (igt_spinner_init(&spin_lo, gt))
906 ctx_hi = kernel_context(gt->i915);
909 ctx_hi->sched.priority =
910 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
912 ctx_lo = kernel_context(gt->i915);
915 ctx_lo->sched.priority =
916 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
918 for_each_engine(engine, gt, id) {
919 struct igt_live_test t;
920 struct i915_request *rq;
922 if (!intel_engine_has_preemption(engine))
925 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
930 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
937 i915_request_add(rq);
938 if (!igt_wait_for_spinner(&spin_lo, rq)) {
939 GEM_TRACE("lo spinner failed to start\n");
941 intel_gt_set_wedged(gt);
946 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
949 igt_spinner_end(&spin_lo);
954 i915_request_add(rq);
955 if (!igt_wait_for_spinner(&spin_hi, rq)) {
956 GEM_TRACE("hi spinner failed to start\n");
958 intel_gt_set_wedged(gt);
963 igt_spinner_end(&spin_hi);
964 igt_spinner_end(&spin_lo);
966 if (igt_live_test_end(&t)) {
974 kernel_context_close(ctx_lo);
976 kernel_context_close(ctx_hi);
978 igt_spinner_fini(&spin_lo);
980 igt_spinner_fini(&spin_hi);
984 static int live_late_preempt(void *arg)
986 struct intel_gt *gt = arg;
987 struct i915_gem_context *ctx_hi, *ctx_lo;
988 struct igt_spinner spin_hi, spin_lo;
989 struct intel_engine_cs *engine;
990 struct i915_sched_attr attr = {};
991 enum intel_engine_id id;
994 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
997 if (igt_spinner_init(&spin_hi, gt))
1000 if (igt_spinner_init(&spin_lo, gt))
1003 ctx_hi = kernel_context(gt->i915);
1007 ctx_lo = kernel_context(gt->i915);
1011 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1012 ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1014 for_each_engine(engine, gt, id) {
1015 struct igt_live_test t;
1016 struct i915_request *rq;
1018 if (!intel_engine_has_preemption(engine))
1021 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1026 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1033 i915_request_add(rq);
1034 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1035 pr_err("First context failed to start\n");
1039 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1042 igt_spinner_end(&spin_lo);
1047 i915_request_add(rq);
1048 if (igt_wait_for_spinner(&spin_hi, rq)) {
1049 pr_err("Second context overtook first?\n");
1053 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1054 engine->schedule(rq, &attr);
1056 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1057 pr_err("High priority context failed to preempt the low priority context\n");
1062 igt_spinner_end(&spin_hi);
1063 igt_spinner_end(&spin_lo);
1065 if (igt_live_test_end(&t)) {
1073 kernel_context_close(ctx_lo);
1075 kernel_context_close(ctx_hi);
1077 igt_spinner_fini(&spin_lo);
1079 igt_spinner_fini(&spin_hi);
1083 igt_spinner_end(&spin_hi);
1084 igt_spinner_end(&spin_lo);
1085 intel_gt_set_wedged(gt);
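/*
 * A preempt_client bundles a spinner with its own GEM context so that tests
 * can submit blocking (and later released) requests at a chosen scheduling
 * priority.
 */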
1090 struct preempt_client {
1091 struct igt_spinner spin;
1092 struct i915_gem_context *ctx;
1095 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1097 c->ctx = kernel_context(gt->i915);
1101 if (igt_spinner_init(&c->spin, gt))
1107 kernel_context_close(c->ctx);
1111 static void preempt_client_fini(struct preempt_client *c)
1113 igt_spinner_fini(&c->spin);
1114 kernel_context_close(c->ctx);
1117 static int live_nopreempt(void *arg)
1119 struct intel_gt *gt = arg;
1120 struct intel_engine_cs *engine;
1121 struct preempt_client a, b;
1122 enum intel_engine_id id;
1126 * Verify that we can disable preemption for an individual request
1127 * that may be under observation and does not want to be interrupted.
1130 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1133 if (preempt_client_init(gt, &a))
1135 if (preempt_client_init(gt, &b))
1137 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1139 for_each_engine(engine, gt, id) {
1140 struct i915_request *rq_a, *rq_b;
1142 if (!intel_engine_has_preemption(engine))
1145 engine->execlists.preempt_hang.count = 0;
1147 rq_a = spinner_create_request(&a.spin,
1151 err = PTR_ERR(rq_a);
1155 /* Low priority client, but unpreemptable! */
1156 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1158 i915_request_add(rq_a);
1159 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1160 pr_err("First client failed to start\n");
1164 rq_b = spinner_create_request(&b.spin,
1168 err = PTR_ERR(rq_b);
1172 i915_request_add(rq_b);
1174 /* B is much more important than A! (But A is unpreemptable.) */
1175 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1177 /* Wait long enough for preemption and timeslicing */
1178 if (igt_wait_for_spinner(&b.spin, rq_b)) {
1179 pr_err("Second client started too early!\n");
1183 igt_spinner_end(&a.spin);
1185 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1186 pr_err("Second client failed to start\n");
1190 igt_spinner_end(&b.spin);
1192 if (engine->execlists.preempt_hang.count) {
1193 pr_err("Preemption recorded x%d; should have been suppressed!\n",
1194 engine->execlists.preempt_hang.count);
1199 if (igt_flush_test(gt->i915))
1205 preempt_client_fini(&b);
1207 preempt_client_fini(&a);
1211 igt_spinner_end(&b.spin);
1212 igt_spinner_end(&a.spin);
1213 intel_gt_set_wedged(gt);
1218 struct live_preempt_cancel {
1219 struct intel_engine_cs *engine;
1220 struct preempt_client a, b;
1223 static int __cancel_active0(struct live_preempt_cancel *arg)
1225 struct i915_request *rq;
1226 struct igt_live_test t;
1229 /* Preempt cancel of ELSP0 */
1230 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1231 if (igt_live_test_begin(&t, arg->engine->i915,
1232 __func__, arg->engine->name))
1235 rq = spinner_create_request(&arg->a.spin,
1236 arg->a.ctx, arg->engine,
1241 clear_bit(CONTEXT_BANNED, &rq->context->flags);
1242 i915_request_get(rq);
1243 i915_request_add(rq);
1244 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1249 intel_context_set_banned(rq->context);
1250 err = intel_engine_pulse(arg->engine);
1254 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1259 if (rq->fence.error != -EIO) {
1260 pr_err("Cancelled inflight0 request did not report -EIO\n");
1266 i915_request_put(rq);
1267 if (igt_live_test_end(&t))
1272 static int __cancel_active1(struct live_preempt_cancel *arg)
1274 struct i915_request *rq[2] = {};
1275 struct igt_live_test t;
1278 /* Preempt cancel of ELSP1 */
1279 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1280 if (igt_live_test_begin(&t, arg->engine->i915,
1281 __func__, arg->engine->name))
1284 rq[0] = spinner_create_request(&arg->a.spin,
1285 arg->a.ctx, arg->engine,
1286 MI_NOOP); /* no preemption */
1288 return PTR_ERR(rq[0]);
1290 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1291 i915_request_get(rq[0]);
1292 i915_request_add(rq[0]);
1293 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1298 rq[1] = spinner_create_request(&arg->b.spin,
1299 arg->b.ctx, arg->engine,
1301 if (IS_ERR(rq[1])) {
1302 err = PTR_ERR(rq[1]);
1306 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1307 i915_request_get(rq[1]);
1308 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1309 i915_request_add(rq[1]);
1313 intel_context_set_banned(rq[1]->context);
1314 err = intel_engine_pulse(arg->engine);
1318 igt_spinner_end(&arg->a.spin);
1319 if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
1324 if (rq[0]->fence.error != 0) {
1325 pr_err("Normal inflight0 request did not complete\n");
1330 if (rq[1]->fence.error != -EIO) {
1331 pr_err("Cancelled inflight1 request did not report -EIO\n");
1337 i915_request_put(rq[1]);
1338 i915_request_put(rq[0]);
1339 if (igt_live_test_end(&t))
1344 static int __cancel_queued(struct live_preempt_cancel *arg)
1346 struct i915_request *rq[3] = {};
1347 struct igt_live_test t;
1350 /* Full ELSP and one in the wings */
1351 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1352 if (igt_live_test_begin(&t, arg->engine->i915,
1353 __func__, arg->engine->name))
1356 rq[0] = spinner_create_request(&arg->a.spin,
1357 arg->a.ctx, arg->engine,
1360 return PTR_ERR(rq[0]);
1362 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1363 i915_request_get(rq[0]);
1364 i915_request_add(rq[0]);
1365 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1370 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
1371 if (IS_ERR(rq[1])) {
1372 err = PTR_ERR(rq[1]);
1376 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1377 i915_request_get(rq[1]);
1378 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1379 i915_request_add(rq[1]);
1383 rq[2] = spinner_create_request(&arg->b.spin,
1384 arg->a.ctx, arg->engine,
1386 if (IS_ERR(rq[2])) {
1387 err = PTR_ERR(rq[2]);
1391 i915_request_get(rq[2]);
1392 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
1393 i915_request_add(rq[2]);
1397 intel_context_set_banned(rq[2]->context);
1398 err = intel_engine_pulse(arg->engine);
1402 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
1407 if (rq[0]->fence.error != -EIO) {
1408 pr_err("Cancelled inflight0 request did not report -EIO\n");
1413 if (rq[1]->fence.error != 0) {
1414 pr_err("Normal inflight1 request did not complete\n");
1419 if (rq[2]->fence.error != -EIO) {
1420 pr_err("Cancelled queued request did not report -EIO\n");
1426 i915_request_put(rq[2]);
1427 i915_request_put(rq[1]);
1428 i915_request_put(rq[0]);
1429 if (igt_live_test_end(&t))
1434 static int __cancel_hostile(struct live_preempt_cancel *arg)
1436 struct i915_request *rq;
1439 /* Preempt cancel non-preemptible spinner in ELSP0 */
1440 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
1443 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1444 rq = spinner_create_request(&arg->a.spin,
1445 arg->a.ctx, arg->engine,
1446 MI_NOOP); /* preemption disabled */
1450 clear_bit(CONTEXT_BANNED, &rq->context->flags);
1451 i915_request_get(rq);
1452 i915_request_add(rq);
1453 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1458 intel_context_set_banned(rq->context);
1459 err = intel_engine_pulse(arg->engine); /* force reset */
1463 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1468 if (rq->fence.error != -EIO) {
1469 pr_err("Cancelled inflight0 request did not report -EIO\n");
1475 i915_request_put(rq);
1476 if (igt_flush_test(arg->engine->i915))
1481 static int live_preempt_cancel(void *arg)
1483 struct intel_gt *gt = arg;
1484 struct live_preempt_cancel data;
1485 enum intel_engine_id id;
1489 * To cancel an inflight context, we need to first remove it from the
1490 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
1493 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1496 if (preempt_client_init(gt, &data.a))
1498 if (preempt_client_init(gt, &data.b))
1501 for_each_engine(data.engine, gt, id) {
1502 if (!intel_engine_has_preemption(data.engine))
1505 err = __cancel_active0(&data);
1509 err = __cancel_active1(&data);
1513 err = __cancel_queued(&data);
1517 err = __cancel_hostile(&data);
1524 preempt_client_fini(&data.b);
1526 preempt_client_fini(&data.a);
1531 igt_spinner_end(&data.b.spin);
1532 igt_spinner_end(&data.a.spin);
1533 intel_gt_set_wedged(gt);
1537 static int live_suppress_self_preempt(void *arg)
1539 struct intel_gt *gt = arg;
1540 struct intel_engine_cs *engine;
1541 struct i915_sched_attr attr = {
1542 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
1544 struct preempt_client a, b;
1545 enum intel_engine_id id;
1549 * Verify that if a preemption request does not cause a change in
1550 * the current execution order, the preempt-to-idle injection is
1551 * skipped and that we do not accidentally apply it after the CS interrupt.
1555 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1558 if (USES_GUC_SUBMISSION(gt->i915))
1559 return 0; /* presume black box */
1561 if (intel_vgpu_active(gt->i915))
1562 return 0; /* GVT forces single port & request submission */
1564 if (preempt_client_init(gt, &a))
1566 if (preempt_client_init(gt, &b))
1569 for_each_engine(engine, gt, id) {
1570 struct i915_request *rq_a, *rq_b;
1573 if (!intel_engine_has_preemption(engine))
1576 if (igt_flush_test(gt->i915))
1579 intel_engine_pm_get(engine);
1580 engine->execlists.preempt_hang.count = 0;
1582 rq_a = spinner_create_request(&a.spin,
1586 err = PTR_ERR(rq_a);
1587 intel_engine_pm_put(engine);
1591 i915_request_add(rq_a);
1592 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1593 pr_err("First client failed to start\n");
1594 intel_engine_pm_put(engine);
1598 /* Keep postponing the timer to avoid premature slicing */
1599 mod_timer(&engine->execlists.timer, jiffies + HZ);
1600 for (depth = 0; depth < 8; depth++) {
1601 rq_b = spinner_create_request(&b.spin,
1605 err = PTR_ERR(rq_b);
1606 intel_engine_pm_put(engine);
1609 i915_request_add(rq_b);
1611 GEM_BUG_ON(i915_request_completed(rq_a));
1612 engine->schedule(rq_a, &attr);
1613 igt_spinner_end(&a.spin);
1615 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1616 pr_err("Second client failed to start\n");
1617 intel_engine_pm_put(engine);
1624 igt_spinner_end(&a.spin);
1626 if (engine->execlists.preempt_hang.count) {
1627 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
1629 engine->execlists.preempt_hang.count,
1631 intel_engine_pm_put(engine);
1636 intel_engine_pm_put(engine);
1637 if (igt_flush_test(gt->i915))
1643 preempt_client_fini(&b);
1645 preempt_client_fini(&a);
1649 igt_spinner_end(&b.spin);
1650 igt_spinner_end(&a.spin);
1651 intel_gt_set_wedged(gt);
1656 static int __i915_sw_fence_call
1657 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
1662 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
1664 struct i915_request *rq;
1666 rq = kzalloc(sizeof(*rq), GFP_KERNEL);
1670 rq->engine = engine;
1672 spin_lock_init(&rq->lock);
1673 INIT_LIST_HEAD(&rq->fence.cb_list);
1674 rq->fence.lock = &rq->lock;
1675 rq->fence.ops = &i915_fence_ops;
1677 i915_sched_node_init(&rq->sched);
1679 /* mark this request as permanently incomplete */
1680 rq->fence.seqno = 1;
1681 BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
1682 rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
1683 GEM_BUG_ON(i915_request_completed(rq));
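/*
 * Note: fence.seqno is a 64b value set to 1, while hwsp_seqno points at its
 * (always zero) upper 32 bits, so the breadcrumb can never appear to have
 * been reached and the request stays incomplete until dummy_request_free()
 * signals it by hand.
 */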
1685 i915_sw_fence_init(&rq->submit, dummy_notify);
1686 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
1688 spin_lock_init(&rq->lock);
1689 rq->fence.lock = &rq->lock;
1690 INIT_LIST_HEAD(&rq->fence.cb_list);
1695 static void dummy_request_free(struct i915_request *dummy)
1697 /* We have to fake the CS interrupt to kick the next request */
1698 i915_sw_fence_commit(&dummy->submit);
1700 i915_request_mark_complete(dummy);
1701 dma_fence_signal(&dummy->fence);
1703 i915_sched_node_fini(&dummy->sched);
1704 i915_sw_fence_fini(&dummy->submit);
1706 dma_fence_free(&dummy->fence);
1709 static int live_suppress_wait_preempt(void *arg)
1711 struct intel_gt *gt = arg;
1712 struct preempt_client client[4];
1713 struct i915_request *rq[ARRAY_SIZE(client)] = {};
1714 struct intel_engine_cs *engine;
1715 enum intel_engine_id id;
1720 * Waiters are given a little priority nudge, but not enough
1721 * to actually cause any preemption. Double check that we do
1722 * not needlessly generate preempt-to-idle cycles.
1725 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1728 if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
1730 if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
1732 if (preempt_client_init(gt, &client[2])) /* head of queue */
1734 if (preempt_client_init(gt, &client[3])) /* bystander */
1737 for_each_engine(engine, gt, id) {
1740 if (!intel_engine_has_preemption(engine))
1743 if (!engine->emit_init_breadcrumb)
1746 for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
1747 struct i915_request *dummy;
1749 engine->execlists.preempt_hang.count = 0;
1751 dummy = dummy_request(engine);
1755 for (i = 0; i < ARRAY_SIZE(client); i++) {
1756 struct i915_request *this;
1758 this = spinner_create_request(&client[i].spin,
1759 client[i].ctx, engine,
1762 err = PTR_ERR(this);
1766 /* Disable NEWCLIENT promotion */
1767 __i915_active_fence_set(&i915_request_timeline(this)->last_request,
1770 rq[i] = i915_request_get(this);
1771 i915_request_add(this);
1774 dummy_request_free(dummy);
1776 GEM_BUG_ON(i915_request_completed(rq[0]));
1777 if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
1778 pr_err("%s: First client failed to start\n",
1782 GEM_BUG_ON(!i915_request_started(rq[0]));
1784 if (i915_request_wait(rq[depth],
1787 pr_err("%s: Waiter depth:%d completed!\n",
1788 engine->name, depth);
1792 for (i = 0; i < ARRAY_SIZE(client); i++) {
1793 igt_spinner_end(&client[i].spin);
1794 i915_request_put(rq[i]);
1798 if (igt_flush_test(gt->i915))
1801 if (engine->execlists.preempt_hang.count) {
1802 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
1804 engine->execlists.preempt_hang.count,
1814 preempt_client_fini(&client[3]);
1816 preempt_client_fini(&client[2]);
1818 preempt_client_fini(&client[1]);
1820 preempt_client_fini(&client[0]);
1824 for (i = 0; i < ARRAY_SIZE(client); i++) {
1825 igt_spinner_end(&client[i].spin);
1826 i915_request_put(rq[i]);
1828 intel_gt_set_wedged(gt);
1833 static int live_chain_preempt(void *arg)
1835 struct intel_gt *gt = arg;
1836 struct intel_engine_cs *engine;
1837 struct preempt_client hi, lo;
1838 enum intel_engine_id id;
1842 * Build a chain AB...BA between two contexts (A, B) and request
1843 * preemption of the last request. It should then complete before
1844 * the previously submitted spinner in B.
1847 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1850 if (preempt_client_init(gt, &hi))
1853 if (preempt_client_init(gt, &lo))
1856 for_each_engine(engine, gt, id) {
1857 struct i915_sched_attr attr = {
1858 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1860 struct igt_live_test t;
1861 struct i915_request *rq;
1862 int ring_size, count, i;
1864 if (!intel_engine_has_preemption(engine))
1867 rq = spinner_create_request(&lo.spin,
1873 i915_request_get(rq);
1874 i915_request_add(rq);
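/*
 * Estimate how many requests of this size fit in the ring, so the whole
 * chain below can be built without stalling for ring space behind the
 * still-spinning requests.
 */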
1876 ring_size = rq->wa_tail - rq->head;
1878 ring_size += rq->ring->size;
1879 ring_size = rq->ring->size / ring_size;
1880 pr_debug("%s(%s): Using maximum of %d requests\n",
1881 __func__, engine->name, ring_size);
1883 igt_spinner_end(&lo.spin);
1884 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1885 pr_err("Timed out waiting to flush %s\n", engine->name);
1886 i915_request_put(rq);
1889 i915_request_put(rq);
1891 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1896 for_each_prime_number_from(count, 1, ring_size) {
1897 rq = spinner_create_request(&hi.spin,
1902 i915_request_add(rq);
1903 if (!igt_wait_for_spinner(&hi.spin, rq))
1906 rq = spinner_create_request(&lo.spin,
1911 i915_request_add(rq);
1913 for (i = 0; i < count; i++) {
1914 rq = igt_request_alloc(lo.ctx, engine);
1917 i915_request_add(rq);
1920 rq = igt_request_alloc(hi.ctx, engine);
1924 i915_request_get(rq);
1925 i915_request_add(rq);
1926 engine->schedule(rq, &attr);
1928 igt_spinner_end(&hi.spin);
1929 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1930 struct drm_printer p =
1931 drm_info_printer(gt->i915->drm.dev);
1933 pr_err("Failed to preempt over chain of %d\n",
1935 intel_engine_dump(engine, &p,
1936 "%s\n", engine->name);
1937 i915_request_put(rq);
1940 igt_spinner_end(&lo.spin);
1941 i915_request_put(rq);
1943 rq = igt_request_alloc(lo.ctx, engine);
1947 i915_request_get(rq);
1948 i915_request_add(rq);
1950 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1951 struct drm_printer p =
1952 drm_info_printer(gt->i915->drm.dev);
1954 pr_err("Failed to flush low priority chain of %d requests\n",
1956 intel_engine_dump(engine, &p,
1957 "%s\n", engine->name);
1959 i915_request_put(rq);
1962 i915_request_put(rq);
1965 if (igt_live_test_end(&t)) {
1973 preempt_client_fini(&lo);
1975 preempt_client_fini(&hi);
1979 igt_spinner_end(&hi.spin);
1980 igt_spinner_end(&lo.spin);
1981 intel_gt_set_wedged(gt);
1986 static int create_gang(struct intel_engine_cs *engine,
1987 struct i915_request **prev)
1989 struct drm_i915_gem_object *obj;
1990 struct intel_context *ce;
1991 struct i915_request *rq;
1992 struct i915_vma *vma;
1996 ce = intel_context_create(engine);
2000 obj = i915_gem_object_create_internal(engine->i915, 4096);
2006 vma = i915_vma_instance(obj, ce->vm, NULL);
2012 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2016 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2020 /* Semaphore target: spin until zero */
2021 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2023 *cs++ = MI_SEMAPHORE_WAIT |
2025 MI_SEMAPHORE_SAD_EQ_SDD;
2027 *cs++ = lower_32_bits(vma->node.start);
2028 *cs++ = upper_32_bits(vma->node.start);
2031 u64 offset = (*prev)->batch->node.start;
2033 /* Terminate the spinner in the next lower priority batch. */
2034 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2035 *cs++ = lower_32_bits(offset);
2036 *cs++ = upper_32_bits(offset);
2040 *cs++ = MI_BATCH_BUFFER_END;
2041 i915_gem_object_flush_map(obj);
2042 i915_gem_object_unpin_map(obj);
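/*
 * The batch spins (with arbitration enabled) until the first dword of its
 * own buffer is cleared to zero, then clears the first dword of the
 * previous, lower priority batch to terminate its spin in turn.
 * live_preempt_gang() starts the cascade by clearing the most recent,
 * highest priority batch through the CPU map.
 */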
2044 rq = intel_context_create_request(ce);
2049 i915_request_get(rq);
2052 err = i915_request_await_object(rq, vma->obj, false);
2054 err = i915_vma_move_to_active(vma, rq, 0);
2056 err = rq->engine->emit_bb_start(rq,
2059 i915_vma_unlock(vma);
2060 i915_request_add(rq);
2064 i915_gem_object_put(obj);
2065 intel_context_put(ce);
2067 rq->client_link.next = &(*prev)->client_link;
2072 i915_request_put(rq);
2074 i915_gem_object_put(obj);
2076 intel_context_put(ce);
2080 static int live_preempt_gang(void *arg)
2082 struct intel_gt *gt = arg;
2083 struct intel_engine_cs *engine;
2084 enum intel_engine_id id;
2086 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2090 * Build as long a chain of preempters as we can, with each
2091 * request higher priority than the last. Once we are ready, we release
2092 * the last batch which then percolates down the chain, each releasing
2093 * the next oldest in turn. The intent is to simply push as hard as we
2094 * can with the number of preemptions, trying to exceed narrow HW
2095 * limits. At a minimum, we insist that we can sort all the user
2096 * high priority levels into execution order.
2099 for_each_engine(engine, gt, id) {
2100 struct i915_request *rq = NULL;
2101 struct igt_live_test t;
2102 IGT_TIMEOUT(end_time);
2107 if (!intel_engine_has_preemption(engine))
2110 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2114 struct i915_sched_attr attr = {
2115 .priority = I915_USER_PRIORITY(prio++),
2118 err = create_gang(engine, &rq);
2122 /* Submit each spinner at increasing priority */
2123 engine->schedule(rq, &attr);
2125 if (prio <= I915_PRIORITY_MAX)
2128 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2131 if (__igt_timeout(end_time, NULL))
2134 pr_debug("%s: Preempt chain of %d requests\n",
2135 engine->name, prio);
2138 * Such that the last spinner is the highest priority and
2139 * should execute first. When that spinner completes,
2140 * it will terminate the next lowest spinner until there
2141 * are no more spinners and the gang is complete.
2143 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2146 i915_gem_object_unpin_map(rq->batch->obj);
2149 intel_gt_set_wedged(gt);
2152 while (rq) { /* wait for each rq from highest to lowest prio */
2153 struct i915_request *n =
2154 list_next_entry(rq, client_link);
2156 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2157 struct drm_printer p =
2158 drm_info_printer(engine->i915->drm.dev);
2160 pr_err("Failed to flush chain of %d requests, at %d\n",
2161 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2162 intel_engine_dump(engine, &p,
2163 "%s\n", engine->name);
2168 i915_request_put(rq);
2172 if (igt_live_test_end(&t))
2181 static int live_preempt_hang(void *arg)
2183 struct intel_gt *gt = arg;
2184 struct i915_gem_context *ctx_hi, *ctx_lo;
2185 struct igt_spinner spin_hi, spin_lo;
2186 struct intel_engine_cs *engine;
2187 enum intel_engine_id id;
2190 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2193 if (!intel_has_reset_engine(gt))
2196 if (igt_spinner_init(&spin_hi, gt))
2199 if (igt_spinner_init(&spin_lo, gt))
2202 ctx_hi = kernel_context(gt->i915);
2205 ctx_hi->sched.priority =
2206 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2208 ctx_lo = kernel_context(gt->i915);
2211 ctx_lo->sched.priority =
2212 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2214 for_each_engine(engine, gt, id) {
2215 struct i915_request *rq;
2217 if (!intel_engine_has_preemption(engine))
2220 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2227 i915_request_add(rq);
2228 if (!igt_wait_for_spinner(&spin_lo, rq)) {
2229 GEM_TRACE("lo spinner failed to start\n");
2231 intel_gt_set_wedged(gt);
2236 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
2239 igt_spinner_end(&spin_lo);
2244 init_completion(&engine->execlists.preempt_hang.completion);
2245 engine->execlists.preempt_hang.inject_hang = true;
2247 i915_request_add(rq);
2249 if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion,
2251 pr_err("Preemption did not occur within timeout!\n");
2253 intel_gt_set_wedged(gt);
2258 set_bit(I915_RESET_ENGINE + id, >->reset.flags);
2259 intel_engine_reset(engine, NULL);
2260 clear_bit(I915_RESET_ENGINE + id, >->reset.flags);
2262 engine->execlists.preempt_hang.inject_hang = false;
2264 if (!igt_wait_for_spinner(&spin_hi, rq)) {
2265 GEM_TRACE("hi spinner failed to start\n");
2267 intel_gt_set_wedged(gt);
2272 igt_spinner_end(&spin_hi);
2273 igt_spinner_end(&spin_lo);
2274 if (igt_flush_test(gt->i915)) {
2282 kernel_context_close(ctx_lo);
2284 kernel_context_close(ctx_hi);
2286 igt_spinner_fini(&spin_lo);
2288 igt_spinner_fini(&spin_hi);
2292 static int live_preempt_timeout(void *arg)
2294 struct intel_gt *gt = arg;
2295 struct i915_gem_context *ctx_hi, *ctx_lo;
2296 struct igt_spinner spin_lo;
2297 struct intel_engine_cs *engine;
2298 enum intel_engine_id id;
2302 * Check that we force preemption to occur by cancelling the previous
2303 * context if it refuses to yield the GPU.
2305 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2308 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2311 if (!intel_has_reset_engine(gt))
2314 if (igt_spinner_init(&spin_lo, gt))
2317 ctx_hi = kernel_context(gt->i915);
2320 ctx_hi->sched.priority =
2321 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2323 ctx_lo = kernel_context(gt->i915);
2326 ctx_lo->sched.priority =
2327 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2329 for_each_engine(engine, gt, id) {
2330 unsigned long saved_timeout;
2331 struct i915_request *rq;
2333 if (!intel_engine_has_preemption(engine))
2336 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2337 MI_NOOP); /* preemption disabled */
2343 i915_request_add(rq);
2344 if (!igt_wait_for_spinner(&spin_lo, rq)) {
2345 intel_gt_set_wedged(gt);
2350 rq = igt_request_alloc(ctx_hi, engine);
2352 igt_spinner_end(&spin_lo);
2357 /* Flush the previous CS ack before changing timeouts */
2358 while (READ_ONCE(engine->execlists.pending[0]))
2361 saved_timeout = engine->props.preempt_timeout_ms;
2362 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
2364 i915_request_get(rq);
2365 i915_request_add(rq);
2367 intel_engine_flush_submission(engine);
2368 engine->props.preempt_timeout_ms = saved_timeout;
2370 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
2371 intel_gt_set_wedged(gt);
2372 i915_request_put(rq);
2377 igt_spinner_end(&spin_lo);
2378 i915_request_put(rq);
2383 kernel_context_close(ctx_lo);
2385 kernel_context_close(ctx_hi);
2387 igt_spinner_fini(&spin_lo);
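/* Pseudo-random helpers: random_range() returns a value in [min, max). */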
2391 static int random_range(struct rnd_state *rnd, int min, int max)
2393 return i915_prandom_u32_max_state(max - min, rnd) + min;
2396 static int random_priority(struct rnd_state *rnd)
2398 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
2401 struct preempt_smoke {
2402 struct intel_gt *gt;
2403 struct i915_gem_context **contexts;
2404 struct intel_engine_cs *engine;
2405 struct drm_i915_gem_object *batch;
2406 unsigned int ncontext;
2407 struct rnd_state prng;
2408 unsigned long count;
2411 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
2413 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
2417 static int smoke_submit(struct preempt_smoke *smoke,
2418 struct i915_gem_context *ctx, int prio,
2419 struct drm_i915_gem_object *batch)
2421 struct i915_request *rq;
2422 struct i915_vma *vma = NULL;
2426 struct i915_address_space *vm;
2428 vm = i915_gem_context_get_vm_rcu(ctx);
2429 vma = i915_vma_instance(batch, vm, NULL);
2432 return PTR_ERR(vma);
2434 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2439 ctx->sched.priority = prio;
2441 rq = igt_request_alloc(ctx, smoke->engine);
2449 err = i915_request_await_object(rq, vma->obj, false);
2451 err = i915_vma_move_to_active(vma, rq, 0);
2453 err = rq->engine->emit_bb_start(rq,
2456 i915_vma_unlock(vma);
2459 i915_request_add(rq);
2463 i915_vma_unpin(vma);
2468 static int smoke_crescendo_thread(void *arg)
2470 struct preempt_smoke *smoke = arg;
2471 IGT_TIMEOUT(end_time);
2472 unsigned long count;
2476 struct i915_gem_context *ctx = smoke_context(smoke);
2479 err = smoke_submit(smoke,
2480 ctx, count % I915_PRIORITY_MAX,
2486 } while (!__igt_timeout(end_time, NULL));
2488 smoke->count = count;
2492 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
2493 #define BATCH BIT(0)
2495 struct task_struct *tsk[I915_NUM_ENGINES] = {};
2496 struct preempt_smoke arg[I915_NUM_ENGINES];
2497 struct intel_engine_cs *engine;
2498 enum intel_engine_id id;
2499 unsigned long count;
2502 for_each_engine(engine, smoke->gt, id) {
2504 arg[id].engine = engine;
2505 if (!(flags & BATCH))
2506 arg[id].batch = NULL;
2509 tsk[id] = kthread_run(smoke_crescendo_thread, &arg,
2510 "igt/smoke:%d", id);
2511 if (IS_ERR(tsk[id])) {
2512 err = PTR_ERR(tsk[id]);
2515 get_task_struct(tsk[id]);
2518 yield(); /* start all threads before we kthread_stop() */
2521 for_each_engine(engine, smoke->gt, id) {
2524 if (IS_ERR_OR_NULL(tsk[id]))
2527 status = kthread_stop(tsk[id]);
2531 count += arg[id].count;
2533 put_task_struct(tsk[id]);
2536 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
2538 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2542 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
2544 enum intel_engine_id id;
2545 IGT_TIMEOUT(end_time);
2546 unsigned long count;
2550 for_each_engine(smoke->engine, smoke->gt, id) {
2551 struct i915_gem_context *ctx = smoke_context(smoke);
2554 err = smoke_submit(smoke,
2555 ctx, random_priority(&smoke->prng),
2556 flags & BATCH ? smoke->batch : NULL);
2562 } while (!__igt_timeout(end_time, NULL));
2564 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
2566 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2570 static int live_preempt_smoke(void *arg)
2572 struct preempt_smoke smoke = {
2574 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
2577 const unsigned int phase[] = { 0, BATCH };
2578 struct igt_live_test t;
2583 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
2586 smoke.contexts = kmalloc_array(smoke.ncontext,
2587 sizeof(*smoke.contexts),
2589 if (!smoke.contexts)
2593 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
2594 if (IS_ERR(smoke.batch)) {
2595 err = PTR_ERR(smoke.batch);
2599 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
2604 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
2605 cs[n] = MI_ARB_CHECK;
2606 cs[n] = MI_BATCH_BUFFER_END;
2607 i915_gem_object_flush_map(smoke.batch);
2608 i915_gem_object_unpin_map(smoke.batch);
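/*
 * The smoke batch is a page of MI_ARB_CHECK commands terminated by
 * MI_BATCH_BUFFER_END: it does no useful work, but offers an arbitration
 * (preemption) point at every instruction while it executes.
 */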
2610 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
2615 for (n = 0; n < smoke.ncontext; n++) {
2616 smoke.contexts[n] = kernel_context(smoke.gt->i915);
2617 if (!smoke.contexts[n])
2621 for (n = 0; n < ARRAY_SIZE(phase); n++) {
2622 err = smoke_crescendo(&smoke, phase[n]);
2626 err = smoke_random(&smoke, phase[n]);
2632 if (igt_live_test_end(&t))
2635 for (n = 0; n < smoke.ncontext; n++) {
2636 if (!smoke.contexts[n])
2638 kernel_context_close(smoke.contexts[n]);
2642 i915_gem_object_put(smoke.batch);
2644 kfree(smoke.contexts);
2649 static int nop_virtual_engine(struct intel_gt *gt,
2650 struct intel_engine_cs **siblings,
2651 unsigned int nsibling,
2654 #define CHAIN BIT(0)
2656 IGT_TIMEOUT(end_time);
2657 struct i915_request *request[16] = {};
2658 struct intel_context *ve[16];
2659 unsigned long n, prime, nc;
2660 struct igt_live_test t;
2661 ktime_t times[2] = {};
2664 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
2666 for (n = 0; n < nctx; n++) {
2667 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
2668 if (IS_ERR(ve[n])) {
2669 err = PTR_ERR(ve[n]);
2674 err = intel_context_pin(ve[n]);
2676 intel_context_put(ve[n]);
2682 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
2686 for_each_prime_number_from(prime, 1, 8192) {
2687 times[1] = ktime_get_raw();
2689 if (flags & CHAIN) {
2690 for (nc = 0; nc < nctx; nc++) {
2691 for (n = 0; n < prime; n++) {
2692 struct i915_request *rq;
2694 rq = i915_request_create(ve[nc]);
2701 i915_request_put(request[nc]);
2702 request[nc] = i915_request_get(rq);
2703 i915_request_add(rq);
2707 for (n = 0; n < prime; n++) {
2708 for (nc = 0; nc < nctx; nc++) {
2709 struct i915_request *rq;
2711 rq = i915_request_create(ve[nc]);
2718 i915_request_put(request[nc]);
2719 request[nc] = i915_request_get(rq);
2720 i915_request_add(rq);
2725 for (nc = 0; nc < nctx; nc++) {
2726 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
2727 pr_err("%s(%s): wait for %llx:%lld timed out\n",
2728 __func__, ve[0]->engine->name,
2729 request[nc]->fence.context,
2730 request[nc]->fence.seqno);
2732 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2733 __func__, ve[0]->engine->name,
2734 request[nc]->fence.context,
2735 request[nc]->fence.seqno);
2737 intel_gt_set_wedged(gt);
2742 times[1] = ktime_sub(ktime_get_raw(), times[1]);
2744 times[0] = times[1];
2746 for (nc = 0; nc < nctx; nc++) {
2747 i915_request_put(request[nc]);
2751 if (__igt_timeout(end_time, NULL))
2755 err = igt_live_test_end(&t);
2759 pr_info("Request x%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
2760 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
2761 prime, div64_u64(ktime_to_ns(times[1]), prime));
2764 if (igt_flush_test(gt->i915))
2767 for (nc = 0; nc < nctx; nc++) {
2768 i915_request_put(request[nc]);
2769 intel_context_unpin(ve[nc]);
2770 intel_context_put(ve[nc]);
2775 static int live_virtual_engine(void *arg)
2777 struct intel_gt *gt = arg;
2778 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
2779 struct intel_engine_cs *engine;
2780 enum intel_engine_id id;
2781 unsigned int class, inst;
2784 if (USES_GUC_SUBMISSION(gt->i915))
2787 for_each_engine(engine, gt, id) {
2788 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
2790 pr_err("Failed to wrap engine %s: err=%d\n",
2796 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
2800 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
2801 if (!gt->engine_class[class][inst])
2804 siblings[nsibling++] = gt->engine_class[class][inst];
2809 for (n = 1; n <= nsibling + 1; n++) {
2810 err = nop_virtual_engine(gt, siblings, nsibling,
2816 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
2824 static int mask_virtual_engine(struct intel_gt *gt,
2825 struct intel_engine_cs **siblings,
2826 unsigned int nsibling)
2828 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
2829 struct intel_context *ve;
2830 struct igt_live_test t;
2835 * Check that by setting the execution mask on a request, we can
2836 * restrict it to our desired engine within the virtual engine.
2839 ve = intel_execlists_create_virtual(siblings, nsibling);
2845 err = intel_context_pin(ve);
2849 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
2853 for (n = 0; n < nsibling; n++) {
2854 request[n] = i915_request_create(ve);
2855 if (IS_ERR(request[n])) {
2856 err = PTR_ERR(request[n]);
2861 /* Reverse order as it's more likely to be unnatural */
2862 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
2864 i915_request_get(request[n]);
2865 i915_request_add(request[n]);
2868 for (n = 0; n < nsibling; n++) {
2869 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
2870 pr_err("%s(%s): wait for %llx:%lld timed out\n",
2871 __func__, ve->engine->name,
2872 request[n]->fence.context,
2873 request[n]->fence.seqno);
2875 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2876 __func__, ve->engine->name,
2877 request[n]->fence.context,
2878 request[n]->fence.seqno);
2880 intel_gt_set_wedged(gt);
2885 if (request[n]->engine != siblings[nsibling - n - 1]) {
2886 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
2887 request[n]->engine->name,
2888 siblings[nsibling - n - 1]->name);
2894 err = igt_live_test_end(&t);
2896 if (igt_flush_test(gt->i915))
2899 for (n = 0; n < nsibling; n++)
2900 i915_request_put(request[n]);
2903 intel_context_unpin(ve);
2905 intel_context_put(ve);
2910 static int live_virtual_mask(void *arg)
2912 struct intel_gt *gt = arg;
2913 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
2914 unsigned int class, inst;
2917 if (USES_GUC_SUBMISSION(gt->i915))
2920 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
2921 unsigned int nsibling;
2924 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
2925 if (!gt->engine_class[class][inst])
2928 siblings[nsibling++] = gt->engine_class[class][inst];
2933 err = mask_virtual_engine(gt, siblings, nsibling);
2941 static int preserved_virtual_engine(struct intel_gt *gt,
2942 struct intel_engine_cs **siblings,
2943 unsigned int nsibling)
2945 struct i915_request *last = NULL;
2946 struct intel_context *ve;
2947 struct i915_vma *scratch;
2948 struct igt_live_test t;
2953 scratch = create_scratch(siblings[0]->gt);
2954 if (IS_ERR(scratch))
2955 return PTR_ERR(scratch);
2957 ve = intel_execlists_create_virtual(siblings, nsibling);
2963 err = intel_context_pin(ve);
2967 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
2971 for (n = 0; n < NUM_GPR_DW; n++) {
2972 struct intel_engine_cs *engine = siblings[n % nsibling];
2973 struct i915_request *rq;
2975 rq = i915_request_create(ve);
2981 i915_request_put(last);
2982 last = i915_request_get(rq);
2984 cs = intel_ring_begin(rq, 8);
2986 i915_request_add(rq);
2991 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
2992 *cs++ = CS_GPR(engine, n);
2993 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
2996 *cs++ = MI_LOAD_REGISTER_IMM(1);
2997 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
3001 intel_ring_advance(rq, cs);
3003 /* Restrict this request to run on a particular engine */
3004 rq->execution_mask = engine->mask;
3005 i915_request_add(rq);
3008 if (i915_request_wait(last, 0, HZ / 5) < 0) {
3013 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3019 for (n = 0; n < NUM_GPR_DW; n++) {
3021 pr_err("Incorrect value[%d] found for GPR[%d]\n",
3028 i915_gem_object_unpin_map(scratch->obj);
3031 if (igt_live_test_end(&t))
3033 i915_request_put(last);
3035 intel_context_unpin(ve);
3037 intel_context_put(ve);
3039 i915_vma_unpin_and_release(&scratch, 0);
3043 static int live_virtual_preserved(void *arg)
3045 struct intel_gt *gt = arg;
3046 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3047 unsigned int class, inst;
3050 * Check that the context image retains non-privileged (user) registers
3051 * from one engine to the next. For this we check that the CS_GPR registers are preserved.
3055 if (USES_GUC_SUBMISSION(gt->i915))
3058 /* As we use CS_GPR, we cannot run on platforms before they existed on all engines. */
3059 if (INTEL_GEN(gt->i915) < 9)
3062 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3066 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3067 if (!gt->engine_class[class][inst])
3070 siblings[nsibling++] = gt->engine_class[class][inst];
3075 err = preserved_virtual_engine(gt, siblings, nsibling);
3083 static int bond_virtual_engine(struct intel_gt *gt,
3085 struct intel_engine_cs **siblings,
3086 unsigned int nsibling,
3088 #define BOND_SCHEDULE BIT(0)
3090 struct intel_engine_cs *master;
3091 struct i915_request *rq[16];
3092 enum intel_engine_id id;
3093 struct igt_spinner spin;
3098 * A set of bonded requests is intended to be run concurrently
3099 * across a number of engines. We use one request per-engine
3100 * and a magic fence to schedule each of the bonded requests
3101 * at the same time. A consequence of our current scheduler is that
3102 * we only move requests to the HW ready queue when the request
3103 * becomes ready, that is when all of its prerequisite fences have
3104 * been signaled. As one of those fences is the master submit fence,
3105 * there is a delay on all secondary fences as the HW may be
3106 * currently busy. Equally, as all the requests are independent,
3107 * they may have other fences that delay individual request
3108 * submission to HW. Ergo, we do not guarantee that all requests are
3109 * immediately submitted to HW at the same time, just that if the
3110 * rules are abided by, they are ready at the same time as the
3111 * first is submitted. Userspace can embed semaphores in its batch
3112 * to ensure parallel execution of its phases as it requires.
3113 * Though naturally it gets requested that perhaps the scheduler should
3114 * take care of parallel execution, even across preemption events on
3115 * different HW. (The proper answer is of course "lalalala".)
3117 * With the submit-fence, we have identified three possible phases
3118 * of synchronisation depending on the master fence: queued (not
3119 * ready), executing, and signaled. The first two are quite simple
3120 * and checked below. However, the signaled master fence handling is
3121 * contentious. Currently we do not distinguish between a signaled
3122 * fence and an expired fence, as once signaled it does not convey
3123 * any information about the previous execution. It may even have been
3124 * freed by the time we check, and so may no longer exist. Ergo we currently
3125 * do not apply the bonding constraint for an already signaled fence,
3126 * as our expectation is that it should not constrain the secondaries
3127 * and is outside of the scope of the bonded request API (i.e. all
3128 * userspace requests are meant to be running in parallel). As
3129 * it imposes no constraint, and is effectively a no-op, we do not
3130 * check below as normal execution flows are checked extensively above.
3132 * XXX Is the degenerate handling of signaled submit fences the
3133 * expected behaviour for userspace?
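*
* For illustration only (not exercised by this selftest): userspace
* would typically declare the bonds with I915_CONTEXT_ENGINES_EXT_BOND
* when creating the context, and then pass the master's out-fence as a
* submit fence (I915_EXEC_FENCE_SUBMIT, fence fd in rsvd2) on each
* bonded execbuf, so the bonded batches are only submitted once the
* master is.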
3136 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
3138 if (igt_spinner_init(&spin, gt))
3142 rq[0] = ERR_PTR(-ENOMEM);
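/*
 * rq[] is kept seeded with error pointers (here and via memset_p()
 * below) so that the unwind paths can simply walk the array and stop
 * at the first slot that never held a real request.
 */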
3143 for_each_engine(master, gt, id) {
3144 struct i915_sw_fence fence = {};
3146 if (master->class == class)
3149 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
3151 rq[0] = igt_spinner_create_request(&spin,
3152 master->kernel_context,
3154 if (IS_ERR(rq[0])) {
3155 err = PTR_ERR(rq[0]);
3158 i915_request_get(rq[0]);
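/*
 * For the "schedule" phase, hold the master back on an onstack fence
 * until all of the bonded secondaries have been constructed, so that
 * the submit-fence coupling is exercised while the master is still
 * queued (not yet ready).
 */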
3160 if (flags & BOND_SCHEDULE) {
3161 onstack_fence_init(&fence);
3162 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
3167 i915_request_add(rq[0]);
3171 if (!(flags & BOND_SCHEDULE) &&
3172 !igt_wait_for_spinner(&spin, rq[0])) {
3177 for (n = 0; n < nsibling; n++) {
3178 struct intel_context *ve;
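/*
 * Each secondary gets its own virtual engine spanning all the
 * siblings, plus a bond naming which sibling it must run on once the
 * master (on its separate engine) is submitted.
 */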
3180 ve = intel_execlists_create_virtual(siblings, nsibling);
3183 onstack_fence_fini(&fence);
3187 err = intel_virtual_engine_attach_bond(ve->engine,
3191 intel_context_put(ve);
3192 onstack_fence_fini(&fence);
3196 err = intel_context_pin(ve);
3197 intel_context_put(ve);
3199 onstack_fence_fini(&fence);
3203 rq[n + 1] = i915_request_create(ve);
3204 intel_context_unpin(ve);
3205 if (IS_ERR(rq[n + 1])) {
3206 err = PTR_ERR(rq[n + 1]);
3207 onstack_fence_fini(&fence);
3210 i915_request_get(rq[n + 1]);
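/*
 * Couple the secondary to the master's execution: when the master is
 * submitted to HW, the virtual engine's bond_execute callback narrows
 * this request's execution_mask to the sibling bonded against the
 * master's engine.
 */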
3212 err = i915_request_await_execution(rq[n + 1],
3214 ve->engine->bond_execute);
3215 i915_request_add(rq[n + 1]);
3217 onstack_fence_fini(&fence);
3221 onstack_fence_fini(&fence);
3222 intel_engine_flush_submission(master);
3223 igt_spinner_end(&spin);
3225 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
3226 pr_err("Master request did not execute (on %s)!\n",
3227 rq[0]->engine->name);
3232 for (n = 0; n < nsibling; n++) {
3233 if (i915_request_wait(rq[n + 1], 0,
3234 MAX_SCHEDULE_TIMEOUT) < 0) {
3239 if (rq[n + 1]->engine != siblings[n]) {
3240 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
3242 rq[n + 1]->engine->name,
3243 rq[0]->engine->name);
3249 for (n = 0; !IS_ERR(rq[n]); n++)
3250 i915_request_put(rq[n]);
3251 rq[0] = ERR_PTR(-ENOMEM);
3255 for (n = 0; !IS_ERR(rq[n]); n++)
3256 i915_request_put(rq[n]);
3257 if (igt_flush_test(gt->i915))
3260 igt_spinner_fini(&spin);
3264 static int live_virtual_bond(void *arg)
3266 static const struct phase {
3271 { "schedule", BOND_SCHEDULE },
3274 struct intel_gt *gt = arg;
3275 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3276 unsigned int class, inst;
3279 if (USES_GUC_SUBMISSION(gt->i915))
3282 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3283 const struct phase *p;
3287 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3288 if (!gt->engine_class[class][inst])
3291 GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
3292 siblings[nsibling++] = gt->engine_class[class][inst];
3297 for (p = phases; p->name; p++) {
3298 err = bond_virtual_engine(gt,
3299 class, siblings, nsibling,
3302 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
3303 __func__, p->name, class, nsibling, err);
3312 int intel_execlists_live_selftests(struct drm_i915_private *i915)
3314 static const struct i915_subtest tests[] = {
3315 SUBTEST(live_sanitycheck),
3316 SUBTEST(live_unlite_switch),
3317 SUBTEST(live_unlite_preempt),
3318 SUBTEST(live_timeslice_preempt),
3319 SUBTEST(live_timeslice_queue),
3320 SUBTEST(live_busywait_preempt),
3321 SUBTEST(live_preempt),
3322 SUBTEST(live_late_preempt),
3323 SUBTEST(live_nopreempt),
3324 SUBTEST(live_preempt_cancel),
3325 SUBTEST(live_suppress_self_preempt),
3326 SUBTEST(live_suppress_wait_preempt),
3327 SUBTEST(live_chain_preempt),
3328 SUBTEST(live_preempt_gang),
3329 SUBTEST(live_preempt_hang),
3330 SUBTEST(live_preempt_timeout),
3331 SUBTEST(live_preempt_smoke),
3332 SUBTEST(live_virtual_engine),
3333 SUBTEST(live_virtual_mask),
3334 SUBTEST(live_virtual_preserved),
3335 SUBTEST(live_virtual_bond),
3338 if (!HAS_EXECLISTS(i915))
3341 if (intel_gt_is_wedged(&i915->gt))
3344 return intel_gt_live_subtests(tests, &i915->gt);
3347 static void hexdump(const void *buf, size_t len)
3349 const size_t rowsize = 8 * sizeof(u32);
3350 const void *prev = NULL;
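/*
 * Dump the buffer 8 dwords per row, collapsing runs of identical rows
 * (cf. print_hex_dump) so that a mostly empty context image does not
 * flood the log.
 */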
3354 for (pos = 0; pos < len; pos += rowsize) {
3357 if (prev && !memcmp(prev, buf + pos, rowsize)) {
3365 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
3366 rowsize, sizeof(u32),
3368 false) >= sizeof(line));
3369 pr_info("[%04zx] %s\n", pos, line);
3376 static int live_lrc_layout(void *arg)
3378 struct intel_gt *gt = arg;
3379 struct intel_engine_cs *engine;
3380 enum intel_engine_id id;
3385 * Check the register offsets we use to create the initial reg state
3386 * match the layout saved by HW.
3389 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
3394 for_each_engine(engine, gt, id) {
3398 if (!engine->default_state)
3401 hw = i915_gem_object_pin_map(engine->default_state,
3407 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
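/*
 * Build the driver's view of the default register state, then walk it
 * in lockstep with the HW image: every LRI header and every register
 * offset it loads must match, while the register values themselves
 * are allowed to differ.
 */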
3409 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
3410 engine->kernel_context,
3412 engine->kernel_context->ring,
3425 pr_debug("%s: skipped instruction %x at dword %d\n",
3426 engine->name, lri, dw);
3431 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
3432 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
3433 engine->name, dw, lri);
3438 if (lrc[dw] != lri) {
3439 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
3440 engine->name, dw, lri, lrc[dw]);
3450 if (hw[dw] != lrc[dw]) {
3451 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
3452 engine->name, dw, hw[dw], lrc[dw]);
3458 * Skip over the actual register value as we
3459 * expect that to differ.
3464 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
3467 pr_info("%s: HW register image:\n", engine->name);
3468 hexdump(hw, PAGE_SIZE);
3470 pr_info("%s: SW register image:\n", engine->name);
3471 hexdump(lrc, PAGE_SIZE);
3474 i915_gem_object_unpin_map(engine->default_state);
3483 static int find_offset(const u32 *lri, u32 offset)
3487 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
3488 if (lri[i] == offset)
3494 static int live_lrc_fixed(void *arg)
3496 struct intel_gt *gt = arg;
3497 struct intel_engine_cs *engine;
3498 enum intel_engine_id id;
3502 * Check the assumed register offsets match the actual locations in
3503 * the context image.
3506 for_each_engine(engine, gt, id) {
3513 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
3518 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
3523 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
3528 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
3533 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
3534 lrc_ring_mi_mode(engine),
3538 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
3546 if (!engine->default_state)
3549 hw = i915_gem_object_pin_map(engine->default_state,
3555 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
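/*
 * find_offset() locates each register's mmio offset within the HW
 * default image; it must sit at exactly the dword position the
 * driver's fixed layout assumes.
 */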
3557 for (t = tbl; t->name; t++) {
3558 int dw = find_offset(hw, t->reg);
3560 if (dw != t->offset) {
3561 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
3571 i915_gem_object_unpin_map(engine->default_state);
3577 static int __live_lrc_state(struct intel_engine_cs *engine,
3578 struct i915_vma *scratch)
3580 struct intel_context *ce;
3581 struct i915_request *rq;
3587 u32 expected[MAX_IDX];
3592 ce = intel_context_create(engine);
3596 err = intel_context_pin(ce);
3600 rq = i915_request_create(ce);
3606 cs = intel_ring_begin(rq, 4 * MAX_IDX);
3609 i915_request_add(rq);
3613 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3614 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
3615 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
3618 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
3620 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3621 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
3622 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
3625 i915_request_get(rq);
3626 i915_request_add(rq);
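/*
 * The ring tail is only written into the context image once the
 * request is actually submitted, so flush the submission tasklet
 * before sampling ce->ring->tail as the value the CS should report
 * for RING_TAIL.
 */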
3628 intel_engine_flush_submission(engine);
3629 expected[RING_TAIL_IDX] = ce->ring->tail;
3631 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3636 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3642 for (n = 0; n < MAX_IDX; n++) {
3643 if (cs[n] != expected[n]) {
3644 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
3645 engine->name, n, cs[n], expected[n]);
3651 i915_gem_object_unpin_map(scratch->obj);
3654 i915_request_put(rq);
3656 intel_context_unpin(ce);
3658 intel_context_put(ce);
3662 static int live_lrc_state(void *arg)
3664 struct intel_gt *gt = arg;
3665 struct intel_engine_cs *engine;
3666 struct i915_vma *scratch;
3667 enum intel_engine_id id;
3671 * Check the live register state matches what we expect for this
* context.
3675 scratch = create_scratch(gt);
3676 if (IS_ERR(scratch))
3677 return PTR_ERR(scratch);
3679 for_each_engine(engine, gt, id) {
3680 err = __live_lrc_state(engine, scratch);
3685 if (igt_flush_test(gt->i915))
3688 i915_vma_unpin_and_release(&scratch, 0);
3692 static int gpr_make_dirty(struct intel_engine_cs *engine)
3694 struct i915_request *rq;
3698 rq = intel_engine_create_kernel_request(engine);
3702 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
3704 i915_request_add(rq);
3708 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
3709 for (n = 0; n < NUM_GPR_DW; n++) {
3710 *cs++ = CS_GPR(engine, n);
3711 *cs++ = STACK_MAGIC;
3715 intel_ring_advance(rq, cs);
3716 i915_request_add(rq);
3721 static int __live_gpr_clear(struct intel_engine_cs *engine,
3722 struct i915_vma *scratch)
3724 struct intel_context *ce;
3725 struct i915_request *rq;
3730 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
3731 return 0; /* GPR only on rcs0 for gen8 */
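/*
 * First scribble a known value over every GPR using the kernel
 * context, so that the new context below can only read back zeroes if
 * the GPRs really are reinitialised for each new context rather than
 * inherited from whatever ran last on the engine.
 */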
3733 err = gpr_make_dirty(engine);
3737 ce = intel_context_create(engine);
3741 rq = intel_context_create_request(ce);
3747 cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
3750 i915_request_add(rq);
3754 for (n = 0; n < NUM_GPR_DW; n++) {
3755 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3756 *cs++ = CS_GPR(engine, n);
3757 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3761 i915_request_get(rq);
3762 i915_request_add(rq);
3764 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3769 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3775 for (n = 0; n < NUM_GPR_DW; n++) {
3777 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
3779 n / 2, n & 1 ? "udw" : "ldw",
3786 i915_gem_object_unpin_map(scratch->obj);
3789 i915_request_put(rq);
3791 intel_context_put(ce);
3795 static int live_gpr_clear(void *arg)
3797 struct intel_gt *gt = arg;
3798 struct intel_engine_cs *engine;
3799 struct i915_vma *scratch;
3800 enum intel_engine_id id;
3804 * Check that GPR registers are cleared in new contexts as we need
3805 * to avoid leaking any information from previous contexts.
3808 scratch = create_scratch(gt);
3809 if (IS_ERR(scratch))
3810 return PTR_ERR(scratch);
3812 for_each_engine(engine, gt, id) {
3813 err = __live_gpr_clear(engine, scratch);
3818 if (igt_flush_test(gt->i915))
3821 i915_vma_unpin_and_release(&scratch, 0);
3825 int intel_lrc_live_selftests(struct drm_i915_private *i915)
3827 static const struct i915_subtest tests[] = {
3828 SUBTEST(live_lrc_layout),
3829 SUBTEST(live_lrc_fixed),
3830 SUBTEST(live_lrc_state),
3831 SUBTEST(live_gpr_clear),
3834 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
3837 return intel_gt_live_subtests(tests, &i915->gt);