/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"

#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "igt_live_test.h"
#include "igt_spinner.h"
#include "lib_sw_fence.h"

#include "mock_drm.h"
#include "mock_gem_device.h"
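
/* Count the engines exposed to userspace (uABI engines) on this device. */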
static unsigned int num_uabi_engines(struct drm_i915_private *i915)
{
        struct intel_engine_cs *engine;
        unsigned int count = 0;

        for_each_uabi_engine(engine, i915)
                count++;

        return count;
}
static int igt_add_request(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct i915_request *request;

        /* Basic preliminary test to create a request and let it loose! */

        request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
        i915_request_add(request);
static int igt_wait_request(void *arg)
{
        const long T = HZ / 4;
        struct drm_i915_private *i915 = arg;
        struct i915_request *request;

        /* Submit a request, then wait upon it */

        request = mock_request(i915->engine[RCS0]->kernel_context, T);
        i915_request_get(request);

        if (i915_request_wait(request, 0, 0) != -ETIME) {
                pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
        }

        if (i915_request_wait(request, 0, T) != -ETIME) {
                pr_err("request wait succeeded (expected timeout before submit!)\n");
        }

        if (i915_request_completed(request)) {
                pr_err("request completed before submit!!\n");
        }

        i915_request_add(request);

        if (i915_request_wait(request, 0, 0) != -ETIME) {
                pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
        }

        if (i915_request_completed(request)) {
                pr_err("request completed immediately!\n");
        }

        if (i915_request_wait(request, 0, T / 2) != -ETIME) {
                pr_err("request wait succeeded (expected timeout!)\n");
        }

        if (i915_request_wait(request, 0, T) == -ETIME) {
                pr_err("request wait timed out!\n");
        }

        if (!i915_request_completed(request)) {
                pr_err("request not complete after waiting!\n");
        }

        if (i915_request_wait(request, 0, T) == -ETIME) {
                pr_err("request wait timed out when already complete!\n");
        }

        i915_request_put(request);
        mock_device_flush(i915);
static int igt_fence_wait(void *arg)
{
        const long T = HZ / 4;
        struct drm_i915_private *i915 = arg;
        struct i915_request *request;

        /* Submit a request, treat it as a fence and wait upon it */

        request = mock_request(i915->engine[RCS0]->kernel_context, T);

        if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
                pr_err("fence wait success before submit (expected timeout)!\n");
        }

        i915_request_add(request);

        if (dma_fence_is_signaled(&request->fence)) {
                pr_err("fence signaled immediately!\n");
        }

        if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
                pr_err("fence wait success after submit (expected timeout)!\n");
        }

        if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
                pr_err("fence wait timed out (expected success)!\n");
        }

        if (!dma_fence_is_signaled(&request->fence)) {
                pr_err("fence unsignaled after waiting!\n");
        }

        if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
                pr_err("fence wait timed out when complete (expected success)!\n");
        }

        mock_device_flush(i915);
static int igt_request_rewind(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct i915_request *request, *vip;
        struct i915_gem_context *ctx[2];
        struct intel_context *ce;

        ctx[0] = mock_context(i915, "A");

        ce = i915_gem_context_get_engine(ctx[0], RCS0);
        GEM_BUG_ON(IS_ERR(ce));
        request = mock_request(ce, 2 * HZ);
        intel_context_put(ce);

        i915_request_get(request);
        i915_request_add(request);

        ctx[1] = mock_context(i915, "B");

        ce = i915_gem_context_get_engine(ctx[1], RCS0);
        GEM_BUG_ON(IS_ERR(ce));
        vip = mock_request(ce, 0);
        intel_context_put(ce);

        /* Simulate preemption by manual reordering */
        if (!mock_cancel_request(request)) {
                pr_err("failed to cancel request (already executed)!\n");
                i915_request_add(vip);
        }

        i915_request_get(vip);
        i915_request_add(vip);

        request->engine->submit_request(request);

        if (i915_request_wait(vip, 0, HZ) == -ETIME) {
                pr_err("timed out waiting for high priority request\n");
        }

        if (i915_request_completed(request)) {
                pr_err("low priority request already completed\n");
        }

        i915_request_put(vip);
        mock_context_close(ctx[1]);
        i915_request_put(request);
        mock_context_close(ctx[0]);
        mock_device_flush(i915);
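
/*
 * Shared state for the breadcrumbs smoketests: each worker kthread picks
 * random contexts from @contexts, allocates requests on @engine via
 * @request_alloc and accumulates its totals into @num_waits and @num_fences.
 */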
struct smoketest {
        struct intel_engine_cs *engine;
        struct i915_gem_context **contexts;
        atomic_long_t num_waits, num_fences;
        int ncontexts, max_batch;
        struct i915_request *(*request_alloc)(struct intel_context *ce);
};
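
/*
 * request_alloc() hooks for the smoketest: one backed by the mock engine,
 * one that creates real requests on live hardware.
 */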
static struct i915_request *
__mock_request_alloc(struct intel_context *ce)
{
        return mock_request(ce, 0);
}

static struct i915_request *
__live_request_alloc(struct intel_context *ce)
{
        return intel_context_create_request(ce);
}
static int __igt_breadcrumbs_smoketest(void *arg)
{
        struct smoketest *t = arg;
        const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
        const unsigned int total = 4 * t->ncontexts + 1;
        unsigned int num_waits = 0, num_fences = 0;
        struct i915_request **requests;
        I915_RND_STATE(prng);
        unsigned int *order;
        int err = 0;

        /*
         * A very simple test to catch the most egregious of list handling bugs.
         *
         * At its heart, we simply create oodles of requests running across
         * multiple kthreads and enable signaling on them, for the sole purpose
         * of stressing our breadcrumb handling. The only inspection we do is
         * that the fences were marked as signaled.
         */

        requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
        order = i915_random_order(total, &prng);

        while (!kthread_should_stop()) {
                struct i915_sw_fence *submit, *wait;
                unsigned int n, count;

                submit = heap_fence_create(GFP_KERNEL);
                wait = heap_fence_create(GFP_KERNEL);
                if (!wait) {
                        i915_sw_fence_commit(submit);
                        heap_fence_put(submit);
                }

                i915_random_reorder(order, total, &prng);
                count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

                for (n = 0; n < count; n++) {
                        struct i915_gem_context *ctx =
                                t->contexts[order[n] % t->ncontexts];
                        struct i915_request *rq;
                        struct intel_context *ce;

                        ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
                        GEM_BUG_ON(IS_ERR(ce));
                        rq = t->request_alloc(ce);
                        intel_context_put(ce);

                        err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
                                                               submit,
                                                               GFP_KERNEL);

                        requests[n] = i915_request_get(rq);
                        i915_request_add(rq);

                        err = i915_sw_fence_await_dma_fence(wait,
                                                            &rq->fence,
                                                            0,
                                                            GFP_KERNEL);

                        i915_request_put(rq);
                }

                i915_sw_fence_commit(submit);
                i915_sw_fence_commit(wait);

                if (!wait_event_timeout(wait->wait,
                                        i915_sw_fence_done(wait),
                                        5 * HZ)) {
                        struct i915_request *rq = requests[count - 1];

                        pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
                               atomic_read(&wait->pending), count,
                               rq->fence.context, rq->fence.seqno,
                               t->engine->name);

                        intel_gt_set_wedged(t->engine->gt);
                        GEM_BUG_ON(!i915_request_completed(rq));
                        i915_sw_fence_wait(wait);
                }

                for (n = 0; n < count; n++) {
                        struct i915_request *rq = requests[n];

                        if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
                                      &rq->fence.flags))
                                pr_err("%llu:%llu was not signaled!\n",
                                       rq->fence.context, rq->fence.seqno);

                        i915_request_put(rq);
                }

                heap_fence_put(wait);
                heap_fence_put(submit);
        }

        atomic_long_add(num_fences, &t->num_fences);
        atomic_long_add(num_waits, &t->num_waits);
static int mock_breadcrumbs_smoketest(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct smoketest t = {
                .engine = i915->engine[RCS0],
                .request_alloc = __mock_request_alloc
        };
        unsigned int ncpus = num_online_cpus();
        struct task_struct **threads;
        unsigned int n;
        int ret = 0;

        /*
         * Smoketest our breadcrumb/signal handling for requests across multiple
         * threads. A very simple test to only catch the most egregious of bugs.
         * See __igt_breadcrumbs_smoketest();
         */

        threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);

        t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);

        for (n = 0; n < t.ncontexts; n++) {
                t.contexts[n] = mock_context(t.engine->i915, "mock");
                if (!t.contexts[n]) {
                        ret = -ENOMEM;
                }
        }

        for (n = 0; n < ncpus; n++) {
                threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
                                         &t, "igt/%d", n);
                if (IS_ERR(threads[n])) {
                        ret = PTR_ERR(threads[n]);
                        break;
                }

                get_task_struct(threads[n]);
        }

        yield(); /* start all threads before we begin */
        msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

        for (n = 0; n < ncpus; n++) {
                int err;

                err = kthread_stop(threads[n]);
                if (err < 0 && !ret)
                        ret = err;

                put_task_struct(threads[n]);
        }

        pr_info("Completed %lu waits for %lu fences across %d cpus\n",
                atomic_long_read(&t.num_waits),
                atomic_long_read(&t.num_fences),
                ncpus);

        for (n = 0; n < t.ncontexts; n++) {
                if (!t.contexts[n])
                        break;
                mock_context_close(t.contexts[n]);
        }
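
/* Entry point for the request selftests run on the mock (no hardware) device. */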
int i915_request_mock_selftests(void)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_add_request),
                SUBTEST(igt_wait_request),
                SUBTEST(igt_fence_wait),
                SUBTEST(igt_request_rewind),
                SUBTEST(mock_breadcrumbs_smoketest),
        };
        struct drm_i915_private *i915;
        intel_wakeref_t wakeref;
        int err = 0;

        i915 = mock_gem_device();

        with_intel_runtime_pm(&i915->runtime_pm, wakeref)
                err = i915_subtests(tests, i915);

        drm_dev_put(&i915->drm);

        return err;
}
static int live_nop_request(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine;
        struct igt_live_test t;
        int err;

        /*
         * Submit various sized batches of empty requests, to each engine
         * (individually), and wait for the batch to complete. We can check
         * the overhead of submitting requests to the hardware.
         */

        for_each_uabi_engine(engine, i915) {
                unsigned long n, prime;
                IGT_TIMEOUT(end_time);
                ktime_t times[2] = {};

                err = igt_live_test_begin(&t, i915, __func__, engine->name);

                intel_engine_pm_get(engine);
                for_each_prime_number_from(prime, 1, 8192) {
                        struct i915_request *request = NULL;

                        times[1] = ktime_get_raw();

                        for (n = 0; n < prime; n++) {
                                i915_request_put(request);
                                request = i915_request_create(engine->kernel_context);
                                if (IS_ERR(request))
                                        return PTR_ERR(request);

                                /*
                                 * This space is left intentionally blank.
                                 *
                                 * We do not actually want to perform any
                                 * action with this request, we just want
                                 * to measure the latency in allocation
                                 * and submission of our breadcrumbs -
                                 * ensuring that the bare request is sufficient
                                 * for the system to work (i.e. proper HEAD
                                 * tracking of the rings, interrupt handling,
                                 * etc). It also gives us the lowest bounds
                                 * to gauge all future requests against.
                                 */

                                i915_request_get(request);
                                i915_request_add(request);
                        }
                        i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
                        i915_request_put(request);

                        times[1] = ktime_sub(ktime_get_raw(), times[1]);
                        if (prime == 1)
                                times[0] = times[1];

                        if (__igt_timeout(end_time, NULL))
                                break;
                }
                intel_engine_pm_put(engine);

                err = igt_live_test_end(&t);

                pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
                        engine->name,
                        ktime_to_ns(times[0]),
                        prime, div64_u64(ktime_to_ns(times[1]), prime));
        }
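
/*
 * Build a one-page batch containing just MI_BATCH_BUFFER_END, pinned into
 * the global GTT so that it can be reused by every engine.
 */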
static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        u32 *cmd;
        int err;

        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);

        *cmd = MI_BATCH_BUFFER_END;

        __i915_gem_object_flush_map(obj, 0, 64);
        i915_gem_object_unpin_map(obj);

        intel_gt_chipset_flush(&i915->gt);

        vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);

        err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);

        /* Force the wait now to avoid including it in the benchmark */
        err = i915_vma_sync(vma);

        return vma;

err:
        i915_gem_object_put(obj);
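
/* Submit the no-op batch on @engine and return a referenced request. */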
static struct i915_request *
empty_request(struct intel_engine_cs *engine,
              struct i915_vma *batch)
{
        struct i915_request *request;
        int err;

        request = i915_request_create(engine->kernel_context);

        err = engine->emit_bb_start(request,
                                    batch->node.start,
                                    batch->node.size,
                                    I915_DISPATCH_SECURE);
        if (err)
                goto out_request;

        i915_request_get(request);
out_request:
        i915_request_add(request);
        return err ? ERR_PTR(err) : request;
}
static int live_empty_request(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine;
        struct igt_live_test t;
        struct i915_vma *batch;
        int err;

        /*
         * Submit various sized batches of empty requests, to each engine
         * (individually), and wait for the batch to complete. We can check
         * the overhead of submitting requests to the hardware.
         */

        batch = empty_batch(i915);
        if (IS_ERR(batch))
                return PTR_ERR(batch);

        for_each_uabi_engine(engine, i915) {
                IGT_TIMEOUT(end_time);
                struct i915_request *request;
                unsigned long n, prime;
                ktime_t times[2] = {};

                err = igt_live_test_begin(&t, i915, __func__, engine->name);

                intel_engine_pm_get(engine);

                /* Warmup / preload */
                request = empty_request(engine, batch);
                if (IS_ERR(request)) {
                        err = PTR_ERR(request);
                        intel_engine_pm_put(engine);
                }
                i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

                for_each_prime_number_from(prime, 1, 8192) {
                        times[1] = ktime_get_raw();

                        for (n = 0; n < prime; n++) {
                                i915_request_put(request);
                                request = empty_request(engine, batch);
                                if (IS_ERR(request)) {
                                        err = PTR_ERR(request);
                                        intel_engine_pm_put(engine);
                                }
                        }
                        i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

                        times[1] = ktime_sub(ktime_get_raw(), times[1]);
                        if (prime == 1)
                                times[0] = times[1];

                        if (__igt_timeout(end_time, NULL))
                                break;
                }
                i915_request_put(request);
                intel_engine_pm_put(engine);

                err = igt_live_test_end(&t);

                pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
                        engine->name,
                        ktime_to_ns(times[0]),
                        prime, div64_u64(ktime_to_ns(times[1]), prime));
        }

        i915_vma_unpin(batch);
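
/*
 * Build a batch that branches back to its own start, so it spins on the GPU
 * until recursive_batch_resolve() rewrites the first instruction to
 * MI_BATCH_BUFFER_END.
 */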
static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
        struct drm_i915_gem_object *obj;
        const int gen = INTEL_GEN(i915);
        struct i915_vma *vma;
        u32 *cmd;
        int err;

        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        vma = i915_vma_instance(obj, i915->gt.vm, NULL);

        err = i915_vma_pin(vma, 0, 0, PIN_USER);

        cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);

        if (gen >= 8) {
                *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
                *cmd++ = lower_32_bits(vma->node.start);
                *cmd++ = upper_32_bits(vma->node.start);
        } else if (gen >= 6) {
                *cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
                *cmd++ = lower_32_bits(vma->node.start);
        } else {
                *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
                *cmd++ = lower_32_bits(vma->node.start);
        }
        *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

        __i915_gem_object_flush_map(obj, 0, 64);
        i915_gem_object_unpin_map(obj);

        intel_gt_chipset_flush(&i915->gt);

        return vma;

err:
        i915_gem_object_put(obj);
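
/* Terminate the self-referencing batch by overwriting its first instruction. */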
static int recursive_batch_resolve(struct i915_vma *batch)
{
        u32 *cmd;

        cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);

        *cmd = MI_BATCH_BUFFER_END;
        intel_gt_chipset_flush(batch->vm->gt);

        i915_gem_object_unpin_map(batch->obj);

        return 0;
}
static int live_all_engines(void *arg)
{
        struct drm_i915_private *i915 = arg;
        const unsigned int nengines = num_uabi_engines(i915);
        struct intel_engine_cs *engine;
        struct i915_request **request;
        struct igt_live_test t;
        struct i915_vma *batch;
        unsigned int idx;
        int err;

        /*
         * Check we can submit requests to all engines simultaneously. We
         * send a recursive batch to each engine - checking that we don't
         * block doing so, and that they don't complete too soon.
         */

        request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);

        err = igt_live_test_begin(&t, i915, __func__, "");

        batch = recursive_batch(i915);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
        }

        idx = 0;
        for_each_uabi_engine(engine, i915) {
                request[idx] = intel_engine_create_kernel_request(engine);
                if (IS_ERR(request[idx])) {
                        err = PTR_ERR(request[idx]);
                        pr_err("%s: Request allocation failed with err=%d\n",
                               __func__, err);
                }

                err = engine->emit_bb_start(request[idx],
                                            batch->node.start,
                                            batch->node.size,
                                            0);
                request[idx]->batch = batch;

                i915_vma_lock(batch);
                err = i915_request_await_object(request[idx], batch->obj, 0);
                if (err == 0)
                        err = i915_vma_move_to_active(batch, request[idx], 0);
                i915_vma_unlock(batch);

                i915_request_get(request[idx]);
                i915_request_add(request[idx]);
                idx++;
        }

        idx = 0;
        for_each_uabi_engine(engine, i915) {
                if (i915_request_completed(request[idx])) {
                        pr_err("%s(%s): request completed too early!\n",
                               __func__, engine->name);
                }
                idx++;
        }

        err = recursive_batch_resolve(batch);
        if (err)
                pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);

        idx = 0;
        for_each_uabi_engine(engine, i915) {
                long timeout;

                timeout = i915_request_wait(request[idx], 0,
                                            MAX_SCHEDULE_TIMEOUT);
                if (timeout < 0) {
                        err = timeout;
                        pr_err("%s: error waiting for request on %s, err=%d\n",
                               __func__, engine->name, err);
                }

                GEM_BUG_ON(!i915_request_completed(request[idx]));
                i915_request_put(request[idx]);
                request[idx] = NULL;
                idx++;
        }

        err = igt_live_test_end(&t);

        idx = 0;
        for_each_uabi_engine(engine, i915) {
                if (request[idx])
                        i915_request_put(request[idx]);
                idx++;
        }
        i915_vma_unpin(batch);
static int live_sequential_engines(void *arg)
{
        struct drm_i915_private *i915 = arg;
        const unsigned int nengines = num_uabi_engines(i915);
        struct i915_request **request;
        struct i915_request *prev = NULL;
        struct intel_engine_cs *engine;
        struct igt_live_test t;
        unsigned int idx;
        int err;

        /*
         * Check we can submit requests to all engines sequentially, such
         * that each successive request waits for the earlier ones. This
         * tests that we don't execute requests out of order, even though
         * they are running on independent engines.
         */

        request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);

        err = igt_live_test_begin(&t, i915, __func__, "");

        idx = 0;
        for_each_uabi_engine(engine, i915) {
                struct i915_vma *batch;

                batch = recursive_batch(i915);
                if (IS_ERR(batch)) {
                        err = PTR_ERR(batch);
                        pr_err("%s: Unable to create batch for %s, err=%d\n",
                               __func__, engine->name, err);
                }

                request[idx] = intel_engine_create_kernel_request(engine);
                if (IS_ERR(request[idx])) {
                        err = PTR_ERR(request[idx]);
                        pr_err("%s: Request allocation failed for %s with err=%d\n",
                               __func__, engine->name, err);
                }

                if (prev) {
                        err = i915_request_await_dma_fence(request[idx],
                                                           &prev->fence);
                        if (err) {
                                i915_request_add(request[idx]);
                                pr_err("%s: Request await failed for %s with err=%d\n",
                                       __func__, engine->name, err);
                        }
                }

                err = engine->emit_bb_start(request[idx],
                                            batch->node.start,
                                            batch->node.size,
                                            0);
                request[idx]->batch = batch;

                i915_vma_lock(batch);
                err = i915_request_await_object(request[idx],
                                                batch->obj, false);
                if (err == 0)
                        err = i915_vma_move_to_active(batch, request[idx], 0);
                i915_vma_unlock(batch);

                i915_request_get(request[idx]);
                i915_request_add(request[idx]);

                prev = request[idx];
                idx++;
        }

        idx = 0;
        for_each_uabi_engine(engine, i915) {
                long timeout;

                if (i915_request_completed(request[idx])) {
                        pr_err("%s(%s): request completed too early!\n",
                               __func__, engine->name);
                }

                err = recursive_batch_resolve(request[idx]->batch);
                if (err) {
                        pr_err("%s: failed to resolve batch, err=%d\n",
                               __func__, err);
                }

                timeout = i915_request_wait(request[idx], 0,
                                            MAX_SCHEDULE_TIMEOUT);
                if (timeout < 0) {
                        err = timeout;
                        pr_err("%s: error waiting for request on %s, err=%d\n",
                               __func__, engine->name, err);
                }

                GEM_BUG_ON(!i915_request_completed(request[idx]));
                idx++;
        }

        err = igt_live_test_end(&t);

        idx = 0;
        for_each_uabi_engine(engine, i915) {
                u32 *cmd;

                cmd = i915_gem_object_pin_map(request[idx]->batch->obj,
                                              I915_MAP_WC);
                if (!IS_ERR(cmd)) {
                        *cmd = MI_BATCH_BUFFER_END;
                        intel_gt_chipset_flush(engine->gt);

                        i915_gem_object_unpin_map(request[idx]->batch->obj);
                }

                i915_vma_put(request[idx]->batch);
                i915_request_put(request[idx]);
                idx++;
        }
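
/* Submit requests on one engine, synchronously waiting on each before the next. */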
static int __live_parallel_engine1(void *arg)
{
        struct intel_engine_cs *engine = arg;
        IGT_TIMEOUT(end_time);
        unsigned long count;
        int err = 0;

        count = 0;
        intel_engine_pm_get(engine);
        do {
                struct i915_request *rq;

                rq = i915_request_create(engine->kernel_context);

                i915_request_get(rq);
                i915_request_add(rq);

                if (i915_request_wait(rq, 0, HZ / 5) < 0)
                        err = -ETIME;
                i915_request_put(rq);

                count++;
        } while (!__igt_timeout(end_time, NULL));
        intel_engine_pm_put(engine);

        pr_info("%s: %lu request + sync\n", engine->name, count);
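
/* Submit requests back-to-back on one engine without waiting in between. */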
static int __live_parallel_engineN(void *arg)
{
        struct intel_engine_cs *engine = arg;
        IGT_TIMEOUT(end_time);
        unsigned long count;

        count = 0;
        intel_engine_pm_get(engine);
        do {
                struct i915_request *rq;

                rq = i915_request_create(engine->kernel_context);

                i915_request_add(rq);
                count++;
        } while (!__igt_timeout(end_time, NULL));
        intel_engine_pm_put(engine);

        pr_info("%s: %lu requests\n", engine->name, count);
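
/*
 * Simple barrier built on i915->selftest.counter: wake_all() drops one
 * reference and wakes waiters when the count reaches zero; wait_for_all()
 * waits for that to happen (or for the selftest timeout to expire).
 */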
static bool wake_all(struct drm_i915_private *i915)
{
        if (atomic_dec_and_test(&i915->selftest.counter)) {
                wake_up_var(&i915->selftest.counter);
                return true;
        }

        return false;
}
static int wait_for_all(struct drm_i915_private *i915)
{
        if (wait_var_event_timeout(&i915->selftest.counter,
                                   !atomic_read(&i915->selftest.counter),
                                   i915_selftest.timeout_jiffies))
                return 0;

        return -ETIME;
}
static int __live_parallel_spin(void *arg)
{
        struct intel_engine_cs *engine = arg;
        struct igt_spinner spin;
        struct i915_request *rq;
        int err = 0;

        /*
         * Create a spinner running for eternity on each engine. If a second
         * spinner is incorrectly placed on the same engine, it will not be
         * able to start in time.
         */

        if (igt_spinner_init(&spin, engine->gt)) {
                wake_all(engine->i915);
                return -ENOMEM;
        }

        intel_engine_pm_get(engine);
        rq = igt_spinner_create_request(&spin,
                                        engine->kernel_context,
                                        MI_NOOP); /* no preemption */
        intel_engine_pm_put(engine);
        if (IS_ERR(rq)) {
                wake_all(engine->i915);
        }

        i915_request_get(rq);
        i915_request_add(rq);
        if (igt_wait_for_spinner(&spin, rq)) {
                /* Occupy this engine for the whole test */
                err = wait_for_all(engine->i915);
        } else {
                pr_err("Failed to start spinner on %s\n", engine->name);
        }
        igt_spinner_end(&spin);

        if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0)
                err = -EIO;
        i915_request_put(rq);

        igt_spinner_fini(&spin);
static int live_parallel_engines(void *arg)
{
        struct drm_i915_private *i915 = arg;
        static int (* const func[])(void *arg) = {
                __live_parallel_engine1,
                __live_parallel_engineN,
                __live_parallel_spin,
                NULL,
        };
        const unsigned int nengines = num_uabi_engines(i915);
        struct intel_engine_cs *engine;
        int (* const *fn)(void *arg);
        struct task_struct **tsk;
        int err = 0;

        /*
         * Check we can submit requests to all engines concurrently. This
         * tests that we load up the system maximally.
         */

        tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL);

        for (fn = func; !err && *fn; fn++) {
                char name[KSYM_NAME_LEN];
                struct igt_live_test t;
                unsigned int idx;

                snprintf(name, sizeof(name), "%pS", fn);
                err = igt_live_test_begin(&t, i915, __func__, name);

                atomic_set(&i915->selftest.counter, nengines);

                idx = 0;
                for_each_uabi_engine(engine, i915) {
                        tsk[idx] = kthread_run(*fn, engine,
                                               "igt/parallel:%s",
                                               engine->name);
                        if (IS_ERR(tsk[idx])) {
                                err = PTR_ERR(tsk[idx]);
                                break;
                        }
                        get_task_struct(tsk[idx++]);
                }

                yield(); /* start all threads before we kthread_stop() */

                idx = 0;
                for_each_uabi_engine(engine, i915) {
                        int status;

                        if (IS_ERR(tsk[idx]))
                                break;

                        status = kthread_stop(tsk[idx]);
                        if (status && !err)
                                err = status;

                        put_task_struct(tsk[idx++]);
                }

                if (igt_live_test_end(&t))
                        err = -EIO;
        }
static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
        struct i915_request *rq;
        int sz, ret;

        /*
         * Before execlists, all contexts share the same ringbuffer. With
         * execlists, each context/engine has a separate ringbuffer and
         * for the purposes of this test, inexhaustible.
         *
         * For the global ringbuffer though, we have to be very careful
         * that we do not wrap while preventing the execution of requests
         * with an unsignaled fence.
         */
        if (HAS_EXECLISTS(ctx->i915))
                return INT_MAX;

        rq = igt_request_alloc(ctx, engine);

        ret = rq->ring->size - rq->reserved_space;
        i915_request_add(rq);

        sz = rq->ring->emit - rq->head;
        if (sz < 0)
                sz += rq->ring->size;
        ret /= sz;
        ret /= 2; /* leave half spare, in case of emergency! */

        return ret;
}
static int live_breadcrumbs_smoketest(void *arg)
{
        struct drm_i915_private *i915 = arg;
        const unsigned int nengines = num_uabi_engines(i915);
        const unsigned int ncpus = num_online_cpus();
        unsigned long num_waits, num_fences;
        struct intel_engine_cs *engine;
        struct task_struct **threads;
        struct igt_live_test live;
        intel_wakeref_t wakeref;
        struct smoketest *smoke;
        unsigned int n, idx;
        int ret = 0;

        /*
         * Smoketest our breadcrumb/signal handling for requests across multiple
         * threads. A very simple test to only catch the most egregious of bugs.
         * See __igt_breadcrumbs_smoketest();
         *
         * On real hardware this time.
         */

        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        file = mock_file(i915);
        if (IS_ERR(file)) {
                ret = PTR_ERR(file);
        }

        smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);

        threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);

        smoke[0].request_alloc = __live_request_alloc;
        smoke[0].ncontexts = 64;
        smoke[0].contexts = kcalloc(smoke[0].ncontexts,
                                    sizeof(*smoke[0].contexts),
                                    GFP_KERNEL);
        if (!smoke[0].contexts) {
                ret = -ENOMEM;
        }

        for (n = 0; n < smoke[0].ncontexts; n++) {
                smoke[0].contexts[n] = live_context(i915, file);
                if (!smoke[0].contexts[n]) {
                        ret = -ENOMEM;
                }
        }

        ret = igt_live_test_begin(&live, i915, __func__, "");

        idx = 0;
        for_each_uabi_engine(engine, i915) {
                smoke[idx] = smoke[0];
                smoke[idx].engine = engine;
                smoke[idx].max_batch =
                        max_batches(smoke[0].contexts[0], engine);
                if (smoke[idx].max_batch < 0) {
                        ret = smoke[idx].max_batch;
                }

                /* One ring interleaved between requests from all cpus */
                smoke[idx].max_batch /= num_online_cpus() + 1;
                pr_debug("Limiting batches to %d requests on %s\n",
                         smoke[idx].max_batch, engine->name);

                for (n = 0; n < ncpus; n++) {
                        struct task_struct *tsk;

                        tsk = kthread_run(__igt_breadcrumbs_smoketest,
                                          &smoke[idx], "igt/%d.%d", idx, n);

                        get_task_struct(tsk);
                        threads[idx * ncpus + n] = tsk;
                }

                idx++;
        }

        yield(); /* start all threads before we begin */
        msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

        num_waits = 0;
        num_fences = 0;
        idx = 0;
        for_each_uabi_engine(engine, i915) {
                for (n = 0; n < ncpus; n++) {
                        struct task_struct *tsk = threads[idx * ncpus + n];
                        int err;

                        err = kthread_stop(tsk);
                        if (err < 0 && !ret)
                                ret = err;

                        put_task_struct(tsk);
                }

                num_waits += atomic_long_read(&smoke[idx].num_waits);
                num_fences += atomic_long_read(&smoke[idx].num_fences);
                idx++;
        }

        pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
                num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);

        ret = igt_live_test_end(&live) ?: ret;

        kfree(smoke[0].contexts);

        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
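
/* Entry point for the request selftests that exercise real hardware. */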
int i915_request_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(live_nop_request),
                SUBTEST(live_all_engines),
                SUBTEST(live_sequential_engines),
                SUBTEST(live_parallel_engines),
                SUBTEST(live_empty_request),
                SUBTEST(live_breadcrumbs_smoketest),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return i915_subtests(tests, i915);
}